• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDEUI

  • sources
  • kde-4.12
  • kdelibs
  • kdeui
  • widgets
kcharselectdata.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2 
3  Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Library General Public
7  License as published by the Free Software Foundation; either
8  version 2 of the License, or (at your option) any later version.
9 
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Library General Public License for more details.
14 
15  You should have received a copy of the GNU Library General Public License
16  along with this library; see the file COPYING.LIB. If not, write to
17  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19 */
20 
21 #include "kcharselectdata_p.h"
22 
23 #include <QStringList>
24 #include <QFile>
25 #include <qendian.h>
26 #include <QtConcurrentRun>
27 
28 #include <string.h>
29 #include <klocalizedstring.h>
30 #include <kstandarddirs.h>
31 
/*
 * Constants for Hangul syllable (de)composition, see UAX #15.
 * They are used below to derive the name of a precomposed syllable
 * from its leading (L), vowel (V) and trailing (T) jamo indices.
 */

/* number of entries in each jamo table */
#define LCount 19
#define VCount 21
#define TCount 28

/* derived counts */
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)

/* first code point of each range */
#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
42 
/* Short names of the leading jamo, indexed by LIndex; entry 11 is empty. */
static const char JAMO_L_TABLE[][4] = {
    "G",  "GG", "N",  "D",  "DD",
    "R",  "M",  "B",  "BB", "S",
    "SS", "",   "J",  "JJ", "C",
    "K",  "T",  "P",  "H"
};
48 
/* Short names of the vowel jamo, indexed by VIndex. */
static const char JAMO_V_TABLE[][4] = {
    "A",   "AE",  "YA",  "YAE", "EO",  "E",   "YEO",
    "YE",  "O",   "WA",  "WAE", "OE",  "YO",  "U",
    "WEO", "WE",  "WI",  "YU",  "EU",  "YI",  "I"
};
55 
/* Short names of the trailing jamo, indexed by TIndex; entry 0 is empty. */
static const char JAMO_T_TABLE[][4] = {
    "",   "G",  "GG", "GS", "N",  "NJ", "NH",
    "D",  "L",  "LG", "LM", "LB", "LS", "LT",
    "LP", "LH", "M",  "B",  "BS", "S",  "SS",
    "NG", "J",  "C",  "K",  "T",  "P",  "H"
};
62 
63 bool KCharSelectData::openDataFile()
64 {
65  if(!dataFile.isEmpty()) {
66  return true;
67  } else {
68  QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
69  if (!file.open(QIODevice::ReadOnly)) {
70  return false;
71  }
72  dataFile = file.readAll();
73  file.close();
74  futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile);
75  return true;
76  }
77 }
78 
79 quint32 KCharSelectData::getDetailIndex(const QChar& c) const
80 {
81  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
82  // Convert from little-endian, so that this code works on PPC too.
83  // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
84  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
85  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
86 
87  int min = 0;
88  int mid;
89  int max = ((offsetEnd - offsetBegin) / 27) - 1;
90 
91  quint16 unicode = c.unicode();
92 
93  static quint16 most_recent_searched;
94  static quint32 most_recent_result;
95 
96 
97  if (unicode == most_recent_searched)
98  return most_recent_result;
99 
100  most_recent_searched = unicode;
101 
102  while (max >= min) {
103  mid = (min + max) / 2;
104  const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
105  if (unicode > midUnicode)
106  min = mid + 1;
107  else if (unicode < midUnicode)
108  max = mid - 1;
109  else {
110  most_recent_result = offsetBegin + mid*27;
111 
112  return most_recent_result;
113  }
114  }
115 
116  most_recent_result = 0;
117  return 0;
118 }
119 
120 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
121 {
122  QString s = QString::number(code, base).toUpper();
123  while (s.size() < length)
124  s.prepend('0');
125  s.prepend(prefix);
126  return s;
127 }
128 
129 QList<QChar> KCharSelectData::blockContents(int block)
130 {
131  if(!openDataFile()) {
132  return QList<QChar>();
133  }
134 
135  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
136  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
137  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
138 
139  int max = ((offsetEnd - offsetBegin) / 4) - 1;
140 
141  QList<QChar> res;
142 
143  if(block > max)
144  return res;
145 
146  quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
147  quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
148 
149  while(unicodeBegin < unicodeEnd) {
150  res.append(unicodeBegin);
151  unicodeBegin++;
152  }
153  res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff
154 
155  return res;
156 }
157 
158 QList<int> KCharSelectData::sectionContents(int section)
159 {
160  if(!openDataFile()) {
161  return QList<int>();
162  }
163 
164  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
165  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
166  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
167 
168  int max = ((offsetEnd - offsetBegin) / 4) - 1;
169 
170  QList<int> res;
171 
172  if(section > max)
173  return res;
174 
175  for(int i = 0; i <= max; i++) {
176  const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
177  if(currSection == section) {
178  res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
179  }
180  }
181 
182  return res;
183 }
184 
185 QStringList KCharSelectData::sectionList()
186 {
187  if(!openDataFile()) {
188  return QStringList();
189  }
190 
191  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
192  const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
193  const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
194 
195  const char* data = dataFile.constData();
196  QStringList list;
197  quint32 i = stringBegin;
198  while(i < stringEnd) {
199  list.append(i18nc("KCharSelect section name", data + i));
200  i += strlen(data + i) + 1;
201  }
202 
203  return list;
204 }
205 
206 QString KCharSelectData::block(const QChar& c)
207 {
208  return blockName(blockIndex(c));
209 }
210 
211 QString KCharSelectData::section(const QChar& c)
212 {
213  return sectionName(sectionIndex(blockIndex(c)));
214 }
215 
216 QString KCharSelectData::name(const QChar& c)
217 {
218  if(!openDataFile()) {
219  return QString();
220  }
221 
222  ushort unicode = c.unicode();
223  if ((unicode >= 0x3400 && unicode <= 0x4DB5)
224  || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
225  // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit
226  return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
227  } else if (c >= 0xac00 && c <= 0xd7af) {
228  /* compute hangul syllable name as per UAX #15 */
229  int SIndex = c.unicode() - SBase;
230  int LIndex, VIndex, TIndex;
231 
232  if (SIndex < 0 || SIndex >= SCount)
233  return QString();
234 
235  LIndex = SIndex / NCount;
236  VIndex = (SIndex % NCount) / TCount;
237  TIndex = SIndex % TCount;
238 
239  return QLatin1String("HANGUL SYLLABLE ") + QLatin1String(JAMO_L_TABLE[LIndex])
240  + QLatin1String(JAMO_V_TABLE[VIndex]) + QLatin1String(JAMO_T_TABLE[TIndex]);
241  } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
242  return i18n("<Non Private Use High Surrogate>");
243  else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
244  return i18n("<Private Use High Surrogate>");
245  else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
246  return i18n("<Low Surrogate>");
247  else if (unicode >= 0xE000 && unicode <= 0xF8FF)
248  return i18n("<Private Use>");
249 // else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit!
250 // return i18n("<Plane 15 Private Use>");
251 // else if (unicode >= 0x100000 && unicode <= 0x10FFFD)
252 // return i18n("<Plane 16 Private Use>");
253  else {
254  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
255  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
256  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
257 
258  int min = 0;
259  int mid;
260  int max = ((offsetEnd - offsetBegin) / 6) - 1;
261  QString s;
262 
263  while (max >= min) {
264  mid = (min + max) / 2;
265  const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
266  if (unicode > midUnicode)
267  min = mid + 1;
268  else if (unicode < midUnicode)
269  max = mid - 1;
270  else {
271  quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
272  s = QString(dataFile.constData() + offset + 1);
273  break;
274  }
275  }
276 
277  if (s.isNull()) {
278  return i18n("<not assigned>");
279  } else {
280  return s;
281  }
282  }
283 }
284 
285 int KCharSelectData::blockIndex(const QChar& c)
286 {
287  if(!openDataFile()) {
288  return 0;
289  }
290 
291  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
292  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
293  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
294  const quint16 unicode = c.unicode();
295 
296  int max = ((offsetEnd - offsetBegin) / 4) - 1;
297 
298  int i = 0;
299 
300  while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
301  i++;
302  }
303 
304  return i;
305 }
306 
307 int KCharSelectData::sectionIndex(int block)
308 {
309  if(!openDataFile()) {
310  return 0;
311  }
312 
313  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
314  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
315  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
316 
317  int max = ((offsetEnd - offsetBegin) / 4) - 1;
318 
319  for(int i = 0; i <= max; i++) {
320  if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
321  return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
322  }
323  }
324 
325  return 0;
326 }
327 
328 QString KCharSelectData::blockName(int index)
329 {
330  if(!openDataFile()) {
331  return QString();
332  }
333 
334  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
335  const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
336  const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
337 
338  quint32 i = stringBegin;
339  int currIndex = 0;
340 
341  const char* data = dataFile.constData();
342  while(i < stringEnd && currIndex < index) {
343  i += strlen(data + i) + 1;
344  currIndex++;
345  }
346 
347  return i18nc("KCharselect unicode block name", data + i);
348 }
349 
350 QString KCharSelectData::sectionName(int index)
351 {
352  if(!openDataFile()) {
353  return QString();
354  }
355 
356  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
357  const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
358  const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
359 
360  quint32 i = stringBegin;
361  int currIndex = 0;
362 
363  const char* data = dataFile.constData();
364  while(i < stringEnd && currIndex < index) {
365  i += strlen(data + i) + 1;
366  currIndex++;
367  }
368 
369  return i18nc("KCharselect unicode section name", data + i);
370 }
371 
372 QStringList KCharSelectData::aliases(const QChar& c)
373 {
374  if(!openDataFile()) {
375  return QStringList();
376  }
377  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
378  const int detailIndex = getDetailIndex(c);
379  if(detailIndex == 0) {
380  return QStringList();
381  }
382 
383  const quint8 count = * (quint8 *)(udata + detailIndex + 6);
384  quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
385 
386  QStringList aliases;
387 
388  const char* data = dataFile.constData();
389  for (int i = 0; i < count; i++) {
390  aliases.append(QString::fromLatin1(data + offset));
391  offset += strlen(data + offset) + 1;
392  }
393  return aliases;
394 }
395 
396 QStringList KCharSelectData::notes(const QChar& c)
397 {
398  if(!openDataFile()) {
399  return QStringList();
400  }
401  const int detailIndex = getDetailIndex(c);
402  if(detailIndex == 0) {
403  return QStringList();
404  }
405 
406  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
407  const quint8 count = * (quint8 *)(udata + detailIndex + 11);
408  quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
409 
410  QStringList notes;
411 
412  const char* data = dataFile.constData();
413  for (int i = 0; i < count; i++) {
414  notes.append(QString::fromLatin1(data + offset));
415  offset += strlen(data + offset) + 1;
416  }
417 
418  return notes;
419 }
420 
421 QList<QChar> KCharSelectData::seeAlso(const QChar& c)
422 {
423  if(!openDataFile()) {
424  return QList<QChar>();
425  }
426  const int detailIndex = getDetailIndex(c);
427  if(detailIndex == 0) {
428  return QList<QChar>();
429  }
430 
431  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
432  const quint8 count = * (quint8 *)(udata + detailIndex + 26);
433  quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
434 
435  QList<QChar> seeAlso;
436 
437  for (int i = 0; i < count; i++) {
438  seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
439  offset += 2;
440  }
441 
442  return seeAlso;
443 }
444 
445 QStringList KCharSelectData::equivalents(const QChar& c)
446 {
447  if(!openDataFile()) {
448  return QStringList();
449  }
450  const int detailIndex = getDetailIndex(c);
451  if(detailIndex == 0) {
452  return QStringList();
453  }
454 
455  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
456  const quint8 count = * (quint8 *)(udata + detailIndex + 21);
457  quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
458 
459  QStringList equivalents;
460 
461  const char* data = dataFile.constData();
462  for (int i = 0; i < count; i++) {
463  equivalents.append(QString::fromLatin1(data + offset));
464  offset += strlen(data + offset) + 1;
465  }
466 
467  return equivalents;
468 }
469 
470 QStringList KCharSelectData::approximateEquivalents(const QChar& c)
471 {
472  if(!openDataFile()) {
473  return QStringList();
474  }
475  const int detailIndex = getDetailIndex(c);
476  if(detailIndex == 0) {
477  return QStringList();
478  }
479 
480  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
481  const quint8 count = * (quint8 *)(udata + detailIndex + 16);
482  quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
483 
484  QStringList approxEquivalents;
485 
486  const char* data = dataFile.constData();
487  for (int i = 0; i < count; i++) {
488  approxEquivalents.append(QString::fromLatin1(data + offset));
489  offset += strlen(data + offset) + 1;
490  }
491 
492  return approxEquivalents;
493 }
494 
495 QStringList KCharSelectData::unihanInfo(const QChar& c)
496 {
497  if(!openDataFile()) {
498  return QStringList();
499  }
500 
501  const char* data = dataFile.constData();
502  const uchar* udata = reinterpret_cast<const uchar*>(data);
503  const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
504  const quint32 offsetEnd = dataFile.size();
505 
506  int min = 0;
507  int mid;
508  int max = ((offsetEnd - offsetBegin) / 30) - 1;
509  quint16 unicode = c.unicode();
510 
511  while (max >= min) {
512  mid = (min + max) / 2;
513  const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
514  if (unicode > midUnicode)
515  min = mid + 1;
516  else if (unicode < midUnicode)
517  max = mid - 1;
518  else {
519  QStringList res;
520  for(int i = 0; i < 7; i++) {
521  quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
522  if(offset != 0) {
523  res.append(QString::fromLatin1(data + offset));
524  } else {
525  res.append(QString());
526  }
527  }
528  return res;
529  }
530  }
531 
532  return QStringList();
533 }
534 
535 QChar::Category KCharSelectData::category(const QChar& c)
536 {
537  if(!openDataFile()) {
538  return c.category();
539  }
540 
541  ushort unicode = c.unicode();
542 
543  const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
544  const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
545  const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
546 
547  int min = 0;
548  int mid;
549  int max = ((offsetEnd - offsetBegin) / 6) - 1;
550  QString s;
551 
552  while (max >= min) {
553  mid = (min + max) / 2;
554  const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
555  if (unicode > midUnicode)
556  min = mid + 1;
557  else if (unicode < midUnicode)
558  max = mid - 1;
559  else {
560  quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
561  const quint8 categoryCode = * (quint8 *)(data + offset);
562  return QChar::Category(categoryCode);
563  }
564  }
565 
566  return c.category();
567 }
568 
569 bool KCharSelectData::isPrint(const QChar& c)
570 {
571  QChar::Category cat = category(c);
572  return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned);
573 }
574 
575 bool KCharSelectData::isDisplayable(const QChar& c)
576 {
577  // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames.
578  // They should be seen as non-printable characters, as trying to display them leads
579  // to a crash caused by a Qt "noBlockInString" assertion.
580  if(c == 0xFDD0 || c == 0xFDD1)
581  return false;
582 
583  return !isIgnorable(c) && isPrint(c);
584 }
585 
586 bool KCharSelectData::isIgnorable(const QChar& c)
587 {
588  /*
589  * According to the Unicode standard, Default Ignorable Code Points
590  * should be ignored unless explicitly supported. For example, U+202E
591  * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying
592  * it gives the undesired effect of all text being turned RTL. We do not
593  * have a way to "explicitly" support it, so we will treat it as
594  * non-printable.
595  *
596  * There is a list of these on
597  * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the
598  * property Default_Ignorable_Code_Point.
599  */
600 
601  //NOTE: not very nice to hardcode these here; is it worth it to modify
602  // the binary data file to hold them?
603  return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
604  c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
605  (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
606  (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
607  (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
608  (c >= 0xFFF0 && c <= 0xFFF8);
609 }
610 
611 bool KCharSelectData::isCombining(const QChar &c)
612 {
613  return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks");
614  //FIXME: this is an imperfect test. There are many combining characters
615  // that are outside of this section. See Grapheme_Extend in
616  // http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
617 }
618 
619 QString KCharSelectData::display(const QChar &c, const QFont &font)
620 {
621  if (!isDisplayable(c)) {
622  return QString("<b>") + i18n("Non-printable") + "</b>";
623  } else {
624  QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">";
625  if (isCombining(c)) {
626  s += displayCombining(c);
627  } else {
628  s += "&#" + QString::number(c.unicode()) + ';';
629  }
630  s += "</font>";
631  return s;
632  }
633 }
634 
635 QString KCharSelectData::displayCombining(const QChar &c)
636 {
637  /*
638  * The purpose of this is to make it easier to see how a combining
639  * character affects the text around it.
640  * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose,
641  * as seen in pdfs from Unicode, but there seem to be a lot of alignment
642  * problems with that.
643  *
644  * Eventually, it would be nice to determine whether the character
645  * combines to the left or to the right, etc.
646  */
647  QString s = "&nbsp;&#" + QString::number(c.unicode()) + ";&nbsp;" +
648  " (ab&#" + QString::number(c.unicode()) + ";c)";
649  return s;
650 }
651 
652 QString KCharSelectData::categoryText(QChar::Category category)
653 {
654  switch (category) {
655  case QChar::Other_Control: return i18n("Other, Control");
656  case QChar::Other_Format: return i18n("Other, Format");
657  case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
658  case QChar::Other_PrivateUse: return i18n("Other, Private Use");
659  case QChar::Other_Surrogate: return i18n("Other, Surrogate");
660  case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
661  case QChar::Letter_Modifier: return i18n("Letter, Modifier");
662  case QChar::Letter_Other: return i18n("Letter, Other");
663  case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
664  case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
665  case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
666  case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
667  case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
668  case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
669  case QChar::Number_Letter: return i18n("Number, Letter");
670  case QChar::Number_Other: return i18n("Number, Other");
671  case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
672  case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
673  case QChar::Punctuation_Close: return i18n("Punctuation, Close");
674  case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
675  case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
676  case QChar::Punctuation_Other: return i18n("Punctuation, Other");
677  case QChar::Punctuation_Open: return i18n("Punctuation, Open");
678  case QChar::Symbol_Currency: return i18n("Symbol, Currency");
679  case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
680  case QChar::Symbol_Math: return i18n("Symbol, Math");
681  case QChar::Symbol_Other: return i18n("Symbol, Other");
682  case QChar::Separator_Line: return i18n("Separator, Line");
683  case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
684  case QChar::Separator_Space: return i18n("Separator, Space");
685  default: return i18n("Unknown");
686  }
687 }
688 
689 QList<QChar> KCharSelectData::find(const QString& needle)
690 {
691  QSet<quint16> result;
692 
693  QList<QChar> returnRes;
694  QString simplified = needle.simplified();
695  QStringList searchStrings = splitString(needle.simplified());
696 
697  if(simplified.length() == 1) {
698  // search for hex representation of the character
699  searchStrings = QStringList(formatCode(simplified.at(0).unicode()));
700  }
701 
702  if (searchStrings.count() == 0) {
703  return returnRes;
704  }
705 
706  QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
707  foreach(const QString &s, searchStrings) {
708  if(regExp.exactMatch(s)) {
709  returnRes.append(regExp.cap(2).toInt(0, 16));
710  // search for "1234" instead of "0x1234"
711  if (s.length() == 6) {
712  searchStrings[searchStrings.indexOf(s)] = regExp.cap(2);
713  }
714  }
715  // try to parse string as decimal number
716  bool ok;
717  int unicode = s.toInt(&ok);
718  if (ok && unicode >= 0 && unicode <= 0xFFFF) {
719  returnRes.append(unicode);
720  }
721  }
722 
723  bool firstSubString = true;
724  foreach(const QString &s, searchStrings) {
725  QSet<quint16> partResult = getMatchingChars(s.toLower());
726  if (firstSubString) {
727  result = partResult;
728  firstSubString = false;
729  } else {
730  result = result.intersect(partResult);
731  }
732  }
733 
734  // remove results found by matching the code point to prevent duplicate results
735  // while letting these characters stay at the beginning
736  foreach(const QChar &c, returnRes) {
737  result.remove(c.unicode());
738  }
739 
740  QList<quint16> sortedResult = result.toList();
741  qSort(sortedResult);
742 
743  foreach(const quint16 &c, sortedResult) {
744  returnRes.append(c);
745  }
746 
747  return returnRes;
748 }
749 
750 QSet<quint16> KCharSelectData::getMatchingChars(const QString& s)
751 {
752  futureIndex.waitForFinished();
753  const Index index = futureIndex;
754  Index::const_iterator pos = index.lowerBound(s);
755  QSet<quint16> result;
756 
757  while (pos != index.constEnd() && pos.key().startsWith(s)) {
758  foreach (const quint16 &c, pos.value()) {
759  result.insert(c);
760  }
761  ++pos;
762  }
763 
764  return result;
765 }
766 
767 QStringList KCharSelectData::splitString(const QString& s)
768 {
769  QStringList result;
770  int start = 0;
771  int end = 0;
772  int length = s.length();
773  while (end < length) {
774  while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) {
775  end++;
776  }
777  if (start != end) {
778  result.append(s.mid(start, end - start));
779  }
780  start = end;
781  while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) {
782  end++;
783  start++;
784  }
785  }
786  return result;
787 }
788 
789 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s)
790 {
791  const QStringList strings = splitString(s);
792  foreach(const QString &s, strings) {
793  (*index)[s.toLower()].append(unicode);
794  }
795 }
796 
797 Index KCharSelectData::createIndex(const QByteArray& dataFile)
798 {
799  Index i;
800 
801  // character names
802  const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
803  const char* data = dataFile.constData();
804  const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
805  const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
806 
807  int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
808 
809  for (int pos = 0; pos <= max; pos++) {
810  const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
811  quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
812  appendToIndex(&i, unicode, QString(data + offset + 1));
813  }
814 
815  // details
816  const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
817  const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
818 
819  max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
820 
821  for (int pos = 0; pos <= max; pos++) {
822  const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
823 
824  // aliases
825  const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
826  quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
827 
828  for (int j = 0; j < aliasCount; j++) {
829  appendToIndex(&i, unicode, QString::fromLatin1(data + aliasOffset));
830  aliasOffset += strlen(data + aliasOffset) + 1;
831  }
832 
833  // notes
834  const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
835  quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
836 
837  for (int j = 0; j < notesCount; j++) {
838  appendToIndex(&i, unicode, QString::fromLatin1(data + notesOffset));
839  notesOffset += strlen(data + notesOffset) + 1;
840  }
841 
842  // approximate equivalents
843  const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
844  quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
845 
846  for (int j = 0; j < apprCount; j++) {
847  appendToIndex(&i, unicode, QString::fromLatin1(data + apprOffset));
848  apprOffset += strlen(data + apprOffset) + 1;
849  }
850 
851  // equivalents
852  const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
853  quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
854 
855  for (int j = 0; j < equivCount; j++) {
856  appendToIndex(&i, unicode, QString::fromLatin1(data + equivOffset));
857  equivOffset += strlen(data + equivOffset) + 1;
858  }
859 
860  // see also - convert to string (hex)
861  const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
862  quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
863 
864  for (int j = 0; j < seeAlsoCount; j++) {
865  quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
866  appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString()));
867  equivOffset += strlen(data + equivOffset) + 1;
868  }
869  }
870 
871  // unihan data
872  // temporary disabled due to the huge amount of data
873 // const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36);
874 // const quint32 unihanOffsetEnd = dataFile.size();
875 // max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1;
876 //
877 // for (int pos = 0; pos <= max; pos++) {
878 // const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30);
879 // for(int j = 0; j < 7; j++) {
880 // quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4);
881 // if(offset != 0) {
882 // appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
883 // }
884 // }
885 // }
886 
887  return i;
888 }
i18n
QString i18n(const char *text)
JAMO_V_TABLE
static const char JAMO_V_TABLE[][4]
Definition: kcharselectdata.cpp:49
JAMO_T_TABLE
static const char JAMO_T_TABLE[][4]
Definition: kcharselectdata.cpp:56
JAMO_L_TABLE
static const char JAMO_L_TABLE[][4]
Definition: kcharselectdata.cpp:43
KStandardDirs::locate
static QString locate(const char *type, const QString &filename, const KComponentData &cData=KGlobal::mainComponent())
KStandardAction::name
const char * name(StandardAction id)
This will return the internal name of a given standard action.
Definition: kstandardaction.cpp:223
KStandardAction::find
KAction * find(const QObject *recvr, const char *slot, QObject *parent)
Initiate a 'find' request in the current document.
Definition: kstandardaction.cpp:329
quint32
QString
i18nc
QString i18nc(const char *ctxt, const char *text)
NCount
#define NCount
Definition: kcharselectdata.cpp:40
QStringList
TCount
#define TCount
Definition: kcharselectdata.cpp:39
SBase
#define SBase
Definition: kcharselectdata.cpp:33
QSet
KStandardGuiItem::ok
KGuiItem ok()
Returns the 'Ok' gui item.
Definition: kstandardguiitem.cpp:107
QFont
klocalizedstring.h
kstandarddirs.h
SCount
#define SCount
Definition: kcharselectdata.cpp:41
KStandardShortcut::end
const KShortcut & end()
Goto end of the document.
Definition: kstandardshortcut.cpp:348
QList< QChar >
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:49:14 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDEUI

Skip menu "KDEUI"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal