21 #include "kcharselectdata_p.h"
23 #include <QStringList>
26 #include <QtConcurrentRun>
// Derived counts for Hangul syllable decomposition: NCount = VCount * TCount and
// SCount = LCount * NCount. SCount bounds the syllable index in the Hangul branch
// of the name lookup. VCount, TCount and LCount are defined outside this excerpt.
40 #define NCount (VCount * TCount)
41 #define SCount (LCount * NCount)
// Entries of a string table whose declaration line is not visible in this
// excerpt (presumably JAMO_L_TABLE, which the Hangul name composition indexes
// by LIndex — TODO confirm). The empty string after "SS" is one of the entries.
45 "G",
"GG",
"N",
"D",
"DD",
"R",
"M",
"B",
"BB",
46 "S",
"SS",
"",
"J",
"JJ",
"C",
"K",
"T",
"P",
"H"
// Entries of a second string table whose declaration line is not visible in
// this excerpt (presumably JAMO_V_TABLE — TODO confirm).
// NOTE(review): the visible entries stop at "WI"; any further entries and the
// closing of the initializer are not shown here.
51 "A",
"AE",
"YA",
"YAE",
"EO",
"E",
"YEO",
"YE",
"O",
52 "WA",
"WAE",
"OE",
"YO",
"U",
"WEO",
"WE",
"WI",
// Entries of a third string table whose declaration line is not visible in
// this excerpt (presumably JAMO_T_TABLE — TODO confirm). The first entry is the
// empty string.
58 "",
"G",
"GG",
"GS",
"N",
"NJ",
"NH",
"D",
"L",
"LG",
"LM",
59 "LB",
"LS",
"LT",
"LP",
"LH",
"M",
"B",
"BS",
60 "S",
"SS",
"NG",
"J",
"C",
"K",
"T",
"P",
"H"
// Makes sure the character database is loaded into dataFile and starts the
// search-index construction. The returned values and the path lookup are not
// visible in this excerpt.
63 bool KCharSelectData::openDataFile()
// A non-empty dataFile means an earlier call already loaded the data.
65 if(!dataFile.isEmpty()) {
// The file could not be opened for reading.
69 if (!file.open(QIODevice::ReadOnly)) {
// Keep the whole file in memory; the lookup functions index into this buffer.
72 dataFile = file.readAll();
// createIndex is launched through QtConcurrent::run with the loaded buffer;
// the future it yields is stored in futureIndex.
74 futureIndex = QtConcurrent::run(
this, &KCharSelectData::createIndex, dataFile);
// Looks up the details record for c and yields its byte offset inside dataFile.
// The most recent query and its result are kept in function-local statics, so
// repeating the previous query skips the search.
79 quint32 KCharSelectData::getDetailIndex(
const QChar& c)
const
81 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Table bounds are little-endian 32-bit values stored at header bytes 12 and 16.
84 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
85 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
// Records are 27 bytes each; max is the index of the last record.
89 int max = ((offsetEnd - offsetBegin) / 27) - 1;
91 quint16 unicode = c.unicode();
// One-entry cache with static storage, shared by every call.
// NOTE(review): no locking is visible around these statics — confirm this is
// only reached from one thread.
93 static quint16 most_recent_searched;
94 static quint32 most_recent_result;
97 if (unicode == most_recent_searched)
98 return most_recent_result;
100 most_recent_searched = unicode;
// Binary search; every record begins with its 16-bit code unit.
103 mid = (min + max) / 2;
104 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
105 if (unicode > midUnicode)
107 else if (unicode < midUnicode)
// Match: remember and return the byte offset of the record.
110 most_recent_result = offsetBegin + mid*27;
112 return most_recent_result;
// Nothing matched: 0 is cached as the result (callers in this file test
// detailIndex == 0 to detect "no details").
116 most_recent_result = 0;
// Renders code as an upper-case number in the given base. The loop below runs
// while the string is shorter than length (presumably to pad it; the loop body
// and the use of prefix are not visible in this excerpt).
120 QString KCharSelectData::formatCode(ushort code,
int length,
const QString& prefix,
int base)
122 QString s = QString::number(code, base).toUpper();
123 while (s.size() < length)
// Collects the code units belonging to one block entry. The signature line is
// not visible in this excerpt (presumably blockContents(int block) — confirm).
131 if(!openDataFile()) {
135 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Table bounds are little-endian 32-bit values at header bytes 20 and 24.
136 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
137 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
// Entries are 4 bytes each; max is the index of the last entry.
139 int max = ((offsetEnd - offsetBegin) / 4) - 1;
// Entry layout: 16-bit first code unit followed by 16-bit last code unit.
146 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
147 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
// Appends while unicodeBegin is below unicodeEnd (the increment is not visible
// here).
149 while(unicodeBegin < unicodeEnd) {
150 res.append(unicodeBegin);
// Presumably placed after the loop so that unicodeEnd itself is included —
// confirm against the full source.
153 res.append(unicodeBegin);
// Collects, for the given section, the second field of every matching entry
// in the table located via header bytes 28 and 32.
158 QList<int> KCharSelectData::sectionContents(
int section)
160 if(!openDataFile()) {
164 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
165 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
166 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
// Entries are 4 bytes each; max is the index of the last entry.
168 int max = ((offsetEnd - offsetBegin) / 4) - 1;
// Linear scan: the first 16-bit field of an entry is its section; the second
// 16-bit field is the value collected for matching entries (sectionIndex()
// compares that same field against a block index).
175 for(
int i = 0; i <= max; i++) {
176 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
177 if(currSection == section) {
178 res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
// Builds the list of translated section names. The signature line is not
// visible in this excerpt (presumably sectionList() — confirm).
187 if(!openDataFile()) {
191 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// The name strings occupy the byte range given by header bytes 24 and 28.
192 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
193 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
195 const char* data = dataFile.constData();
// Walk the NUL-terminated strings back to back, translating each one.
198 while(i < stringEnd) {
199 list.append(
i18nc(
"KCharSelect section name", data + i));
// Step over the string and its terminating NUL.
200 i += strlen(data + i) + 1;
// Returns the name of the block containing c, by chaining blockIndex() and
// blockName().
206 QString KCharSelectData::block(
const QChar& c)
208 return blockName(blockIndex(c));
// Returns the name of the section containing c: character -> block index ->
// section index -> section name.
211 QString KCharSelectData::section(
const QChar& c)
213 return sectionName(sectionIndex(blockIndex(c)));
// Produces the name of character c. The signature line is not visible in this
// excerpt (presumably name(const QChar& c) — confirm). Names for CJK ideographs,
// Hangul syllables, surrogates and private-use code points are synthesized;
// everything else is looked up in the names table of dataFile.
218 if(!openDataFile()) {
222 ushort unicode = c.unicode();
// CJK unified ideographs have algorithmic names: the fixed prefix followed by
// the code point in hexadecimal.
223 if ((unicode >= 0x3400 && unicode <= 0x4DB5)
224 || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
// QString::number() emits lowercase hex digits; Unicode character names (and
// formatCode() in this file) use uppercase, so upper-case the digits.
226 return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16).toUpper();
227 }
else if (c >= 0xac00 && c <= 0xd7af) {
// Hangul syllable: derive the jamo indices from the offset to SBase.
229 int SIndex = c.unicode() -
SBase;
230 int LIndex, VIndex, TIndex;
// Offsets outside [0, SCount) are not composable syllables.
232 if (SIndex < 0 || SIndex >=
SCount)
// The name is the fixed prefix followed by jamo table strings (only the
// JAMO_L_TABLE part of the expression is visible in this excerpt).
239 return QLatin1String(
"HANGUL SYLLABLE ") + QLatin1String(
JAMO_L_TABLE[LIndex])
241 }
else if (unicode >= 0xD800 && unicode <= 0xDB7F)
242 return i18n(
"<Non Private Use High Surrogate>");
243 else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
244 return i18n(
"<Private Use High Surrogate>");
245 else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
246 return i18n(
"<Low Surrogate>");
247 else if (unicode >= 0xE000 && unicode <= 0xF8FF)
248 return i18n(
"<Private Use>");
254 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Names table bounds are little-endian 32-bit values at header bytes 4 and 8.
255 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
256 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
// Entries are 6 bytes: a 16-bit code unit followed by a 32-bit data offset.
260 int max = ((offsetEnd - offsetBegin) / 6) - 1;
// Binary search on the code unit.
264 mid = (min + max) / 2;
265 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
266 if (unicode > midUnicode)
268 else if (unicode < midUnicode)
// Match: the name string starts one byte past the stored offset (category()
// reads the byte at the offset itself as the category code).
271 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
272 s =
QString(dataFile.constData() + offset + 1);
// Translated placeholder returned on one of the paths; the condition guarding
// it is not visible here (presumably "no name found" — confirm).
278 return i18n(
"<not assigned>");
// Determines the index of the block entry that c falls into, scanning the
// block table located via header bytes 20 and 24.
285 int KCharSelectData::blockIndex(
const QChar& c)
287 if(!openDataFile()) {
291 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
292 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
293 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
294 const quint16 unicode = c.unicode();
// Entries are 4 bytes each; max is the index of the last entry.
296 int max = ((offsetEnd - offsetBegin) / 4) - 1;
// Advance while c lies beyond the end code unit (second 16-bit field) of
// entry i; the i < max test stops the scan at the last entry.
300 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
// Maps a block index to the section it is listed under, using the table
// located via header bytes 28 and 32.
307 int KCharSelectData::sectionIndex(
int block)
309 if(!openDataFile()) {
313 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
314 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
315 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
// Entries are 4 bytes each; max is the index of the last entry.
317 int max = ((offsetEnd - offsetBegin) / 4) - 1;
// Linear scan: return the first 16-bit field of the first entry whose second
// 16-bit field equals block.
319 for(
int i = 0; i <= max; i++) {
320 if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
321 return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
// Returns the translated name of the block with the given index.
328 QString KCharSelectData::blockName(
int index)
330 if(!openDataFile()) {
334 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// The block name strings occupy the byte range given by header bytes 16 and 20.
335 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
336 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
341 const char* data = dataFile.constData();
// Skip NUL-terminated strings until the index-th one is reached or the range
// ends (the update of currIndex is not visible in this excerpt).
342 while(i < stringEnd && currIndex < index) {
343 i += strlen(data + i) + 1;
347 return i18nc(
"KCharselect unicode block name", data + i);
// Returns the translated name of the section with the given index.
350 QString KCharSelectData::sectionName(
int index)
352 if(!openDataFile()) {
356 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// The section name strings occupy the byte range given by header bytes 24 and 28.
357 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
358 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
363 const char* data = dataFile.constData();
// Skip NUL-terminated strings until the index-th one is reached or the range
// ends (the update of currIndex is not visible in this excerpt).
364 while(i < stringEnd && currIndex < index) {
365 i += strlen(data + i) + 1;
369 return i18nc(
"KCharselect unicode section name", data + i);
// Returns the alias strings recorded for c in its details record.
372 QStringList KCharSelectData::aliases(
const QChar& c)
374 if(!openDataFile()) {
377 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// getDetailIndex() stores 0 when no details record exists for c.
378 const int detailIndex = getDetailIndex(c);
379 if(detailIndex == 0) {
// Within the record: byte 6 holds the alias count, bytes 2..5 the offset of
// the first alias string.
383 const quint8 count = * (quint8 *)(udata + detailIndex + 6);
384 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
388 const char* data = dataFile.constData();
// The strings are stored back to back, each NUL-terminated, read as Latin-1.
389 for (
int i = 0; i < count; i++) {
390 aliases.append(QString::fromLatin1(data + offset));
391 offset += strlen(data + offset) + 1;
// Collects the note strings recorded for c. The signature line is not visible
// in this excerpt (presumably notes(const QChar& c) — confirm).
398 if(!openDataFile()) {
// getDetailIndex() stores 0 when no details record exists for c.
401 const int detailIndex = getDetailIndex(c);
402 if(detailIndex == 0) {
406 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Within the record: byte 11 holds the note count, bytes 7..10 the offset of
// the first note string.
407 const quint8 count = * (quint8 *)(udata + detailIndex + 11);
408 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
412 const char* data = dataFile.constData();
// The strings are stored back to back, each NUL-terminated, read as Latin-1.
413 for (
int i = 0; i < count; i++) {
414 notes.append(QString::fromLatin1(data + offset));
415 offset += strlen(data + offset) + 1;
// Collects the cross-referenced code units recorded for c. The signature line
// is not visible in this excerpt (presumably seeAlso(const QChar& c) — confirm).
423 if(!openDataFile()) {
// getDetailIndex() stores 0 when no details record exists for c.
426 const int detailIndex = getDetailIndex(c);
427 if(detailIndex == 0) {
431 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Within the record: byte 26 holds the entry count, bytes 22..25 the offset of
// the first entry.
432 const quint8 count = * (quint8 *)(udata + detailIndex + 26);
433 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
// Each entry is a little-endian 16-bit code unit (the statement advancing
// offset is not visible in this excerpt).
437 for (
int i = 0; i < count; i++) {
438 seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
// Returns the equivalent strings recorded for c in its details record.
445 QStringList KCharSelectData::equivalents(
const QChar& c)
447 if(!openDataFile()) {
// getDetailIndex() stores 0 when no details record exists for c.
450 const int detailIndex = getDetailIndex(c);
451 if(detailIndex == 0) {
455 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Within the record: byte 21 holds the count, bytes 17..20 the offset of the
// first string.
456 const quint8 count = * (quint8 *)(udata + detailIndex + 21);
457 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
461 const char* data = dataFile.constData();
// The strings are stored back to back, each NUL-terminated, read as Latin-1.
462 for (
int i = 0; i < count; i++) {
463 equivalents.append(QString::fromLatin1(data + offset));
464 offset += strlen(data + offset) + 1;
// Returns the approximate-equivalent strings recorded for c in its details
// record.
470 QStringList KCharSelectData::approximateEquivalents(
const QChar& c)
472 if(!openDataFile()) {
// getDetailIndex() stores 0 when no details record exists for c.
475 const int detailIndex = getDetailIndex(c);
476 if(detailIndex == 0) {
480 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Within the record: byte 16 holds the count, bytes 12..15 the offset of the
// first string.
481 const quint8 count = * (quint8 *)(udata + detailIndex + 16);
482 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
486 const char* data = dataFile.constData();
// The strings are stored back to back, each NUL-terminated, read as Latin-1.
487 for (
int i = 0; i < count; i++) {
488 approxEquivalents.append(QString::fromLatin1(data + offset));
489 offset += strlen(data + offset) + 1;
492 return approxEquivalents;
// Returns the strings stored for c in the table that starts at the offset in
// header bytes 36..39 and extends to the end of dataFile.
495 QStringList KCharSelectData::unihanInfo(
const QChar& c)
497 if(!openDataFile()) {
501 const char* data = dataFile.constData();
502 const uchar* udata =
reinterpret_cast<const uchar*
>(data);
503 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
// This table has no stored end offset; the file size is used instead.
504 const quint32 offsetEnd = dataFile.size();
// Records are 30 bytes: a 16-bit code unit followed by seven 32-bit offsets.
508 int max = ((offsetEnd - offsetBegin) / 30) - 1;
509 quint16 unicode = c.unicode();
// Binary search on the code unit.
512 mid = (min + max) / 2;
513 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
514 if (unicode > midUnicode)
516 else if (unicode < midUnicode)
// Match: read the seven offsets and append the Latin-1 string each points to
// (any condition between reading the offset and appending is not visible here).
520 for(
int i = 0; i < 7; i++) {
521 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
523 res.append(QString::fromLatin1(data + offset));
// Returns the category stored for c in the names table of dataFile.
535 QChar::Category KCharSelectData::category(
const QChar& c)
537 if(!openDataFile()) {
541 ushort unicode = c.unicode();
543 const uchar* data =
reinterpret_cast<const uchar*
>(dataFile.constData());
// Names table bounds are little-endian 32-bit values at header bytes 4 and 8.
544 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
545 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
// Entries are 6 bytes: a 16-bit code unit followed by a 32-bit data offset.
549 int max = ((offsetEnd - offsetBegin) / 6) - 1;
// Binary search on the code unit.
553 mid = (min + max) / 2;
554 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
555 if (unicode > midUnicode)
557 else if (unicode < midUnicode)
// Match: the single byte at the stored offset is the category code, cast
// directly to QChar::Category.
560 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
561 const quint8 categoryCode = * (quint8 *)(data + offset);
562 return QChar::Category(categoryCode);
// True unless the category of c is Other_Control or Other_NotAssigned.
569 bool KCharSelectData::isPrint(
const QChar& c)
571 QChar::Category cat = category(c);
572 return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned);
// Decides whether c can be shown: it must be printable and not ignorable.
575 bool KCharSelectData::isDisplayable(
const QChar& c)
// U+FDD0 and U+FDD1 are special-cased (the value returned for them is not
// visible in this excerpt).
580 if(c == 0xFDD0 || c == 0xFDD1)
583 return !isIgnorable(c) && isPrint(c);
// True when c is one of the hard-coded code points or ranges listed below
// (presumably the BMP part of Unicode's default-ignorable set — confirm).
586 bool KCharSelectData::isIgnorable(
const QChar& c)
603 return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
604 c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
605 (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
606 (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
607 (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
608 (c >= 0xFFF0 && c <= 0xFFF8);
// True when the translated section name of c equals the translated
// "Combining Diacritical Marks" section name. Both sides go through the same
// "KCharSelect section name" translation context.
611 bool KCharSelectData::isCombining(
const QChar &c)
613 return section(c) ==
i18nc(
"KCharSelect section name",
"Combining Diacritical Marks");
// Builds a rich-text snippet that shows c using the family of the given font.
619 QString KCharSelectData::display(
const QChar &c,
const QFont &font)
// Characters that are not displayable get a bold, translated "Non-printable"
// label instead.
621 if (!isDisplayable(c)) {
622 return QString(
"<b>") +
i18n(
"Non-printable") +
"</b>";
// Open an enlarged <font> element with the requested family.
624 QString s =
QString(
"<font size=\"+4\" face=\"") + font.family() +
"\">";
// Combining characters are rendered through displayCombining().
625 if (isCombining(c)) {
626 s += displayCombining(c);
// Otherwise the character is emitted as a decimal numeric character reference.
628 s +=
"&#" + QString::number(c.unicode()) +
';';
// Builds the markup for a combining character: the character reference on its
// own between spaces, followed by a parenthesised sample where it comes right
// after the letters "ab" and before "c".
635 QString KCharSelectData::displayCombining(
const QChar &c)
647 QString s =
" &#" + QString::number(c.unicode()) +
"; " +
648 " (ab&#" + QString::number(c.unicode()) +
";c)";
// Maps a QChar::Category value to its translated, human-readable description.
// Values without a dedicated case fall through to the translated "Unknown".
652 QString KCharSelectData::categoryText(QChar::Category category)
655 case QChar::Other_Control:
return i18n(
"Other, Control");
656 case QChar::Other_Format:
return i18n(
"Other, Format");
657 case QChar::Other_NotAssigned:
return i18n(
"Other, Not Assigned");
658 case QChar::Other_PrivateUse:
return i18n(
"Other, Private Use");
659 case QChar::Other_Surrogate:
return i18n(
"Other, Surrogate");
660 case QChar::Letter_Lowercase:
return i18n(
"Letter, Lowercase");
661 case QChar::Letter_Modifier:
return i18n(
"Letter, Modifier");
662 case QChar::Letter_Other:
return i18n(
"Letter, Other");
663 case QChar::Letter_Titlecase:
return i18n(
"Letter, Titlecase");
664 case QChar::Letter_Uppercase:
return i18n(
"Letter, Uppercase");
665 case QChar::Mark_SpacingCombining:
return i18n(
"Mark, Spacing Combining");
666 case QChar::Mark_Enclosing:
return i18n(
"Mark, Enclosing");
667 case QChar::Mark_NonSpacing:
return i18n(
"Mark, Non-Spacing");
668 case QChar::Number_DecimalDigit:
return i18n(
"Number, Decimal Digit");
669 case QChar::Number_Letter:
return i18n(
"Number, Letter");
670 case QChar::Number_Other:
return i18n(
"Number, Other");
671 case QChar::Punctuation_Connector:
return i18n(
"Punctuation, Connector");
672 case QChar::Punctuation_Dash:
return i18n(
"Punctuation, Dash");
673 case QChar::Punctuation_Close:
return i18n(
"Punctuation, Close");
674 case QChar::Punctuation_FinalQuote:
return i18n(
"Punctuation, Final Quote");
675 case QChar::Punctuation_InitialQuote:
return i18n(
"Punctuation, Initial Quote");
676 case QChar::Punctuation_Other:
return i18n(
"Punctuation, Other");
677 case QChar::Punctuation_Open:
return i18n(
"Punctuation, Open");
678 case QChar::Symbol_Currency:
return i18n(
"Symbol, Currency");
679 case QChar::Symbol_Modifier:
return i18n(
"Symbol, Modifier");
680 case QChar::Symbol_Math:
return i18n(
"Symbol, Math");
681 case QChar::Symbol_Other:
return i18n(
"Symbol, Other");
682 case QChar::Separator_Line:
return i18n(
"Separator, Line");
683 case QChar::Separator_Paragraph:
return i18n(
"Separator, Paragraph");
684 case QChar::Separator_Space:
return i18n(
"Separator, Space");
685 default:
return i18n(
"Unknown");
// Search entry point: turns the text in needle into a list of matching
// characters. The signature line is not visible in this excerpt (presumably
// find(const QString& needle) — confirm).
// Normalise whitespace, then split the needle into search strings.
694 QString simplified = needle.simplified();
695 QStringList searchStrings = splitString(needle.simplified());
// A single-character needle is searched by its formatted code instead.
697 if(simplified.length() == 1) {
699 searchStrings =
QStringList(formatCode(simplified.at(0).unicode()));
702 if (searchStrings.count() == 0) {
// Matches exactly four hex digits with an optional u+ / U+ / 0x / 0X prefix;
// capture group 2 holds the digits.
706 QRegExp regExp(
"^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
// Qt's foreach iterates over a copy of the container, so assigning into
// searchStrings inside this loop does not disturb the iteration.
707 foreach(
const QString &s, searchStrings) {
708 if(regExp.exactMatch(s)) {
// A code given in hex is a direct hit.
709 returnRes.append(regExp.cap(2).toInt(0, 16));
// Length 6 means a two-character prefix was present: keep only the digits as
// the search string.
711 if (s.length() == 6) {
712 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2);
// A decimal number within 0..0xFFFF is taken as a code unit as well.
717 int unicode = s.toInt(&ok);
718 if (ok && unicode >= 0 && unicode <= 0xFFFF) {
719 returnRes.append(unicode);
// The first search string handles result differently from the later ones,
// which narrow it by intersection (the first-string statement and the
// computation of partResult are not visible in this excerpt).
723 bool firstSubString =
true;
724 foreach(
const QString &s, searchStrings) {
726 if (firstSubString) {
728 firstSubString =
false;
730 result = result.intersect(partResult);
// Drop the code units already present in returnRes from result.
736 foreach(
const QChar &c, returnRes) {
737 result.remove(c.unicode());
// Iterate the code units of sortedResult (how it is built and what the loop
// does are not visible here).
743 foreach(
const quint16 &c, sortedResult) {
// Prefix lookup in the search index. The signature line is not visible in
// this excerpt (presumably getMatchingChars(const QString& s) — confirm).
// Block until the background index construction has finished.
752 futureIndex.waitForFinished();
753 const Index index = futureIndex;
// lowerBound() yields the first key not less than s.
754 Index::const_iterator pos = index.lowerBound(s);
// Visit every following key that still starts with s, and each code unit
// stored under it.
757 while (pos != index.constEnd() && pos.key().startsWith(s)) {
758 foreach (
const quint16 &c, pos.value()) {
// Tokenizer: cuts s into runs of token characters. The signature line is not
// visible in this excerpt (presumably splitString(const QString& s) — confirm).
// A token character is a letter, a digit or '+'.
772 int length = s.length();
773 while (end < length) {
// Extend end across the current run of token characters.
774 while (end < length && (s[end].isLetterOrNumber() || s[end] ==
'+')) {
// Emit the run [start, end) as one token.
778 result.append(s.mid(start, end - start));
// Skip the separator characters that follow.
781 while (end < length && !(s[end].isLetterOrNumber() || s[end] ==
'+')) {
// Registers unicode in *index under the lower-cased form of every string in
// strings (how strings is derived from the parameter s is not visible in this
// excerpt).
789 void KCharSelectData::appendToIndex(Index *index, quint16 unicode,
const QString& s)
// NOTE(review): the loop variable s has the same name as the parameter s.
792 foreach(
const QString &s, strings) {
793 (*index)[s.toLower()].append(unicode);
// Builds the search index from the raw database bytes in dataFile: every word
// of a character's name, aliases, notes, approximate equivalents, equivalents
// and the formatted codes of its cross references is registered for that
// character through appendToIndex(). The index object i is declared outside
// the visible lines of this excerpt.
797 Index KCharSelectData::createIndex(
const QByteArray& dataFile)
802 const uchar* udata =
reinterpret_cast<const uchar*
>(dataFile.constData());
803 const char* data = dataFile.constData();
// Names table: bounds at header bytes 4 and 8, 6-byte entries made of a
// 16-bit code unit and a 32-bit data offset.
804 const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
805 const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
807 int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
809 for (
int pos = 0; pos <= max; pos++) {
810 const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
811 quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
// The name string starts one byte past the offset (category() reads the byte
// at the offset itself as the category code).
812 appendToIndex(&i, unicode,
QString(data + offset + 1));
// Details table: bounds at header bytes 12 and 16, 27-byte records. After the
// 16-bit code unit come five (32-bit offset, 8-bit count) pairs.
816 const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
817 const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
819 max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
821 for (
int pos = 0; pos <= max; pos++) {
822 const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
// Aliases: offset at record bytes 2..5, count at byte 6; NUL-terminated
// Latin-1 strings stored back to back.
825 const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
826 quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
828 for (
int j = 0; j < aliasCount; j++) {
829 appendToIndex(&i, unicode, QString::fromLatin1(data + aliasOffset));
830 aliasOffset += strlen(data + aliasOffset) + 1;
// Notes: offset at record bytes 7..10, count at byte 11.
834 const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
835 quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
837 for (
int j = 0; j < notesCount; j++) {
838 appendToIndex(&i, unicode, QString::fromLatin1(data + notesOffset));
839 notesOffset += strlen(data + notesOffset) + 1;
// Approximate equivalents: offset at record bytes 12..15, count at byte 16.
843 const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
844 quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
846 for (
int j = 0; j < apprCount; j++) {
847 appendToIndex(&i, unicode, QString::fromLatin1(data + apprOffset));
848 apprOffset += strlen(data + apprOffset) + 1;
// Equivalents: offset at record bytes 17..20, count at byte 21.
852 const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
853 quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
855 for (
int j = 0; j < equivCount; j++) {
856 appendToIndex(&i, unicode, QString::fromLatin1(data + equivOffset));
857 equivOffset += strlen(data + equivOffset) + 1;
// Cross references: offset at record bytes 22..25, count at byte 26. Entries
// are consecutive little-endian 16-bit code units, indexed by formatted code.
861 const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
862 quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
864 for (
int j = 0; j < seeAlsoCount; j++) {
865 quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
866 appendToIndex(&i, unicode, formatCode(seeAlso, 4,
QString()));
// Step to the next 16-bit entry. Advancing equivOffset here (as the code did
// before) re-read the first entry on every pass and ran strlen() over bytes
// past the equivalents data.
867 seeAlsoOffset += 2;
QString i18n(const char *text)
static const char JAMO_V_TABLE[][4]
static const char JAMO_T_TABLE[][4]
static const char JAMO_L_TABLE[][4]
static QString locate(const char *type, const QString &filename, const KComponentData &cData=KGlobal::mainComponent())
const char * name(StandardAction id)
This will return the internal name of a given standard action.
KAction * find(const QObject *recvr, const char *slot, QObject *parent)
Initiate a 'find' request in the current document.
QString i18nc(const char *ctxt, const char *text)
KGuiItem ok()
Returns the 'Ok' gui item.
const KShortcut & end()
Goto end of the document.