• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • kdeedu
  • Sitemap
  • Contact Us
 

kiten/lib

entryKanjidic.cpp

Go to the documentation of this file.
00001 /* This file is part of Kiten, a KDE Japanese Reference Tool...
00002     Copyright (C) 2001 by Jason Katz-Brown
00003               (C) 2006 by Joseph Kerian  <jkerian@gmail.com>
00004               (C) 2006 by Eric Kjeldergaard <kjelderg@gmail.com>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019    Boston, MA 02110-1301, USA.
00020 */
00021 #include "entryKanjidic.h"
00022 #include "dictFileKanjidic.h"
00023 
00024 #include <klocale.h>
00025 #include <kdebug.h>
00026 
00027 /* DISPLAY FUNCTIONS */
/* Yields a copy of the QStringList that x points to, or an empty list when x
   is NULL.  The argument is parenthesised so that compound expressions expand
   with the intended precedence. */
#define QSTRINGLISTCHECK(x) ((x) == NULL ? QStringList() : *(x))
00029 
00031 QString EntryKanjidic::toHTML() const
00032 {
00033     QString result="<div class=\"KanjidicBrief\">";
00034 
00035     foreach(const QString &field, QSTRINGLISTCHECK(dictFileKanjidic::displayFields)) {
00036         //kDebug() << "Display: "<<field;
00037         if(field == "--NewLine--")          result += "<br>";
00038         else if(field == "Word/Kanji")  result += HTMLWord()+' ';
00039         else if(field == "Meaning")     result += HTMLMeanings()+' ';
00040         else if(field == "Reading")     result += HTMLReadings()+' ';
00041         else if(ExtendedInfo.contains(field))   result+=HTMLExtendedInfo(field) + ' ';
00042     }
00043     result += "</div>";
00044     return result;
00045 }
00046 
00048 QString EntryKanjidic::HTMLReadings() const
00049 {
00050     QString htmlReadings;
00051     foreach(const QString &it,originalReadings)
00052     {
00053         if (it == "T1")
00054             htmlReadings += i18n("In names: ");
00055         else
00056         {
00057             if (it == "T2")
00058                 htmlReadings += i18n("As radical: ");
00059             else
00060                 htmlReadings += makeReadingLink(it) + outputListDelimiter;
00061         }
00062     }
00063     htmlReadings.truncate(htmlReadings.length() - outputListDelimiter.length());
00064                                                     // get rid of last ,
00065 
00066     return "<span class=\"Readings\">" + htmlReadings
00067         + "</span>";
00068 }
00069 
00070 QString EntryKanjidic::HTMLWord() const {
00071     return "<span class=\"Word\">" + makeLink(Word) + "</span>";
00072 }
00073 
00074 QString EntryKanjidic::HTMLExtendedInfo(const QString &field) const {
00075     //kDebug() << field;
00076     return "<span class=\"ExtendedInfo\">" + field + ": "+ExtendedInfo[field]+"</span>";
00077 }
00078 
00079 QString EntryKanjidic::makeReadingLink(const QString &inReading) const {
00080     QString reading = inReading;
00081     return "<a href=\""+reading.remove(".").remove("-")+"\">"+inReading+"</a>";
00082 }
00083 
00084 
00087 /* TODO: Error checking */
00088 bool EntryKanjidic::loadEntry(const QString &entryLine)
00089 {
00090     unsigned int length = entryLine.length();
00091 
00092     /* The loop would be a bit faster if we first grabbed the kanji (2 bytes) and then the
00093         space that follows, etc. for the fixed-space portion of the entries let's try that.
00094         First the first 2 bytes are guaranteed to be our kanji.  The 3rd byte is a space.
00095         The 4th through 7th are an ascii representation of the JIS code.  One more space
00096         Currently, kana are not detected so readings are anything that is not otherwise
00097         in the 8th position */
00098     Word = entryLine.left(1);
00099 //  QString strjis = raw.mid(2, 4);
00100 
00101     /* variables for the loop */
00102     unsigned int i;
00103     QChar ichar;
00104     QString curString;
00105 
00106     /* we would need to do these exact things ... many times so here now. */
00107 #define INCI if(i < length) \
00108     { \
00109         i++; \
00110         ichar = entryLine.at(i); \
00111     }
00112 #define LOADSTRING(stringToLoad) while(entryLine.at(i) != ' ') \
00113     { \
00114         stringToLoad += entryLine.at(i); \
00115         if(i < length) i++; \
00116         else break; \
00117     }
00118 
00119 //  kDebug() << "LOADSTRING: '" << stringToLoad << "'";
00120 
00121     /* we can start looping at 8 because we have guarantees about the initial
00122         data.  This loop is used because the kanjidic format allows the data
00123         to be in any order until the end of the line.  The format was designed
00124         such that the data can be identified by the first byte. */
00125     for (i = 7; i < length - 1; i++)
00126     {
00127         ichar = entryLine.at(i);
00128 
00129         curString = "";
00130         switch(ichar.toAscii())
00131         {
00132             case ' ':
00133                 /* as far as I can tell, there is no real rule forcing only 1 space so
00134                     there's not really any significance to them.  This block is not
00135                     reached in kanjidic itself. */
00136                 break;
00137             case 'B':
00138                 /* the radical, or busyu, number */
00139             case 'C':
00140                 /* the classical radical number, usually doesn't differ from busyu number */
00141             case 'E':
00142                 /* Henshell's "A Guide To Remembering Japanese Characters" index number */
00143             case 'F':
00144                 /* frequency ranking */
00145             case 'G':
00146                 /* grade level Jouyou 1 - 6 or 8 for common use or 9 for Jinmeiyou */
00147             case 'H':
00148                 /* number from Halpern's New Japanese-English Character Dictionary */
00149             case 'K':
00150                 /* Gakken Kanji Dictionary index */
00151             case 'L':
00152                 /* Heisig's "Remembering The Kanji" index */
00153             case 'N':
00154                 /* number from Nelson's Modern Reader's Japanese-English Character Dictionary */
00155             case 'O':
00156                 /* O'Neill's "Japanese Names" index number */
00157             case 'P':
00158                 /* SKIP code ... #-#-# format */
00159             case 'Q':
00160                 /* Four Corner codes, it seems, can be multiple though I'm tempted just to take the last one. */
00161             case 'U':
00162                 /* unicode which we are ignoring as it is found in another way */
00163             case 'V':
00164                 /* number from Haig's New Nelson Japanese-English Character Dictionary */
00165             case 'W':
00166                 /* korean reading */
00167             case 'X':
00168                 /* I don't entirely understand this field. */
00169             case 'Y':
00170                 /* Pinyin reading */
00171             case 'Z':
00172                 /* SKIP misclassifications */
00173 
00174                 /* All of the above are of the format <Char><Data> where <Char> is
00175                     exactly 1 character. */
00176                 i++;
00177                 LOADSTRING(curString);
00178                 ExtendedInfo.insert(QString(ichar), curString);
00179 
00180                 break;
00181             case 'I':
00182                 /* index codes for Spahn & Hadamitzky reference books we need the next
00183                     char to know what to do with it. */
00184                 INCI
00185                 if(ichar == 'N')
00186                 {
00187                     /* a Kanji & Kana book number */
00188                     LOADSTRING(curString)
00189                 }
00190                 else
00191                 {
00192                     /* The Kanji Dictionary number, we need the current ichar. */
00193                     LOADSTRING(curString)
00194                 }
00195                 ExtendedInfo.insert('I' + QString(ichar), curString);
00196                 break;
00197             case 'M':
00198                 /* index and page numbers for Morohashi's Daikanwajiten 2 fields possible */
00199                 INCI
00200                 if(ichar == 'N')
00201                 {
00202                     LOADSTRING(curString)
00203                     /* index number */
00204                 }
00205                 else if(ichar == 'P')
00206                 {
00207                     LOADSTRING(curString)
00208                     /* page number in volume.page format */
00209                 }
00210                 ExtendedInfo.insert('M' + QString(ichar), curString);
00211                 break;
00212             case 'S':
00213                 /* stroke count: may be multiple.  In that case, first is actual, others common
00214                     miscounts */
00215                 i++;
00216                 if(!ExtendedInfo.contains("S"))
00217                 {
00218                     LOADSTRING(curString)
00219                     ExtendedInfo.insert(QString(ichar), curString);
00220                 }
00221                 else
00222                 {
00223                     LOADSTRING(curString)
00224                     ExtendedInfo.insert('_' + QString(ichar), curString);
00225                 }
00226                 break;
00227             case 'D':
00228                 /* dictionary codes */
00229                 INCI
00230                 LOADSTRING(curString)
00231                 ExtendedInfo.insert('D' + QString(ichar), curString);
00232                 break;
00233             case '{':
00234                 /* This should be starting with the first '{' character of a meaning section.
00235                     Let us get take it to the last. */
00236                 INCI
00237                 while(ichar != '}')
00238                 {
00239                     curString += ichar;
00240                     /* sanity */
00241                     if(i < length)
00242                     {
00243                         i++;
00244                     }
00245                     else
00246                     {
00247                         break;
00248                     }
00249                     ichar = entryLine.at(i);
00250                 }
00251                 INCI
00252 //              kDebug() << "Meaning's curString: '" << curString << "'";
00253                 Meanings.append(curString);
00254                 break;
00255             case 'T':
00256                 /* a reading that is used in names for T1, radical names for T2 */
00257             case '-':
00258                 /* a reading that is only in postposition */
00259                 /* any of those 2 signals a reading is to ensue. */
00260                 LOADSTRING(curString)
00261                 originalReadings.append(curString);
00262                 curString = curString.remove('-').remove('.');
00263                 Readings.append(curString);
00264                 break;
00265             default:
00266                 /* either a character we don't address or a problem...we should ignore it */
00267 //              kDebug() << "hit default in kanji parser.  Unicode: '" << ichar.unicode() << "'";
00268 
00269                 /* This should detect unicode kana */
00270                 // Hiragana 0x3040 - 0x309F, Katakana: 0x30A0 - 0x30FF
00271                 if(0x3040 <= ichar.unicode() && ichar.unicode() <= 0x30FF)
00272                 {
00273                     LOADSTRING(curString)
00274                     originalReadings.append(curString);
00275                     curString = curString.remove('-').remove('.');
00276                     Readings.append(curString);
00277                     break;
00278                 }
00279                 /* if it's not a kana reading ... it is something unhandled ...
00280                     possibly a new field in kanjidic.  Let's treat it as the
00281                     oh-so-common <char><data> type of entry.  It could be hotly
00282                     debated what we should actually do about these. */
00283                 i++;
00284                 LOADSTRING(curString);
00285                 ExtendedInfo.insert(QString(ichar), curString);
00286 
00287                 break;
00288         }
00289     }
00290 //  kDebug() << "Parsed: '"<<Word<<"' ("<<Readings.join("^")<<") \""<<
00291 //      Meanings.join("|")<<"\ and " <<ExtendedInfo.keys() << " from :"<<entryLine<<endl;
00292 
00293     return true;
00294 }
00295 
00298 QString EntryKanjidic::dumpEntry() const
00299 {
00300     /* Loop over the ExtendedInfo to add it to the line we produce */
00301     QString dumpExtendedInfo;
00302     QHash<QString,QString>::const_iterator it;
00303     for(it=ExtendedInfo.constBegin() ; it != ExtendedInfo.constEnd(); ++it)
00304         dumpExtendedInfo += ' ' + it.key() + it.value();
00305 
00306     return Word + ' ' + Readings.join(" ") + dumpExtendedInfo;
00307 }
00308 
00309 bool EntryKanjidic::extendedItemCheck(const QString &key, const QString &value) const {
00310     if(key == "common")
00311         return !getExtendedInfoItem("G").isEmpty();
00312     return Entry::extendedItemCheck(key,value);
00313 }

kiten/lib

Skip menu "kiten/lib"
  • Main Page
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members
  • Related Pages

kdeedu

Skip menu "kdeedu"
  • kalzium
  • kanagram
  • kig
  •   lib
  • klettres
  • kstars
  • libkdeedu
  •   keduvocdocument
  •   docs
  •   src
  • parley
  •   stepcore
Generated for kdeedu by doxygen 1.5.4
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal