• Skip to content
  • Skip to link menu
KDE 4.5 API Reference
  • KDE API Reference
  • kdeedu
  • Sitemap
  • Contact Us
 

kiten/lib

entryEdict.cpp

Go to the documentation of this file.
00001 /* This file is part of Kiten, a KDE Japanese Reference Tool...
00002    Copyright (C) 2006 Joseph Kerian <jkerian@gmail.com>
00003               (C) 2006  Eric Kjeldergaard <kjelderg@gmail.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018    Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "entryEdict.h"
00022 #include "dictFileEdict.h"
00023 #include <kdebug.h>
00024 
00025 #include <klocalizedstring.h>
00026 
00027 class EDICT_formatting {
00028 public:
00029     EDICT_formatting();
00030     QString nounType,verbType,adjectiveType,adverbType,ichidanType,godanType,particleType;
00031     QMultiHash<QString, QString> partOfSpeechCategories;
00032     QSet<QString> partsOfSpeech,miscMarkings,fieldOfApplication;
00033 };
00034 EDICT_formatting *EntryEDICT::m_format = NULL;
00035 
00036 EntryEDICT::EntryEDICT(const QString &dict) : Entry(dict) {
00037 }
00038 
00039 EntryEDICT::EntryEDICT(const QString &dict, const QString &entry) : Entry(dict) {
00040     loadEntry(entry);
00041 }
00042 Entry *EntryEDICT::clone() const { return new EntryEDICT(*this); }
00043 
00044 /* DISPLAY FUNCTIONS */
00045 
/* Expands to the QStringList that the pointer x refers to, or to an empty
 * QStringList when x is NULL. The argument is parenthesized so that any
 * pointer-valued expression can be passed; note that x is evaluated twice. */
#define QSTRINGLISTCHECK(x) ((x) == NULL ? QStringList() : *(x))
00047 
00049 QString EntryEDICT::toHTML() const
00050 {
00051 
00052     QString result="<div class=\"EDICT\">";
00053     bool isCommon = (getExtendedInfoItem(QString("common")) == "1");
00054     if(isCommon)
00055         result += "<div class=\"Common\">";
00056 
00057     foreach(const QString &field, QSTRINGLISTCHECK(dictFileEdict::displayFields)) {
00058         if(field == "--NewLine--")          result += "<br>";
00059         else if(field == "Word/Kanji")  result += HTMLWord()+' ';
00060         else if(field == "Meaning")     result += HTMLMeanings()+' ';
00061         else if(field == "Reading")     result += HTMLReadings()+' ';
00062         else if(field == "C")           result += Common();
00063         else kDebug() << "Unknown field: " << field;
00064     }
00065     if(isCommon)
00066         result += "</div>";
00067     result += "</div>";
00068     return result;
00069 }
00070 
00071 
00073 QString EntryEDICT::kanjiLinkify(const QString &inString) const
00074 {
00075     QString outString;
00076     int i;
00077 
00078     for(i = 0; i < inString.length(); i++)
00079     {
00080         if(isKanji(inString.at(i)))
00081         {
00082             outString += makeLink(QString(inString.at(i)));
00083         }
00084         else
00085         {
00086             outString += inString.at(i);
00087         }
00088     }
00089 
00090     return outString;
00091 }
00092 
00093 QString EntryEDICT::HTMLWord() const {
00094     return "<span class=\"Word\">"+
00095         ( Word.isEmpty()?kanjiLinkify(Meanings.first()):kanjiLinkify(Word) ) +
00096         "</span>";
00097 }
00098 
00099 QString EntryEDICT::Common() const
00100 {
00101     if (getExtendedInfoItem(QString("common")) == "1")
00102         return "<span>Common</span>";
00103     else
00104         return QString();
00105 }
00106 
00107 /* DATA LOADING FUNCTIONS */
00108 
00110 /* The format is basically: KANJI [KANA] /(general information) gloss/gloss/.../
00111  * Note that they can rudely place more (general information) in glosses that are
00112  * not the first one */
00113 
00114 bool EntryEDICT::loadEntry(const QString &entryLine)
00115 {
00116     /* Set tempQString to be the reading and word portion of the entryLine */
00117     int endOfKanjiAndKanaSection = entryLine.indexOf('/');
00118     if(endOfKanjiAndKanaSection == -1)
00119         return false;
00120     QString tempQString = entryLine.left(endOfKanjiAndKanaSection);
00121     /* The actual Word is the beginning of the line */
00122     int endOfKanji = tempQString.indexOf(' ');
00123     if(endOfKanji == -1)
00124         return false;
00125     Word = tempQString.left(endOfKanji);
00126 
00127     /* The Reading is either Word or encased in '[' */
00128     Readings.clear();
00129     int startOfReading = tempQString.indexOf('[');
00130     if(startOfReading != -1)  // This field is optional for EDICT (and kiten)
00131         Readings.append(
00132                 tempQString.left(tempQString.lastIndexOf(']')).mid(startOfReading+1));
00133     /* TODO: use this code or not?
00134      * app does not handle only reading and no word entries 
00135      * very well so far
00136     else
00137     {
00138         Readings.append(Word);
00139         Word.clear();
00140     }
00141     */
00142 
00143     /* set Meanings to be all of the meanings in the definition */
00144     QString remainingLine = entryLine.mid(endOfKanjiAndKanaSection);
00145     remainingLine = remainingLine.left(remainingLine.lastIndexOf('/')); //Trim to last '/'
00146     Meanings = remainingLine.split('/', QString::SkipEmptyParts);
00147     if(Meanings.last() == "(P)") {
00148         ExtendedInfo[QString("common")] = "1";
00149         Meanings.removeLast();
00150     }
00151 
00152     QString firstWord = Meanings.first();
00153     QStringList stringTypes;
00154 
00155     //Pulls the various types out
00156     //TODO: Remove them from the original string
00157     for (int i = firstWord.indexOf("("); i != -1; i = firstWord.indexOf("(", i + 1))
00158     {
00159         QString parantheses = firstWord.mid(i + 1, firstWord.indexOf(")", i) - i - 1);
00160         stringTypes += parantheses.split(',');
00161     }
00162     foreach(const QString &str, stringTypes) {
00163         if(format().partsOfSpeech.contains(str))
00164             m_typeList += str;
00165         else if(format().fieldOfApplication.contains(str))
00166             ExtendedInfo["field"] = str;
00167         else if(format().miscMarkings.contains(str))
00168             m_miscMarkings += str;
00169     }
00170 
00171     return true;
00172 }
00173 
00175 QString EntryEDICT::dumpEntry() const
00176 {
00177     return Word +
00178         ((Readings.count() == 0) ? " " : " [" + Readings.first() + "] ")
00179         + '/' + Meanings.join("/") + '/';
00180 }
00181 
00182 const EDICT_formatting &EntryEDICT::format() {
00183     if(EntryEDICT::m_format == NULL)
00184         EntryEDICT::m_format = new EDICT_formatting;
00185     return *EntryEDICT::m_format;
00186 }
00187 
00188 /* TYPES RELATED FUNCTIONS */
00189 
00190 /* The basic idea of this function is to provide a mapping from possible entry types to
00191     possible things the user could enter. Then our code for the matching entry can simply
00192     use this mapping to determine if a given entry could be understood to match the user's
00193     input.
00194 
00195     There are two basic approaches we could take:
00196         Convert the user's entry into a list of types, see if the Entry type matches any of
00197             the conversions from this list. (the list comparisons will be MANY enums)
00198         Convert our Entry types to a list of acceptable string aliases. Then compare the
00199             user's input to this list. (the list will be a relatively small list of strings)
00200 
00201     My gut instinct is that the first case (comparison of a largish list of ints) will be
00202         faster, and so that's the one that's implemented here.
00203 
00204     The following is the minimum list of case-insensitive aliases that the user could enter:
00205     noun
00206     verb:
00207         ichidan
00208         godan
00209     adjective
00210     adverb
00211     particle
00212 
00213     Note that our File Parser will also expand to general cases, if not included already:
00214         For Example: v5aru -> v5aru,v5 (so that a search for "godan" will find it)
00215     Also note that the basic edict dictionary does not separate ikeiyoushi out from the
00216         category "adj", so further breakdown of the "adjective" type would be misleading
00217 */
00218 
00219 EDICT_formatting::EDICT_formatting() {
00220     nounType = QString(i18nc("This must be a single word","Noun"));
00221     verbType = QString(i18nc("This must be a single word","Verb"));
00222     adjectiveType = QString(i18nc("This must be a single word","Adjective"));
00223     adverbType = QString(i18nc("This must be a single word","Adverb"));
00224     particleType = QString(i18nc("This must be a single word","Particle"));
00225     ichidanType = QString(i18nc("This is a technical japanese linguist's term... and probably should not be translated(except possibly in far-eastern languages), this must be a single word","Ichidan"));
00226     godanType = QString(i18nc("This is a technical japanese linguist's term... and probably should not be translated, this must be a single word","Godan"));
00227 
00228         //Nouns
00229     partOfSpeechCategories.insert(nounType,  "n");
00230     partOfSpeechCategories.insert(nounType,  "n-adv");
00231     partOfSpeechCategories.insert(nounType,  "n-pref");
00232     partOfSpeechCategories.insert(nounType,  "n-suf");
00233     partOfSpeechCategories.insert(nounType,  "n-t");
00234     partOfSpeechCategories.insert(nounType,  "adv_n");
00235     //Ichidan Verbs
00236     partOfSpeechCategories.insert(verbType,  "v1");
00237     partOfSpeechCategories.insert(ichidanType,  "v1");
00238     //Godan Verbs
00239     partOfSpeechCategories.insert(verbType,  "v5");
00240     partOfSpeechCategories.insert(verbType,  "v5aru");
00241     partOfSpeechCategories.insert(verbType,  "v5b");
00242     partOfSpeechCategories.insert(verbType,  "v5g");
00243     partOfSpeechCategories.insert(verbType,  "v5k");
00244     partOfSpeechCategories.insert(verbType,  "v5k_s");
00245     partOfSpeechCategories.insert(verbType,  "v5m");
00246     partOfSpeechCategories.insert(verbType,  "v5n");
00247     partOfSpeechCategories.insert(verbType,  "v5r");
00248     partOfSpeechCategories.insert(verbType,  "v5r_i");
00249     partOfSpeechCategories.insert(verbType,  "v5s");
00250     partOfSpeechCategories.insert(verbType,  "v5t");
00251     partOfSpeechCategories.insert(verbType,  "v5u");
00252     partOfSpeechCategories.insert(verbType,  "v5u_s");
00253     partOfSpeechCategories.insert(verbType,  "v5uru");
00254     partOfSpeechCategories.insert(godanType,  "v5");
00255     partOfSpeechCategories.insert(godanType,  "v5aru");
00256     partOfSpeechCategories.insert(godanType,  "v5b");
00257     partOfSpeechCategories.insert(godanType,  "v5g");
00258     partOfSpeechCategories.insert(godanType,  "v5k");
00259     partOfSpeechCategories.insert(godanType,  "v5k_s");
00260     partOfSpeechCategories.insert(godanType,  "v5m");
00261     partOfSpeechCategories.insert(godanType,  "v5n");
00262     partOfSpeechCategories.insert(godanType,  "v5r");
00263     partOfSpeechCategories.insert(godanType,  "v5r_i");
00264     partOfSpeechCategories.insert(godanType,  "v5s");
00265     partOfSpeechCategories.insert(godanType,  "v5t");
00266     partOfSpeechCategories.insert(godanType,  "v5u");
00267     partOfSpeechCategories.insert(godanType,  "v5u_s");
00268     partOfSpeechCategories.insert(godanType,  "v5uru");
00269     //Other Verbs
00270     partOfSpeechCategories.insert(verbType,  "iv");
00271     partOfSpeechCategories.insert(verbType,  "vi");
00272     partOfSpeechCategories.insert(verbType,  "vk");
00273     partOfSpeechCategories.insert(verbType,  "vs");
00274     partOfSpeechCategories.insert(verbType,  "vs_i");
00275     partOfSpeechCategories.insert(verbType,  "vs_s");
00276     partOfSpeechCategories.insert(verbType,  "vt");
00277     partOfSpeechCategories.insert(verbType,  "vz");
00278     //Adjectives
00279     partOfSpeechCategories.insert(adjectiveType,"adj");
00280     partOfSpeechCategories.insert(adjectiveType,"adj_na");
00281     partOfSpeechCategories.insert(adjectiveType,"adj_no");
00282     partOfSpeechCategories.insert(adjectiveType,"adj_pn");
00283     partOfSpeechCategories.insert(adjectiveType,"adj_t");
00284     //Adverbs
00285     partOfSpeechCategories.insert(adverbType,"adv");
00286     partOfSpeechCategories.insert(adverbType,"adv_n");
00287     partOfSpeechCategories.insert(adverbType,"adv_to");
00288     //Particle
00289     partOfSpeechCategories.insert(particleType,"prt");
00290 
00291     partsOfSpeech <<"n" <<"n-adv" <<"n-pref" <<"n-suf" <<"n-t" <<"adv_n"
00292      <<"v1" <<"v1" <<"v5" <<"v5aru" <<"v5b" <<"v5g" <<"v5k" <<"v5k_s"
00293      <<"v5m" <<"v5n" <<"v5r" <<"v5r_i" <<"v5s" <<"v5t" <<"v5u" <<"v5u_s"
00294      <<"v5uru" <<"v5" <<"v5aru" <<"v5b" <<"v5g" <<"v5k" <<"v5k_s" <<"v5m"
00295      <<"v5n" <<"v5r" <<"v5r_i" <<"v5s" <<"v5t" <<"v5u" <<"v5u_s" <<"v5uru"
00296      <<"iv" <<"vi" <<"vk" <<"vs" <<"vs_i" <<"vs_s" <<"vt" <<"vz"
00297      <<"adj" <<"adj_na" <<"adj_no" <<"adj_pn" <<"adj_t"
00298      <<"adv" <<"adv_n" <<"adv_to"
00299      <<"prt";
00300         //Field of Application terms
00301     fieldOfApplication<<"Buddh" <<"MA" <<"comp" <<"food" <<"geom"
00302         <<"ling" <<"math" <<"mil" <<"physics";
00303         //Miscellaneous Markings (in EDICT terms)
00304     miscMarkings<<"X" <<"abbr" <<"arch" <<"ateji" <<"chn" <<"col" <<"derog"
00305         <<"eK" <<"ek" <<"fam" <<"fem" <<"gikun" <<"hon" <<"hum" <<"iK" <<"id"
00306         <<"io" <<"m-sl" <<"male" <<"male-sl" <<"ng" <<"oK" <<"obs" <<"obsc" <<"ok"
00307         <<"poet" <<"pol" <<"rare" <<"sens" <<"sl" <<"uK" <<"uk" <<"vulg";
00308 }

kiten/lib

Skip menu "kiten/lib"
  • Main Page
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members
  • Related Pages

kdeedu

Skip menu "kdeedu"
  •     lib
  • kalzium
  • kanagram
  • kig
  •   lib
  • klettres
  • kstars
  • libkdeedu
  •   keduvocdocument
  • marble
  • parley
  •   stepcore
Generated for kdeedu by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal