00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "entryEdict.h"
00022 #include "dictFileEdict.h"
00023 #include <kdebug.h>
00024
00025 #include <klocalizedstring.h>
00026
00027 class EDICT_formatting {
00028 public:
00029 EDICT_formatting();
00030 QString nounType,verbType,adjectiveType,adverbType,ichidanType,godanType,particleType;
00031 QMultiHash<QString, QString> partOfSpeechCategories;
00032 QSet<QString> partsOfSpeech,miscMarkings,fieldOfApplication;
00033 };
// Shared formatting tables; starts out null and is allocated on the first
// call to EntryEDICT::format().
EDICT_formatting *EntryEDICT::m_format = NULL;
00035
00036 EntryEDICT::EntryEDICT(const QString &dict) : Entry(dict) {
00037 }
00038
00039 EntryEDICT::EntryEDICT(const QString &dict, const QString &entry) : Entry(dict) {
00040 loadEntry(entry);
00041 }
00042 Entry *EntryEDICT::clone() const { return new EntryEDICT(*this); }
00043
00044
00045
// Yields *x when the QStringList pointer x is non-null, otherwise an empty
// QStringList. The argument is parenthesised so that expression arguments
// (for example "ptr + 1") expand correctly. Note that x is evaluated twice,
// so it must not have side effects.
#define QSTRINGLISTCHECK(x) ((x) == NULL ? QStringList() : *(x))
00047
00049 QString EntryEDICT::toHTML() const
00050 {
00051
00052 QString result="<div class=\"EDICT\">";
00053 bool isCommon = (getExtendedInfoItem(QString("common")) == "1");
00054 if(isCommon)
00055 result += "<div class=\"Common\">";
00056
00057 foreach(const QString &field, QSTRINGLISTCHECK(dictFileEdict::displayFields)) {
00058 if(field == "--NewLine--") result += "<br>";
00059 else if(field == "Word/Kanji") result += HTMLWord()+' ';
00060 else if(field == "Meaning") result += HTMLMeanings()+' ';
00061 else if(field == "Reading") result += HTMLReadings()+' ';
00062 else if(field == "C") result += Common();
00063 else kDebug() << "Unknown field: " << field;
00064 }
00065 if(isCommon)
00066 result += "</div>";
00067 result += "</div>";
00068 return result;
00069 }
00070
00071
00073 QString EntryEDICT::kanjiLinkify(const QString &inString) const
00074 {
00075 QString outString;
00076 int i;
00077
00078 for(i = 0; i < inString.length(); i++)
00079 {
00080 if(isKanji(inString.at(i)))
00081 {
00082 outString += makeLink(QString(inString.at(i)));
00083 }
00084 else
00085 {
00086 outString += inString.at(i);
00087 }
00088 }
00089
00090 return outString;
00091 }
00092
00093 QString EntryEDICT::HTMLWord() const {
00094 return "<span class=\"Word\">"+
00095 ( Word.isEmpty()?kanjiLinkify(Meanings.first()):kanjiLinkify(Word) ) +
00096 "</span>";
00097 }
00098
00099 QString EntryEDICT::Common() const
00100 {
00101 if (getExtendedInfoItem(QString("common")) == "1")
00102 return "<span>Common</span>";
00103 else
00104 return QString();
00105 }
00106
00107
00108
00110
00111
00112
00113
00114 bool EntryEDICT::loadEntry(const QString &entryLine)
00115 {
00116
00117 int endOfKanjiAndKanaSection = entryLine.indexOf('/');
00118 if(endOfKanjiAndKanaSection == -1)
00119 return false;
00120 QString tempQString = entryLine.left(endOfKanjiAndKanaSection);
00121
00122 int endOfKanji = tempQString.indexOf(' ');
00123 if(endOfKanji == -1)
00124 return false;
00125 Word = tempQString.left(endOfKanji);
00126
00127
00128 Readings.clear();
00129 int startOfReading = tempQString.indexOf('[');
00130 if(startOfReading != -1)
00131 Readings.append(
00132 tempQString.left(tempQString.lastIndexOf(']')).mid(startOfReading+1));
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144 QString remainingLine = entryLine.mid(endOfKanjiAndKanaSection);
00145 remainingLine = remainingLine.left(remainingLine.lastIndexOf('/'));
00146 Meanings = remainingLine.split('/', QString::SkipEmptyParts);
00147 if(Meanings.last() == "(P)") {
00148 ExtendedInfo[QString("common")] = "1";
00149 Meanings.removeLast();
00150 }
00151
00152 QString firstWord = Meanings.first();
00153 QStringList stringTypes;
00154
00155
00156
00157 for (int i = firstWord.indexOf("("); i != -1; i = firstWord.indexOf("(", i + 1))
00158 {
00159 QString parantheses = firstWord.mid(i + 1, firstWord.indexOf(")", i) - i - 1);
00160 stringTypes += parantheses.split(',');
00161 }
00162 foreach(const QString &str, stringTypes) {
00163 if(format().partsOfSpeech.contains(str))
00164 m_typeList += str;
00165 else if(format().fieldOfApplication.contains(str))
00166 ExtendedInfo["field"] = str;
00167 else if(format().miscMarkings.contains(str))
00168 m_miscMarkings += str;
00169 }
00170
00171 return true;
00172 }
00173
00175 QString EntryEDICT::dumpEntry() const
00176 {
00177 return Word +
00178 ((Readings.count() == 0) ? " " : " [" + Readings.first() + "] ")
00179 + '/' + Meanings.join("/") + '/';
00180 }
00181
00182 const EDICT_formatting &EntryEDICT::format() {
00183 if(EntryEDICT::m_format == NULL)
00184 EntryEDICT::m_format = new EDICT_formatting;
00185 return *EntryEDICT::m_format;
00186 }
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219 EDICT_formatting::EDICT_formatting() {
00220 nounType = QString(i18nc("This must be a single word","Noun"));
00221 verbType = QString(i18nc("This must be a single word","Verb"));
00222 adjectiveType = QString(i18nc("This must be a single word","Adjective"));
00223 adverbType = QString(i18nc("This must be a single word","Adverb"));
00224 particleType = QString(i18nc("This must be a single word","Particle"));
00225 ichidanType = QString(i18nc("This is a technical japanese linguist's term... and probably should not be translated(except possibly in far-eastern languages), this must be a single word","Ichidan"));
00226 godanType = QString(i18nc("This is a technical japanese linguist's term... and probably should not be translated, this must be a single word","Godan"));
00227
00228
00229 partOfSpeechCategories.insert(nounType, "n");
00230 partOfSpeechCategories.insert(nounType, "n-adv");
00231 partOfSpeechCategories.insert(nounType, "n-pref");
00232 partOfSpeechCategories.insert(nounType, "n-suf");
00233 partOfSpeechCategories.insert(nounType, "n-t");
00234 partOfSpeechCategories.insert(nounType, "adv_n");
00235
00236 partOfSpeechCategories.insert(verbType, "v1");
00237 partOfSpeechCategories.insert(ichidanType, "v1");
00238
00239 partOfSpeechCategories.insert(verbType, "v5");
00240 partOfSpeechCategories.insert(verbType, "v5aru");
00241 partOfSpeechCategories.insert(verbType, "v5b");
00242 partOfSpeechCategories.insert(verbType, "v5g");
00243 partOfSpeechCategories.insert(verbType, "v5k");
00244 partOfSpeechCategories.insert(verbType, "v5k_s");
00245 partOfSpeechCategories.insert(verbType, "v5m");
00246 partOfSpeechCategories.insert(verbType, "v5n");
00247 partOfSpeechCategories.insert(verbType, "v5r");
00248 partOfSpeechCategories.insert(verbType, "v5r_i");
00249 partOfSpeechCategories.insert(verbType, "v5s");
00250 partOfSpeechCategories.insert(verbType, "v5t");
00251 partOfSpeechCategories.insert(verbType, "v5u");
00252 partOfSpeechCategories.insert(verbType, "v5u_s");
00253 partOfSpeechCategories.insert(verbType, "v5uru");
00254 partOfSpeechCategories.insert(godanType, "v5");
00255 partOfSpeechCategories.insert(godanType, "v5aru");
00256 partOfSpeechCategories.insert(godanType, "v5b");
00257 partOfSpeechCategories.insert(godanType, "v5g");
00258 partOfSpeechCategories.insert(godanType, "v5k");
00259 partOfSpeechCategories.insert(godanType, "v5k_s");
00260 partOfSpeechCategories.insert(godanType, "v5m");
00261 partOfSpeechCategories.insert(godanType, "v5n");
00262 partOfSpeechCategories.insert(godanType, "v5r");
00263 partOfSpeechCategories.insert(godanType, "v5r_i");
00264 partOfSpeechCategories.insert(godanType, "v5s");
00265 partOfSpeechCategories.insert(godanType, "v5t");
00266 partOfSpeechCategories.insert(godanType, "v5u");
00267 partOfSpeechCategories.insert(godanType, "v5u_s");
00268 partOfSpeechCategories.insert(godanType, "v5uru");
00269
00270 partOfSpeechCategories.insert(verbType, "iv");
00271 partOfSpeechCategories.insert(verbType, "vi");
00272 partOfSpeechCategories.insert(verbType, "vk");
00273 partOfSpeechCategories.insert(verbType, "vs");
00274 partOfSpeechCategories.insert(verbType, "vs_i");
00275 partOfSpeechCategories.insert(verbType, "vs_s");
00276 partOfSpeechCategories.insert(verbType, "vt");
00277 partOfSpeechCategories.insert(verbType, "vz");
00278
00279 partOfSpeechCategories.insert(adjectiveType,"adj");
00280 partOfSpeechCategories.insert(adjectiveType,"adj_na");
00281 partOfSpeechCategories.insert(adjectiveType,"adj_no");
00282 partOfSpeechCategories.insert(adjectiveType,"adj_pn");
00283 partOfSpeechCategories.insert(adjectiveType,"adj_t");
00284
00285 partOfSpeechCategories.insert(adverbType,"adv");
00286 partOfSpeechCategories.insert(adverbType,"adv_n");
00287 partOfSpeechCategories.insert(adverbType,"adv_to");
00288
00289 partOfSpeechCategories.insert(particleType,"prt");
00290
00291 partsOfSpeech <<"n" <<"n-adv" <<"n-pref" <<"n-suf" <<"n-t" <<"adv_n"
00292 <<"v1" <<"v1" <<"v5" <<"v5aru" <<"v5b" <<"v5g" <<"v5k" <<"v5k_s"
00293 <<"v5m" <<"v5n" <<"v5r" <<"v5r_i" <<"v5s" <<"v5t" <<"v5u" <<"v5u_s"
00294 <<"v5uru" <<"v5" <<"v5aru" <<"v5b" <<"v5g" <<"v5k" <<"v5k_s" <<"v5m"
00295 <<"v5n" <<"v5r" <<"v5r_i" <<"v5s" <<"v5t" <<"v5u" <<"v5u_s" <<"v5uru"
00296 <<"iv" <<"vi" <<"vk" <<"vs" <<"vs_i" <<"vs_s" <<"vt" <<"vz"
00297 <<"adj" <<"adj_na" <<"adj_no" <<"adj_pn" <<"adj_t"
00298 <<"adv" <<"adv_n" <<"adv_to"
00299 <<"prt";
00300
00301 fieldOfApplication<<"Buddh" <<"MA" <<"comp" <<"food" <<"geom"
00302 <<"ling" <<"math" <<"mil" <<"physics";
00303
00304 miscMarkings<<"X" <<"abbr" <<"arch" <<"ateji" <<"chn" <<"col" <<"derog"
00305 <<"eK" <<"ek" <<"fam" <<"fem" <<"gikun" <<"hon" <<"hum" <<"iK" <<"id"
00306 <<"io" <<"m-sl" <<"male" <<"male-sl" <<"ng" <<"oK" <<"obs" <<"obsc" <<"ok"
00307 <<"poet" <<"pol" <<"rare" <<"sens" <<"sl" <<"uK" <<"uk" <<"vulg";
00308 }