• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdeedu API Reference
  • KDE Home
  • Contact Us
 

kiten/lib

  • sources
  • kde-4.14
  • kdeedu
  • kiten
  • lib
  • DictKanjidic
entrykanjidic.cpp
Go to the documentation of this file.
1 /*****************************************************************************
2  * This file is part of Kiten, a KDE Japanese Reference Tool *
3  * Copyright (C) 2001 Jason Katz-Brown <jason@katzbrown.com> *
4  * Copyright (C) 2006 Joseph Kerian <jkerian@gmail.com> *
5  * Copyright (C) 2006 Eric Kjeldergaard <kjelderg@gmail.com> *
6  * Copyright (C) 2011 Daniel E. Moctezuma <democtezuma@gmail.com> *
7  * *
8  * This library is free software; you can redistribute it and/or *
9  * modify it under the terms of the GNU Library General Public *
10  * License as published by the Free Software Foundation; either *
11  * version 2 of the License, or (at your option) any later version. *
12  * *
13  * This library is distributed in the hope that it will be useful, *
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16  * Library General Public License for more details. *
17  * *
18  * You should have received a copy of the GNU Library General Public License *
19  * along with this library; see the file COPYING.LIB. If not, write to *
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, *
21  * Boston, MA 02110-1301, USA. *
22  *****************************************************************************/
23 
24 #include "entrykanjidic.h"
25 
26 #include "dictfilekanjidic.h"
27 #include "kitenmacros.h"
28 
29 #include <KLocale>
30 #include <KDebug>
31 
/* Yields a copy of the QStringList that x points to, or an empty QStringList
   when x is NULL. The argument is parenthesised so that compound expressions
   (e.g. a conditional) can be passed safely. Note that x is evaluated twice. */
#define QSTRINGLISTCHECK(x) ((x)==NULL?QStringList():*(x))
33 
34 EntryKanjidic::EntryKanjidic( const EntryKanjidic &dict )
35 : Entry( dict )
36 {
37 }
38 
39 EntryKanjidic::EntryKanjidic( const QString &dict )
40 : Entry( dict )
41 {
42 }
43 
44 EntryKanjidic::EntryKanjidic( const QString &dict, const QString &entry )
45 : Entry( dict )
46 {
47  loadEntry( entry );
48 }
49 
50 QString EntryKanjidic::addReadings( const QStringList &list ) const
51 {
52  QString readings;
53  foreach( const QString &reading, list )
54  {
55  readings.append( makeReadingLink( reading ) + outputListDelimiter );
56  }
57 
58  return readings;
59 }
60 
61 Entry* EntryKanjidic::clone() const
62 {
63  return new EntryKanjidic( *this );
64 }
65 
70 QString EntryKanjidic::dumpEntry() const
71 {
72  /* Loop over the ExtendedInfo to add it to the line we produce */
73  QString dumpExtendedInfo;
74  QHash<QString,QString>::const_iterator it;
75  for( it = ExtendedInfo.constBegin(); it != ExtendedInfo.constEnd(); ++it )
76  {
77  dumpExtendedInfo += ' ' + it.key() + it.value();
78  }
79 
80  return QString( "%1 %2%3" ).arg( Word )
81  .arg( Readings.join( " " ) )
82  .arg( dumpExtendedInfo );
83 }
84 
85 bool EntryKanjidic::extendedItemCheck( const QString &key, const QString &value ) const
86 {
87  if( key == "common" )
88  {
89  return ! getExtendedInfoItem( "G" ).isEmpty();
90  }
91 
92  return Entry::extendedItemCheck( key, value );
93 }
94 
95 QString EntryKanjidic::getAsRadicalReadings() const
96 {
97  return AsRadicalReadings.join( outputListDelimiter );
98 }
99 
100 QStringList EntryKanjidic::getAsRadicalReadingsList() const
101 {
102  return AsRadicalReadings;
103 }
104 
105 QString EntryKanjidic::getDictionaryType() const
106 {
107  return KANJIDIC;
108 }
109 
110 QString EntryKanjidic::getInNamesReadings() const
111 {
112  return InNamesReadings.join( outputListDelimiter );
113 }
114 
115 QStringList EntryKanjidic::getInNamesReadingsList() const
116 {
117  return InNamesReadings;
118 }
119 
120 QString EntryKanjidic::getKanjiGrade() const
121 {
122  return getExtendedInfoItem( "G" );
123 }
124 
125 QString EntryKanjidic::getKunyomiReadings() const
126 {
127  return KunyomiReadings.join( outputListDelimiter );
128 }
129 
130 QStringList EntryKanjidic::getKunyomiReadingsList() const
131 {
132  return KunyomiReadings;
133 }
134 
135 QString EntryKanjidic::getOnyomiReadings() const
136 {
137  return OnyomiReadings.join( outputListDelimiter );
138 }
139 
140 QStringList EntryKanjidic::getOnyomiReadingsList() const
141 {
142  return OnyomiReadings;
143 }
144 
145 QString EntryKanjidic::getStrokesCount() const
146 {
147  return getExtendedInfoItem( "S" );
148 }
149 
150 QString EntryKanjidic::HTMLExtendedInfo( const QString &field ) const
151 {
152  //kDebug() << field;
153  return QString( "<span class=\"ExtendedInfo\">%1: %2</span>" )
154  .arg( field )
155  .arg( ExtendedInfo[ field ] );
156 }
157 
161 QString EntryKanjidic::HTMLReadings() const
162 {
163  QString htmlReadings;
164  htmlReadings += addReadings( originalReadings );
165 
166  if( InNamesReadings.count() > 0 )
167  {
168  htmlReadings += i18n( "In names: " );
169  htmlReadings += addReadings( InNamesReadings );
170  }
171 
172  if( AsRadicalReadings.count() > 0 )
173  {
174  htmlReadings += i18n( "As radical: " );
175  htmlReadings += addReadings( AsRadicalReadings );
176  }
177 
178  // get rid of last ,
179  htmlReadings.truncate( htmlReadings.length() - outputListDelimiter.length() );
180  return QString( "<span class=\"Readings\">%1</span>" ).arg( htmlReadings );
181 }
182 
183 QString EntryKanjidic::HTMLWord() const
184 {
185  return QString( "<span class=\"Word\">%1</span>" ).arg( makeLink( Word ) );
186 }
187 
192 /* TODO: Error checking */
193 bool EntryKanjidic::loadEntry( const QString &entryLine )
194 {
195  unsigned int length = entryLine.length();
196 
197  /* The loop would be a bit faster if we first grabbed the kanji (2 bytes) and then the
198  space that follows, etc. for the fixed-space portion of the entries let's try that.
199  First the first 2 bytes are guaranteed to be our kanji. The 3rd byte is a space.
200  The 4th through 7th are an ascii representation of the JIS code. One more space
201  Currently, kana are not detected so readings are anything that is not otherwise
202  in the 8th position. */
203  Word = entryLine.left( 1 );
204  // QString strjis = raw.mid( 2, 4 );
205 
206  /* variables for the loop */
207  QChar ichar;
208  QString curString;
209 
210  /* we would need to do these exact things ... many times so here now. */
211  #define INCI if(i < length) \
212  { \
213  i++; \
214  ichar = entryLine.at(i); \
215  }
216  #define LOADSTRING(stringToLoad) while(entryLine.at(i) != ' ') \
217  { \
218  stringToLoad += entryLine.at(i); \
219  if(i < length) i++; \
220  else break; \
221  }
222 
223  // kDebug() << "LOADSTRING: '" << stringToLoad << "'";
224 
225  /* We can start looping at 8 because we have guarantees about the initial
226  data. This loop is used because the kanjidic format allows the data
227  to be in any order until the end of the line. The format was designed
228  such that the data can be identified by the first byte. */
229  for ( unsigned int i = 7; i < length - 1; i++ )
230  {
231  ichar = entryLine.at( i );
232 
233  curString = "";
234  switch( ichar.toAscii() )
235  {
236  case ' ':
237  /* as far as I can tell, there is no real rule forcing only 1 space so
238  there's not really any significance to them. This block is not
239  reached in kanjidic itself. */
240  break;
241  case 'B':
242  /* the radical, or busyu, number */
243  case 'C':
244  /* the classical radical number, usually doesn't differ from busyu number */
245  case 'E':
246  /* Henshell's "A Guide To Remembering Japanese Characters" index number */
247  case 'F':
248  /* frequency ranking */
249  case 'G':
250  /* grade level Jouyou 1 - 6 or 8 for common use or 9 for Jinmeiyou */
251  case 'H':
252  /* number from Halpern's New Japanese-English Character Dictionary */
253  case 'K':
254  /* Gakken Kanji Dictionary index */
255  case 'L':
256  /* Heisig's "Remembering The Kanji" index */
257  case 'N':
258  /* number from Nelson's Modern Reader's Japanese-English Character Dictionary */
259  case 'O':
260  /* O'Neill's "Japanese Names" index number */
261  case 'P':
262  /* SKIP code ... #-#-# format */
263  case 'Q':
264  /* Four Corner codes, it seems, can be multiple though I'm tempted just to take the last one. */
265  case 'U':
266  /* unicode which we are ignoring as it is found in another way */
267  case 'V':
268  /* number from Haig's New Nelson Japanese-English Character Dictionary */
269  case 'W':
270  /* korean reading */
271  case 'X':
272  /* I don't entirely understand this field. */
273  case 'Y':
274  /* Pinyin reading */
275  case 'Z':
276  /* SKIP misclassifications */
277 
278  /* All of the above are of the format <Char><Data> where <Char> is
279  exactly 1 character. */
280  i++;
281  LOADSTRING( curString );
282  ExtendedInfo.insert( QString( ichar ), curString );
283  break;
284  case 'I':
285  /* index codes for Spahn & Hadamitzky reference books we need the next
286  char to know what to do with it. */
287  INCI
288  if( ichar == 'N' )
289  {
290  /* a Kanji & Kana book number */
291  LOADSTRING( curString )
292  }
293  else
294  {
295  /* The Kanji Dictionary number, we need the current ichar. */
296  LOADSTRING( curString )
297  }
298  ExtendedInfo.insert( 'I' + QString( ichar ), curString );
299  break;
300  case 'M':
301  /* index and page numbers for Morohashi's Daikanwajiten 2 fields possible */
302  INCI
303  if( ichar == 'N' )
304  {
305  LOADSTRING( curString )
306  /* index number */
307  }
308  else if( ichar == 'P' )
309  {
310  LOADSTRING( curString )
311  /* page number in volume.page format */
312  }
313  ExtendedInfo.insert( 'M' + QString( ichar ), curString );
314  break;
315  case 'S':
316  /* stroke count: may be multiple. In that case, first is actual, others common
317  miscounts */
318  i++;
319  if( ! ExtendedInfo.contains( "S" ) )
320  {
321  LOADSTRING( curString )
322  ExtendedInfo.insert( QString( ichar ), curString );
323  }
324  else
325  {
326  LOADSTRING( curString )
327  ExtendedInfo.insert( '_' + QString( ichar ), curString );
328  }
329  break;
330  case 'D':
331  /* dictionary codes */
332  INCI
333  LOADSTRING( curString )
334  ExtendedInfo.insert( 'D' + QString( ichar ), curString );
335  break;
336  case '{':
337  /* This should be starting with the first '{' character of a meaning section.
338  Let us get take it to the last. */
339  INCI
340  while( ichar != '}' )
341  {
342  curString += ichar;
343  /* sanity */
344  if( i < length )
345  {
346  i++;
347  }
348  else
349  {
350  break;
351  }
352  ichar = entryLine.at( i );
353  }
354  INCI
355 // kDebug() << "Meaning's curString: '" << curString << "'";
356  Meanings.append( curString );
357  break;
358  case 'T': /* a reading that is used in names for T1, radical names for T2 */
359  {
360  i++;
361  LOADSTRING( curString )
362  // Get the type number (1 for T1, 2 for T2).
363  int type = curString.toInt();
364  bool finished = false;
365  while( ! finished )
366  {
367  // Skip all whitespaces.
368  INCI
369  while( ichar == ' ' )
370  {
371  INCI
372  }
373  // Check if the current character is Kana.
374  if( 0x3040 <= ichar.unicode() && ichar.unicode() <= 0x30FF )
375  {
376  // Reset our variable and load it with
377  // all available kana until we find a whitespace.
378  curString = "";
379  LOADSTRING( curString );
380  switch( type )
381  {
382  case 1: // Special reading used in names.
383  InNamesReadings.append( curString );
384  break;
385  case 2: // Reading as radical.
386  AsRadicalReadings.append( curString );
387  break;
388  }
389  }
390  else
391  {
392  // There are not more kana characters,
393  // so we finish this loop for now.
394  finished = true;
395  }
396  }
397  // Now 'i' points to a '{' character. We decrease its value
398  // so in the next loop we can reach the "case '{'" section.
399  i--;
400  }
401  break;
402  case '-':
403  /* a reading that is only in postposition */
404  /* any of those 2 signals a reading is to ensue. */
405  LOADSTRING( curString )
406  originalReadings.append( curString );
407 
408  // If it is Hiragana (Kunyomi)
409  if( 0x3040 <= ichar.unicode() && ichar.unicode() <= 0x309F )
410  {
411  KunyomiReadings.append( curString );
412  }
413  // If it is Katakana (Onyomi)
414  else if( 0x30A0 <= ichar.unicode() && ichar.unicode() <= 0x30FF )
415  {
416  OnyomiReadings.append( curString );
417  }
418 
419  curString = curString.remove( '-' ).remove( '.' );
420  Readings.append( curString );
421  break;
422  default:
423  /* either a character we don't address or a problem...we should ignore it */
424 // kDebug() << "hit default in kanji parser. Unicode: '" << ichar.unicode() << "'";
425 
426  /* This should detect unicode kana */
427  // Hiragana 0x3040 - 0x309F, Katakana: 0x30A0 - 0x30FF
428  if( 0x3040 <= ichar.unicode() && ichar.unicode() <= 0x30FF )
429  {
430  LOADSTRING( curString )
431  originalReadings.append( curString );
432 
433  // If it is Hiragana (Kunyomi)
434  if( 0x3040 <=ichar.unicode() && ichar.unicode() <= 0x309F )
435  {
436  KunyomiReadings.append( curString );
437  }
438  // If it is Katakana (Onyomi)
439  else if( 0x30A0 <= ichar.unicode() && ichar.unicode() <= 0x30FF )
440  {
441  OnyomiReadings.append( curString );
442  }
443 
444  curString = curString.remove( '-' ).remove( '.' );
445  Readings.append( curString );
446  break;
447  }
448  /* if it's not a kana reading ... it is something unhandled ...
449  possibly a new field in kanjidic. Let's treat it as the
450  oh-so-common <char><data> type of entry. It could be hotly
451  debated what we should actually do about these. */
452  i++;
453  LOADSTRING( curString );
454  ExtendedInfo.insert( QString( ichar ), curString );
455 
456  break;
457  }
458  }
459 // kDebug() << "Parsed: '"<<Word<<"' ("<<Readings.join("^")<<") \""<<
460 // Meanings.join("|")<<"\ and " <<ExtendedInfo.keys() << " from :"<<entryLine<<endl;
461 
462  return true;
463 }
464 
465 QString EntryKanjidic::makeReadingLink( const QString &inReading ) const
466 {
467  QString reading = inReading;
468  return QString( "<a href=\"%1\">%2</a>" ).arg( reading.remove( "." ).remove( "-" ) )
469  .arg( inReading );
470 }
471 
475 QString EntryKanjidic::toHTML() const
476 {
477  QString result = "<div class=\"KanjidicBrief\">";
478 
479  foreach( const QString &field, QSTRINGLISTCHECK( DictFileKanjidic::displayFields ) )
480  {
481  //kDebug() << "Display: "<<field;
482  if( field == "--NewLine--" ) result += "<br>";
483  else if( field == "Word/Kanji" ) result += HTMLWord() + ' ';
484  else if( field == "Meaning" ) result += HTMLMeanings() + ' ';
485  else if( field == "Reading" ) result += HTMLReadings() + ' ';
486  else if( ExtendedInfo.contains( field ) ) result += HTMLExtendedInfo( field ) + ' ';
487  }
488 
489  result += "</div>";
490  return result;
491 }
EntryKanjidic::OnyomiReadings
QStringList OnyomiReadings
Definition: entrykanjidic.h:67
QString::append
QString & append(QChar ch)
QHash::insert
iterator insert(const Key &key, const T &value)
EntryKanjidic::AsRadicalReadings
QStringList AsRadicalReadings
Definition: entrykanjidic.h:64
QString::truncate
void truncate(int position)
QHash::key
const Key key(const T &value) const
QChar::toAscii
char toAscii() const
EntryKanjidic::getInNamesReadingsList
QStringList getInNamesReadingsList() const
Definition: entrykanjidic.cpp:115
INCI
#define INCI
Entry::extendedItemCheck
virtual bool extendedItemCheck(const QString &key, const QString &value) const
Simple accessor.
Definition: entry.cpp:83
Entry
The Entry class is a generic base class for each particular entry in a given dictionary.
Definition: entry.h:44
EntryKanjidic::getInNamesReadings
QString getInNamesReadings() const
Definition: entrykanjidic.cpp:110
QChar
EntryKanjidic::extendedItemCheck
virtual bool extendedItemCheck(const QString &key, const QString &value) const
Simple accessor.
Definition: entrykanjidic.cpp:85
dictfilekanjidic.h
EntryKanjidic::getKunyomiReadingsList
QStringList getKunyomiReadingsList() const
Definition: entrykanjidic.cpp:130
EntryKanjidic::getOnyomiReadingsList
QStringList getOnyomiReadingsList() const
Definition: entrykanjidic.cpp:140
QSTRINGLISTCHECK
#define QSTRINGLISTCHECK(x)
Definition: entrykanjidic.cpp:32
EntryKanjidic::makeReadingLink
virtual QString makeReadingLink(const QString &inReading) const
Definition: entrykanjidic.cpp:465
EntryKanjidic::HTMLExtendedInfo
virtual QString HTMLExtendedInfo(const QString &field) const
Definition: entrykanjidic.cpp:150
EntryKanjidic::getAsRadicalReadings
QString getAsRadicalReadings() const
Definition: entrykanjidic.cpp:95
QStringList::join
QString join(const QString &separator) const
Entry::HTMLMeanings
virtual QString HTMLMeanings() const
Return an HTML version of a meaning list.
Definition: entry.cpp:158
QString::remove
QString & remove(int position, int n)
EntryKanjidic::getDictionaryType
virtual QString getDictionaryType() const
Get the dictionary type (e.g.
Definition: entrykanjidic.cpp:105
EntryKanjidic::originalReadings
QStringList originalReadings
Definition: entrykanjidic.h:68
entrykanjidic.h
EntryKanjidic::getStrokesCount
QString getStrokesCount() const
Definition: entrykanjidic.cpp:145
Entry::Readings
QStringList Readings
The Readings (usually kana) that match this entry.
Definition: entry.h:203
EntryKanjidic::HTMLReadings
virtual QString HTMLReadings() const
Prepares Readings for output as HTML.
Definition: entrykanjidic.cpp:161
Entry::makeLink
virtual QString makeLink(const QString &entryString) const
Handy function for generating a link from a given QString.
Definition: entry.cpp:289
KANJIDIC
#define KANJIDIC
Definition: kitenmacros.h:25
DictFileKanjidic::displayFields
static QStringList * displayFields
Definition: dictfilekanjidic.h:64
QList::count
int count(const T &value) const
Entry::getExtendedInfoItem
QString getExtendedInfoItem(const QString &x) const
Simple accessor.
Definition: entry.cpp:150
QList::append
void append(const T &value)
QHash::constEnd
const_iterator constEnd() const
QHash
QString::toInt
int toInt(bool *ok, int base) const
EntryKanjidic::dumpEntry
virtual QString dumpEntry() const
This reproduces a kanjidic-formatted line from the Entry.
Definition: entrykanjidic.cpp:70
QString::isEmpty
bool isEmpty() const
Entry::Word
QString Word
The Word (usually containing kanji) that matches this entry.
Definition: entry.h:193
QString
EntryKanjidic::HTMLWord
virtual QString HTMLWord() const
Return an HTML version of a word.
Definition: entrykanjidic.cpp:183
QChar::unicode
ushort unicode() const
EntryKanjidic::clone
Entry * clone() const
A clone method, this should just implement "return new EntrySubClass(*this)".
Definition: entrykanjidic.cpp:61
QStringList
QHash::value
const T value(const Key &key) const
EntryKanjidic::toHTML
virtual QString toHTML() const
Returns an HTML version of an Entry.
Definition: entrykanjidic.cpp:475
EntryKanjidic::InNamesReadings
QStringList InNamesReadings
Definition: entrykanjidic.h:65
EntryKanjidic::getKunyomiReadings
QString getKunyomiReadings() const
Definition: entrykanjidic.cpp:125
QHash::constBegin
const_iterator constBegin() const
EntryKanjidic::getAsRadicalReadingsList
QStringList getAsRadicalReadingsList() const
Definition: entrykanjidic.cpp:100
EntryKanjidic::getKanjiGrade
QString getKanjiGrade() const
Definition: entrykanjidic.cpp:120
Entry::Meanings
QStringList Meanings
The Meanings that match this entry.
Definition: entry.h:198
LOADSTRING
#define LOADSTRING(stringToLoad)
QString::at
const QChar at(int position) const
kitenmacros.h
EntryKanjidic::KunyomiReadings
QStringList KunyomiReadings
Definition: entrykanjidic.h:66
QString::length
int length() const
EntryKanjidic
Definition: entrykanjidic.h:34
EntryKanjidic::getOnyomiReadings
QString getOnyomiReadings() const
Definition: entrykanjidic.cpp:135
QString::left
QString left(int n) const
QHash::contains
bool contains(const Key &key) const
EntryKanjidic::EntryKanjidic
EntryKanjidic(const EntryKanjidic &dict)
Definition: entrykanjidic.cpp:34
EntryKanjidic::loadEntry
virtual bool loadEntry(const QString &entryLine)
Fill the fields of our Entry object appropriate to the given entry line from Kanjidic.
Definition: entrykanjidic.cpp:193
Entry::ExtendedInfo
QHash< QString, QString > ExtendedInfo
A hash of extended information.
Definition: entry.h:207
QString::arg
QString arg(qlonglong a, int fieldWidth, int base, const QChar &fillChar) const
Entry::outputListDelimiter
QString outputListDelimiter
The delimiter for lists...
Definition: entry.h:216
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:16:38 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

kiten/lib

Skip menu "kiten/lib"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdeedu API Reference

Skip menu "kdeedu API Reference"
  • Analitza
  •     lib
  • kalgebra
  • kalzium
  •   libscience
  • kanagram
  • kig
  •   lib
  • klettres
  • marble
  • parley
  • rocs
  •   App
  •   RocsCore
  •   VisualEditor
  •   stepcore

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal