24 #include <KApplication>
28 #include <KStandardDirs>
40 , m_dictPtr( static_cast<unsigned char*>( MAP_FAILED ) )
41 , m_indexPtr( static_cast<
uint32_t*>( MAP_FAILED ) )
49 munmap( static_cast<void*>( m_dictPtr ), m_dictFile.size() );
50 munmap( static_cast<void*>( m_indexPtr ), m_indexFile.size() );
57 bool IndexedEdictFile::buildIndex()
60 proc << KStandardDirs::findExe(
"kitengen") << m_dictFile.fileName() << m_indexFile.fileName();
62 proc.waitForStarted();
66 KApplication::processEvents();
67 }
while( proc.waitForFinished( 5000 ) );
70 return proc.exitStatus() == QProcess::NormalExit && proc.exitCode() == 0;
74 bool IndexedEdictFile::checkIndex()
const
81 if( 4 == m_indexFile.read( reinterpret_cast<char*>( &indexVersionTest ), 4 ) )
83 if( indexVersionTest == dictionaryLength + indexFileVersion )
100 int IndexedEdictFile::equalOrSubstring(
const char *str1,
const char *str2 )
const
102 for(
unsigned i=0; ; ++i)
104 unsigned char c1 =
static_cast<unsigned char>( str1[ i ] );
105 unsigned char c2 =
static_cast<unsigned char>( str2[ i ] );
125 if( (
'A' <= c1 ) && ( c1 <=
'Z' ) )
129 if( (
'A' <= c2 ) && ( c2 <=
'Z' ) )
136 return (
int)c2 - (int)c1;
147 uint32_t IndexedEdictFile::findFirstMatch(
const QByteArray &query )
const
150 int high = m_indexFile.size() /
sizeof(
uint32_t ) - 1;
156 cur = ( high + low ) / 2;
157 comp = equalOrSubstring( query, lookupDictLine( cur ) );
166 }
while( high >= low && comp != 0 && ! ( high == 0 && low == 0 ) );
173 while( cur - 1 && 0 == equalOrSubstring( query,lookupDictLine( cur ) ) )
183 QVector<QString> results;
189 QTextCodec *codec = QTextCodec::codecForName(
"eucJP" );
195 QByteArray searchString = codec->fromUnicode( query );
196 int indexSize = m_indexFile.size() /
sizeof(
uint32_t );
197 int dictSize = m_dictFile.size() /
sizeof(
unsigned char );
199 int matchLocation = findFirstMatch( searchString );
200 QByteArray currentWord = lookupDictLine( ++matchLocation );
201 if( matchLocation == 0 )
206 QVector<uint32_t> possibleHits;
210 currentWord = lookupDictLine( ++matchLocation );
212 while( lookupDictChar( m_indexPtr[ matchLocation - 1 ] + i - 2 ) != 0x0A )
216 possibleHits.push_back( m_indexPtr[ matchLocation - 1 ] + i - 1 );
217 }
while( matchLocation < indexSize && 0 == equalOrSubstring( searchString, currentWord ) );
219 if( possibleHits.size() <= 0 )
224 qSort( possibleHits );
227 foreach(
uint32_t it, possibleHits )
232 results.push_back( codec->toUnicode( lookupFullLine( it ) ) );
249 #define EUC_LATIN_CHARACTER(x) (('a'<=x && x<='z')||(x==0xA4)||(x==0x80))
251 for(
unsigned i=0; ; ++i)
253 unsigned char c1 =
static_cast<unsigned char>( str1[ i ] );
254 unsigned char c2 =
static_cast<unsigned char>( str2[ i ] );
270 if( (
'A' <= c1 ) && ( c1 <=
'Z' ) )
274 if( (
'A' <= c2 ) && ( c2 <=
'Z' ) )
291 return (
int)c2 - (int)c1;
305 m_dictFile.setFileName( fileName );
306 if( ! m_dictFile.exists() )
311 m_dictPtr =
static_cast<unsigned char*
>( MAP_FAILED );
312 m_indexFile.setFileName( KGlobal::dirs()->saveLocation(
"data",
"kiten/xjdx/",
true )
313 + QFileInfo( fileName ).baseName() +
".xjdx" );
314 m_indexPtr =
static_cast<uint32_t*
>( MAP_FAILED );
315 if( ! m_indexFile.exists() )
325 if( ! m_dictFile.open( QIODevice::ReadOnly ) )
330 if( m_indexFile.open( QIODevice::ReadOnly ) )
351 bool IndexedEdictFile::loadmmaps()
353 m_indexPtr =
static_cast<uint32_t*
>(
354 mmap(0, m_indexFile.size(), PROT_READ, MAP_SHARED, m_indexFile.handle(), 0));
355 if( m_indexPtr == static_cast<uint32_t*>( MAP_FAILED ) )
360 m_dictPtr =
static_cast<unsigned char*
>( mmap( 0
364 , m_dictFile.handle()
366 if( m_dictPtr == static_cast<unsigned char*>( MAP_FAILED ) )
368 munmap( static_cast<void*>( m_indexPtr ), m_indexFile.size() );
369 m_indexPtr =
static_cast<uint32_t*
>( MAP_FAILED );
382 inline unsigned char IndexedEdictFile::lookupDictChar(
uint32_t i )
const
384 if( i > static_cast<uint32_t>( m_dictFile.size() ) )
397 QByteArray IndexedEdictFile::lookupDictLine(
uint32_t i )
const
399 if( i > static_cast<uint32_t>( m_dictFile.size()) )
401 return QByteArray(
"" );
404 uint32_t start = m_indexPtr[ i ] - 1;
406 const unsigned size = m_dictFile.size();
409 while( pos<=size && m_dictPtr[ pos ] != 0 && m_dictPtr[ pos ] != 0x0A )
415 QByteArray retval( (
const char*)( m_dictPtr + start )
424 QByteArray IndexedEdictFile::lookupFullLine(
uint32_t i )
const
426 if( i > static_cast<uint32_t>( m_dictFile.size() ) )
428 return QByteArray (0x0A, 1 );
433 const unsigned max = m_dictFile.size();
434 while( pos <= max && m_dictPtr[ pos ] != 0 && m_dictPtr[ pos ] != 0x0A )
439 QByteArray retval( (
const char*)( m_dictPtr + start )
QVector< QString > findMatches(const QString &query) const
Get everything that looks remotely like a given search string.
bool loadFile(const QString &fileName)
Load a file, generate the index if it doesn't already exist.
#define EUC_LATIN_CHARACTER(x)
IndexedEdictFile()
Create and initialize this object.
bool valid() const
Test if the file was properly loaded.