24 #include <KApplication>
28 #include <KStandardDirs>
40 , m_dictPtr( static_cast<unsigned char*>( MAP_FAILED ) )
41 , m_indexPtr( static_cast<
uint32_t*>( MAP_FAILED ) )
49 munmap( static_cast<void*>( m_dictPtr ), m_dictFile.
size() );
50 munmap( static_cast<void*>( m_indexPtr ), m_indexFile.
size() );
57 bool IndexedEdictFile::buildIndex()
60 proc << KStandardDirs::findExe(
"kitengen") << m_dictFile.
fileName() << m_indexFile.
fileName();
62 proc.waitForStarted();
66 KApplication::processEvents();
67 }
while( proc.waitForFinished( 5000 ) );
70 return proc.exitStatus() == QProcess::NormalExit && proc.exitCode() == 0;
74 bool IndexedEdictFile::checkIndex()
const
81 if( 4 == m_indexFile.
read( reinterpret_cast<char*>( &indexVersionTest ), 4 ) )
83 if( indexVersionTest == dictionaryLength + indexFileVersion )
100 int IndexedEdictFile::equalOrSubstring(
const char *str1,
const char *str2 )
const
102 for(
unsigned i=0; ; ++i)
104 unsigned char c1 =
static_cast<unsigned char>( str1[ i ] );
105 unsigned char c2 =
static_cast<unsigned char>( str2[ i ] );
125 if( (
'A' <= c1 ) && ( c1 <=
'Z' ) )
129 if( (
'A' <= c2 ) && ( c2 <=
'Z' ) )
136 return (
int)c2 - (int)c1;
156 cur = ( high + low ) / 2;
157 comp = equalOrSubstring( query, lookupDictLine( cur ) );
166 }
while( high >= low && comp != 0 && ! ( high == 0 && low == 0 ) );
173 while( cur - 1 && 0 == equalOrSubstring( query,lookupDictLine( cur ) ) )
196 int indexSize = m_indexFile.
size() /
sizeof(
uint32_t );
197 int dictSize = m_dictFile.
size() /
sizeof(
unsigned char );
199 int matchLocation = findFirstMatch( searchString );
200 QByteArray currentWord = lookupDictLine( ++matchLocation );
201 if( matchLocation == 0 )
210 currentWord = lookupDictLine( ++matchLocation );
212 while( lookupDictChar( m_indexPtr[ matchLocation - 1 ] + i - 2 ) != 0x0A )
216 possibleHits.
push_back( m_indexPtr[ matchLocation - 1 ] + i - 1 );
217 }
while( matchLocation < indexSize && 0 == equalOrSubstring( searchString, currentWord ) );
219 if( possibleHits.
size() <= 0 )
224 qSort( possibleHits );
227 foreach(
uint32_t it, possibleHits )
249 #define EUC_LATIN_CHARACTER(x) (('a'<=x && x<='z')||(x==0xA4)||(x==0x80))
251 for(
unsigned i=0; ; ++i)
253 unsigned char c1 =
static_cast<unsigned char>( str1[ i ] );
254 unsigned char c2 =
static_cast<unsigned char>( str2[ i ] );
270 if( (
'A' <= c1 ) && ( c1 <=
'Z' ) )
274 if( (
'A' <= c2 ) && ( c2 <=
'Z' ) )
291 return (
int)c2 - (int)c1;
306 if( ! m_dictFile.
exists() )
311 m_dictPtr =
static_cast<unsigned char*
>( MAP_FAILED );
312 m_indexFile.
setFileName( KGlobal::dirs()->saveLocation(
"data",
"kiten/xjdx/",
true )
313 +
QFileInfo( fileName ).baseName() +
".xjdx" );
314 m_indexPtr =
static_cast<uint32_t*
>( MAP_FAILED );
315 if( ! m_indexFile.
exists() )
325 if( ! m_dictFile.
open( QIODevice::ReadOnly ) )
330 if( m_indexFile.
open( QIODevice::ReadOnly ) )
351 bool IndexedEdictFile::loadmmaps()
353 m_indexPtr =
static_cast<uint32_t*
>(
354 mmap(0, m_indexFile.
size(), PROT_READ, MAP_SHARED, m_indexFile.
handle(), 0));
355 if( m_indexPtr == static_cast<uint32_t*>( MAP_FAILED ) )
360 m_dictPtr =
static_cast<unsigned char*
>( mmap( 0
366 if( m_dictPtr == static_cast<unsigned char*>( MAP_FAILED ) )
368 munmap( static_cast<void*>( m_indexPtr ), m_indexFile.
size() );
369 m_indexPtr =
static_cast<uint32_t*
>( MAP_FAILED );
382 inline unsigned char IndexedEdictFile::lookupDictChar(
uint32_t i )
const
384 if( i > static_cast<uint32_t>( m_dictFile.
size() ) )
399 if( i > static_cast<uint32_t>( m_dictFile.
size()) )
404 uint32_t start = m_indexPtr[ i ] - 1;
406 const unsigned size = m_dictFile.
size();
409 while( pos<=size && m_dictPtr[ pos ] != 0 && m_dictPtr[ pos ] != 0x0A )
415 QByteArray retval( (
const char*)( m_dictPtr + start )
426 if( i > static_cast<uint32_t>( m_dictFile.
size() ) )
433 const unsigned max = m_dictFile.
size();
434 while( pos <= max && m_dictPtr[ pos ] != 0 && m_dictPtr[ pos ] != 0x0A )
439 QByteArray retval( (
const char*)( m_dictPtr + start )
QByteArray fromUnicode(const QString &str) const
QVector< QString > findMatches(const QString &query) const
Get everything that looks remotely like a given search string.
void setFileName(const QString &name)
bool loadFile(const QString &fileName)
Load a file, generate the index if it doesn't already exist.
qint64 read(char *data, qint64 maxSize)
#define EUC_LATIN_CHARACTER(x)
virtual bool open(QFlags< QIODevice::OpenModeFlag > mode)
IndexedEdictFile()
Create and initialize this object.
virtual qint64 size() const
bool valid() const
Test if the file was properly loaded.
QTextCodec * codecForName(const QByteArray &name)
void push_back(const T &value)
QString toUnicode(const QByteArray &a) const