• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.14
  • kdelibs
  • kdecore
  • sycoca
ksycocadict.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2  * Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License version 2 as published by the Free Software Foundation;
7  *
8  * This library is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * Library General Public License for more details.
12  *
13  * You should have received a copy of the GNU Library General Public License
14  * along with this library; see the file COPYING.LIB. If not, write to
15  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
16  * Boston, MA 02110-1301, USA.
17  **/
18 
19 #include "ksycocadict_p.h"
20 #include <kservice.h>
21 #include "ksycocaentry.h"
22 #include "ksycoca.h"
23 #include "kdebug.h"
24 
25 #include <QtCore/QBitArray>
26 
27 namespace
28 {
29 struct string_entry {
30  string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
31  : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
32  {}
33  uint hash;
34  const int length;
35  const QString keyStr;
36  const QChar * const key; // always points to keyStr.unicode(); just an optimization
37  const KSycocaEntry::Ptr payload;
38 };
39 }
40 
41 class KSycocaDictStringList : public QList<string_entry*>
42 {
43 public:
44  ~KSycocaDictStringList() {
45  qDeleteAll(*this);
46  }
47 };
48 
49 class KSycocaDict::Private
50 {
51 public:
52  Private()
53  : stringlist( 0 ),
54  stream( 0 ),
55  offset( 0 )
56  {
57  }
58 
59  ~Private()
60  {
61  delete stringlist;
62  }
63 
64  // Helper for find_string and findMultiString
65  qint32 offsetForKey(const QString& key) const;
66 
67  // Calculate hash - can be used during loading and during saving.
68  quint32 hashKey(const QString & key) const;
69 
70  KSycocaDictStringList *stringlist;
71  QDataStream *stream;
72  qint64 offset;
73  quint32 hashTableSize;
74  QList<qint32> hashList;
75 };
76 
77 KSycocaDict::KSycocaDict()
78  : d( new Private )
79 {
80 }
81 
82 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
83  : d( new Private )
84 {
85  d->stream = str;
86  d->offset = offset;
87 
88  quint32 test1, test2;
89  str->device()->seek(offset);
90  (*str) >> test1 >> test2;
91  if ((test1 > 0x000fffff) || (test2 > 1024))
92  {
93  KSycoca::flagError();
94  d->hashTableSize = 0;
95  d->offset = 0;
96  return;
97  }
98 
99  str->device()->seek(offset);
100  (*str) >> d->hashTableSize;
101  (*str) >> d->hashList;
102  d->offset = str->device()->pos(); // Start of hashtable
103 }
104 
105 KSycocaDict::~KSycocaDict()
106 {
107  delete d;
108 }
109 
110 void
111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
112 {
113  if (key.isEmpty()) return; // Not allowed (should never happen)
114  if (!payload) return; // Not allowed!
115  if (!d->stringlist)
116  {
117  d->stringlist = new KSycocaDictStringList;
118  }
119 
120  string_entry *entry = new string_entry(key, payload);
121  d->stringlist->append(entry);
122 }
123 
124 void
125 KSycocaDict::remove(const QString &key)
126 {
127  if (!d || !d->stringlist) {
128  return;
129  }
130 
131  bool found = false;
132  for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
133  string_entry* entry = *it;
134  if (entry->keyStr == key) {
135  d->stringlist->erase(it);
136  delete entry;
137  found = true;
138  break;
139  }
140  }
141  if (!found) {
142  kWarning(7011) << "key not found:" << key;
143  }
144 }
145 
146 int KSycocaDict::find_string(const QString &key ) const
147 {
148  Q_ASSERT(d);
149 
150  //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
151  qint32 offset = d->offsetForKey(key);
152 
153  //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
154  if (offset == 0)
155  return 0;
156 
157  if (offset > 0)
158  return offset; // Positive ID
159 
160  // Lookup duplicate list.
161  offset = -offset;
162 
163  d->stream->device()->seek(offset);
164  //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
165 
166  while(true)
167  {
168  (*d->stream) >> offset;
169  if (offset == 0) break;
170  QString dupkey;
171  (*d->stream) >> dupkey;
172  //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
173  if (dupkey == key) return offset;
174  }
175  //kWarning(7011) << "Not found!";
176 
177  return 0;
178 }
179 
180 
181 QList<int> KSycocaDict::findMultiString(const QString &key ) const
182 {
183  qint32 offset = d->offsetForKey(key);
184  QList<int> offsetList;
185  if (offset == 0)
186  return offsetList;
187 
188  if (offset > 0) { // Positive ID: one entry found
189  offsetList.append(offset);
190  return offsetList;
191  }
192 
193  // Lookup duplicate list.
194  offset = -offset;
195 
196  d->stream->device()->seek(offset);
197  //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
198 
199  while(true)
200  {
201  (*d->stream) >> offset;
202  if (offset == 0) break;
203  QString dupkey;
204  (*d->stream) >> dupkey;
205  //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
206  if (dupkey == key)
207  offsetList.append(offset);
208  }
209  return offsetList;
210 }
211 
212 uint KSycocaDict::count() const
213 {
214  if ( !d || !d->stringlist ) return 0;
215 
216  return d->stringlist->count();
217 }
218 
219 void
220 KSycocaDict::clear()
221 {
222  delete d;
223  d = 0;
224 }
225 
226 uint KSycocaDict::Private::hashKey( const QString &key) const
227 {
228  int len = key.length();
229  uint h = 0;
230 
231  for(int i = 0; i < hashList.count(); i++)
232  {
233  int pos = hashList[i];
234  if (pos == 0) {
235  continue;
236  } else if (pos < 0) {
237  pos = -pos;
238  if (pos < len)
239  h = ((h * 13) + (key[len-pos].cell() % 29)) & 0x3ffffff;
240  } else {
241  pos = pos-1;
242  if (pos < len)
243  h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
244  }
245  }
246  return h;
247 }
248 
249 // If we have the strings
250 // hello
251 // world
252 // kde
253 // Then we end up with
254 // ABCDE
255 // where A = diversity of 'h' + 'w' + 'k' etc.
256 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end)
257 
258 // The hasList is used for hashing:
259 // hashList = (-2, 1, 3) means that the hash key comes from
260 // the 2nd character from the right, then the 1st from the left, then the 3rd from the left.
261 
262 // Calculate the diversity of the strings at position 'pos'
263 // NOTE: this code is slow, it takes 12% of the _overall_ `kbuildsycoca4 --noincremental` running time
264 static int
265 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
266 {
267  if (inPos == 0) return 0;
268  QBitArray matrix(sz);
269  int pos;
270 
271  //static const int s_maxItems = 50;
272  //int numItem = 0;
273 
274  if (inPos < 0) {
275  pos = -inPos;
276  for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
277  {
278  string_entry* entry = *it;
279  int len = entry->length;
280  if (pos < len) {
281  uint hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3ffffff;
282  matrix.setBit( hash % sz, true );
283  }
284  //if (++numItem == s_maxItems)
285  // break;
286  }
287  } else {
288  pos = inPos-1;
289  for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
290  {
291  string_entry* entry = *it;
292  if (pos < entry->length) {
293  uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
294  matrix.setBit( hash % sz, true );
295  }
296  //if (++numItem == s_maxItems)
297  // break;
298  }
299  }
300  return matrix.count(true);
301 }
302 
303 //
304 // Add the diversity of the strings at position 'pos'
305 static void
306 addDiversity(KSycocaDictStringList *stringlist, int pos)
307 {
308  if (pos == 0) return;
309  if (pos < 0) {
310  pos = -pos;
311  for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
312  {
313  string_entry* entry = *it;
314  int len = entry->length;
315  if (pos < len)
316  entry->hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3fffffff;
317  }
318  } else {
319  pos = pos - 1;
320  for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
321  {
322  string_entry* entry = *it;
323  if (pos < entry->length)
324  entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
325  }
326  }
327 }
328 
329 
330 void
331 KSycocaDict::save(QDataStream &str)
332 {
333  if (count() == 0)
334  {
335  d->hashTableSize = 0;
336  d->hashList.clear();
337  str << d->hashTableSize;
338  str << d->hashList;
339  return;
340  }
341 
342  d->offset = str.device()->pos();
343 
344  //kDebug(7011) << "KSycocaDict:" << count() << "entries.";
345 
346  //kDebug(7011) << "Calculating hash keys..";
347 
348  int maxLength = 0;
349  //kDebug(7011) << "Finding maximum string length";
350  for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
351  {
352  string_entry* entry = *it;
353  entry->hash = 0;
354  if (entry->length > maxLength)
355  maxLength = entry->length;
356  }
357 
358  //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList;
359 
360  // use "almost prime" number for sz (to calculate diversity) and later
361  // for the table size of big tables
362  // int sz = d->stringlist->count()*5-1;
363  register unsigned int sz = count()*4 + 1;
364  while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
365  sz+=2;
366 
367  d->hashList.clear();
368 
369  // Times (with warm caches, i.e. after multiple runs)
370  // kbuildsycoca4 --noincremental 2.83s user 0.20s system 95% cpu 3.187 total
371  // kbuildsycoca4 --noincremental 2.74s user 0.25s system 93% cpu 3.205 total
372  // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration
373 
374  // Now that MimeTypes are not parsed anymore:
375  // kbuildsycoca4 --noincremental 2.18s user 0.30s system 91% cpu 2.719 total
376  // kbuildsycoca4 --noincremental 2.07s user 0.34s system 89% cpu 2.681 total
377 
378  // If I enabled s_maxItems = 50, it goes down to
379  // but I don't know if that's a good idea.
380  // kbuildsycoca4 --noincremental 1.73s user 0.31s system 85% cpu 2.397 total
381  // kbuildsycoca4 --noincremental 1.84s user 0.29s system 95% cpu 2.230 total
382 
383  // try to limit diversity scan by "predicting" positions
384  // with high diversity
385  QVector<int> oldvec(maxLength*2+1);
386  oldvec.fill(0);
387  int mindiv=0;
388  int lastDiv = 0;
389 
390  while(true)
391  {
392  int divsum=0,divnum=0;
393 
394  int maxDiv = 0;
395  int maxPos = 0;
396  for (int pos = -maxLength; pos <= maxLength; ++pos) {
397  // cut off
398  if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; }
399 
400  const int diversity = calcDiversity(d->stringlist, pos, sz);
401  if (diversity > maxDiv) {
402  maxDiv = diversity;
403  maxPos = pos;
404  }
405  oldvec[pos + maxLength] = diversity;
406  divsum += diversity;
407  ++divnum;
408  }
409  // arbitrary cut-off value 3/4 of average seems to work
410  if (divnum)
411  mindiv=(3*divsum)/(4*divnum);
412 
413  if (maxDiv <= lastDiv)
414  break;
415  //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos;
416  lastDiv = maxDiv;
417  addDiversity(d->stringlist, maxPos);
418  d->hashList.append(maxPos);
419  }
420 
421 
422  for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
423  (*it)->hash = d->hashKey((*it)->keyStr);
424  }
425 // fprintf(stderr, "Calculating minimum table size..\n");
426 
427  d->hashTableSize = sz;
428 
429  //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec;
430 
431  struct hashtable_entry {
432  string_entry *entry;
433  QList<string_entry*>* duplicates;
434  qint64 duplicate_offset;
435  };
436 
437  hashtable_entry *hashTable = new hashtable_entry[ sz ];
438 
439  //kDebug(7011) << "Clearing hashtable...";
440  for (unsigned int i=0; i < sz; i++)
441  {
442  hashTable[i].entry = 0;
443  hashTable[i].duplicates = 0;
444  }
445 
446  //kDebug(7011) << "Filling hashtable...";
447  for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
448  {
449  string_entry* entry = *it;
450  //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath();
451  int hash = entry->hash % sz;
452  if (!hashTable[hash].entry)
453  { // First entry
454  hashTable[hash].entry = entry;
455  }
456  else
457  {
458  if (!hashTable[hash].duplicates)
459  { // Second entry, build duplicate list.
460  hashTable[hash].duplicates = new QList<string_entry*>;
461  hashTable[hash].duplicates->append(hashTable[hash].entry);
462  hashTable[hash].duplicate_offset = 0;
463  }
464  hashTable[hash].duplicates->append(entry);
465  }
466  }
467 
468  str << d->hashTableSize;
469  str << d->hashList;
470 
471  d->offset = str.device()->pos(); // d->offset points to start of hashTable
472  //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
473 
474  // Write the hashtable + the duplicates twice.
475  // The duplicates are after the normal hashtable, but the offset of each
476  // duplicate entry is written into the normal hashtable.
477  for(int pass = 1; pass <= 2; pass++)
478  {
479  str.device()->seek(d->offset);
480  //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
481  for(uint i=0; i < d->hashTableSize; i++)
482  {
483  qint32 tmpid;
484  if (!hashTable[i].entry)
485  tmpid = (qint32) 0;
486  else if (!hashTable[i].duplicates)
487  tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
488  else
489  tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
490  str << tmpid;
491  //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
492  }
493  //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
494 
495  //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
496  for(uint i=0; i < d->hashTableSize; i++)
497  {
498  const QList<string_entry*> *dups = hashTable[i].duplicates;
499  if (dups)
500  {
501  hashTable[i].duplicate_offset = str.device()->pos();
502 
503  /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
504 */
505  for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
506  {
507  const qint32 offset = (*dup)->payload->offset();
508  if (!offset) {
509  const QString storageId = (*dup)->payload->storageId();
510  kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
511  if ((*dup)->payload->isType(KST_KService)) {
512  KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
513  kDebug() << service->storageId() << service->entryPath();
514  }
515  // save() must have been called on the entry
516  Q_ASSERT_X( offset, "KSycocaDict::save",
517  QByteArray("entry offset is 0, save() was not called on "
518  + (*dup)->payload->storageId().toLatin1()
519  + " entryPath="
520  + (*dup)->payload->entryPath().toLatin1())
521  );
522  }
523  str << offset ; // Positive ID
524  str << (*dup)->keyStr; // Key (QString)
525  }
526  str << (qint32) 0; // End of list marker (0)
527  }
528  }
529  //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
530  }
531 
532  //kDebug(7011) << "Cleaning up hash table.";
533  for(uint i=0; i < d->hashTableSize; i++)
534  {
535  delete hashTable[i].duplicates;
536  }
537  delete [] hashTable;
538 }
539 
540 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
541 {
542  if ( !stream || !offset )
543  {
544  kError() << "No ksycoca4 database available!" << endl;
545  return 0;
546  }
547 
548  if (hashTableSize == 0)
549  return 0; // Unlikely to find anything :-]
550 
551  // Read hash-table data
552  const uint hash = hashKey(key) % hashTableSize;
553  //kDebug(7011) << "hash is" << hash;
554 
555  const qint32 off = offset+sizeof(qint32)*hash;
556  //kDebug(7011) << QString("off is %1").arg(off,8,16);
557  stream->device()->seek( off );
558 
559  qint32 retOffset;
560  (*stream) >> retOffset;
561  return retOffset;
562 }
KSharedPtr
Can be used to control the lifetime of an object that has derived QSharedData.
Definition: kconfiggroup.h:38
qint64
QBitArray::count
int count() const
KSycocaDict::add
void add(const QString &key, const KSycocaEntry::Ptr &payload)
Adds a 'payload' to the dictionary with key 'key'.
Definition: ksycocadict.cpp:111
KSycocaDict::remove
void remove(const QString &key)
Removes the 'payload' from the dictionary with key 'key'.
Definition: ksycocadict.cpp:125
kdebug.h
QIODevice::seek
virtual bool seek(qint64 pos)
QByteArray
QDataStream
QBitArray::setBit
void setBit(int i)
QChar
QVector::fill
QVector< T > & fill(const T &value, int size)
kError
static QDebug kError(bool cond, int area=KDE_DEFAULT_DEBUG_AREA)
Definition: kdebug.h:187
KSycocaDict::findMultiString
QList< int > findMultiString(const QString &key) const
Looks up all entries identified by 'key'.
Definition: ksycocadict.cpp:181
quint32
QIODevice::pos
virtual qint64 pos() const
KST_KService
Definition: ksycocatype.h:31
KSycocaDict::save
void save(QDataStream &str)
Save the dictionary to the stream A reasonable fast hash algorithm will be created.
Definition: ksycocadict.cpp:331
KSycocaDict::find_string
int find_string(const QString &key) const
Looks up an entry identified by 'key'.
Definition: ksycocadict.cpp:146
ksycocadict_p.h
QList::append
void append(const T &value)
KSycocaDict::~KSycocaDict
~KSycocaDict()
Definition: ksycocadict.cpp:105
KSycocaEntry::entryPath
QString entryPath() const
Definition: ksycocaentry.cpp:104
QString::isEmpty
bool isEmpty() const
addDiversity
static void addDiversity(KSycocaDictStringList *stringlist, int pos)
Definition: ksycocadict.cpp:306
QString
QList
Definition: kaboutdata.h:33
KSycoca::flagError
static void flagError()
A read error occurs.
Definition: ksycoca.cpp:559
QBitArray
kservice.h
ksycocaentry.h
QList::end
iterator end()
calcDiversity
static int calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
Definition: ksycocadict.cpp:265
kWarning
#define kWarning
Definition: kdebug.h:322
KSycocaDict::clear
void clear()
Reset the dictionary.
Definition: ksycocadict.cpp:220
ksycoca.h
QVector
KSharedPtr< KService >::staticCast
static KSharedPtr< KService > staticCast(const KSharedPtr< U > &o)
Convert KSharedPtr to KSharedPtr, using a static_cast.
Definition: ksharedptr.h:173
KService::storageId
QString storageId() const
Returns a normalized ID suitable for storing in configuration files.
Definition: kservice.cpp:783
QString::length
int length() const
KSycocaDict::count
uint count() const
The number of entries in the dictionary.
Definition: ksycocadict.cpp:212
qint32
kDebug
#define kDebug
Definition: kdebug.h:316
QDataStream::device
QIODevice * device() const
KSycocaDict::KSycocaDict
KSycocaDict()
Create an empty dict, for building the database.
Definition: ksycocadict.cpp:77
QString::data
QChar * data()
QList::begin
iterator begin()
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:22:11 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal