• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • services
  • fileindexer
indexcleaner.cpp
Go to the documentation of this file.
1 /*
2  This file is part of the Nepomuk KDE project.
3  Copyright (C) 2010-2011 Sebastian Trueg <trueg@kde.org>
4  Copyright (C) 2010-2013 Vishesh Handa <handa.vish@gmail.com>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 2.1 of the License, or (at your option) version 3, or any
10  later version accepted by the membership of KDE e.V. (or its
11  successor approved by the membership of KDE e.V.), which shall
12  act as a proxy defined in Section 6 of version 3 of the license.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public
20  License along with this library. If not, see <http://www.gnu.org/licenses/>.
21 */
22 
23 #include "indexcleaner.h"
24 #include "fileindexerconfig.h"
25 #include "util.h"
26 
27 #include <QtCore/QTimer>
28 #include <QtCore/QMutexLocker>
29 #include <QtCore/QSet>
30 #include <KDebug>
31 #include <KConfigGroup>
32 
33 #include "resource.h"
34 #include "resourcemanager.h"
35 
36 #include <Soprano/Model>
37 #include <Soprano/QueryResultIterator>
38 #include <Soprano/NodeIterator>
39 #include <Soprano/Node>
40 
41 #include <Soprano/Vocabulary/RDF>
42 #include <Soprano/Vocabulary/Xesam>
43 #include <Soprano/Vocabulary/NAO>
44 #include "nfo.h"
45 #include "nie.h"
46 
47 using namespace Nepomuk2::Vocabulary;
48 using namespace Soprano::Vocabulary;
49 
50 namespace {
57  QString constructExcludeIncludeFoldersFilter(const QStringList& folders)
58  {
59  QStringList filters;
60  QStringList used;
61  foreach( const QString& folder, folders ) {
62  if(!used.contains(folder)) {
63  used << folder;
64  filters << QString::fromLatin1( "(?url!=%1)" ).arg( Soprano::Node::resourceToN3( KUrl( folder ) ) );
65  }
66  }
67  return filters.join( QLatin1String( " && " ) );
68  }
69 }
70 
71 
72 Nepomuk2::IndexCleaner::IndexCleaner(QObject* parent)
73  : KJob(parent),
74  m_suspended(false),
75  m_delay(0)
76 {
77  setCapabilities( Suspendable );
78 }
79 
80 
81 void Nepomuk2::IndexCleaner::start()
82 {
83  kDebug() << "CLEANING!!";
84  const QString folderFilter = constructExcludeFolderFilter(Nepomuk2::FileIndexerConfig::self());
85 
86  const int limit = 20;
87 
88  //
89  // Create all queries that return indexed data which should not be there anymore.
90  //
91 
92  //
93  // Query the nepomukindexer app resource in order to speed up the queries.
94  //
95  QUrl appRes;
96  Soprano::Model* model = ResourceManager::instance()->mainModel();
97  QString appQuery = QString::fromLatin1("select ?app where { ?app nao:identifier %1 . } LIMIT 1")
98  .arg( Soprano::Node::literalToN3(QLatin1String("nepomukindexer")) );
99 
100  Soprano::QueryResultIterator appIt
101  = model->executeQuery( appQuery, Soprano::Query::QueryLanguageSparql);
102  if(appIt.next()) {
103  appRes = appIt[0].uri();
104  }
105 
106  //
107  // 1. Data that has been created in KDE >= 4.7 using the DMS
108  //
109  if(!appRes.isEmpty()) {
110  m_removalQueries << QString::fromLatin1( "select distinct ?r where { "
111  "graph ?g { ?r nie:url ?url . } . "
112  "?g nao:maintainedBy %1 . "
113  " %2 } LIMIT %3" )
114  .arg( Soprano::Node::resourceToN3( appRes ),
115  folderFilter,
116  QString::number( limit ) );
117  }
118 
119 
120  //
121  // 2. Build filter query for all exclude filters
122  //
123  const QString fileFilters = constructExcludeFiltersFilenameFilter(Nepomuk2::FileIndexerConfig::self());
124  const QString includeExcludeFilters = constructExcludeIncludeFoldersFilter(Nepomuk2::FileIndexerConfig::self()->includeFolders());
125 
126  QString filters;
127  if( !includeExcludeFilters.isEmpty() && !fileFilters.isEmpty() )
128  filters = QString::fromLatin1("FILTER((%1) && (%2)) .").arg( includeExcludeFilters, fileFilters );
129  else if( !fileFilters.isEmpty() )
130  filters = QString::fromLatin1("FILTER(%1) .").arg( fileFilters );
131  else if( !includeExcludeFilters.isEmpty() )
132  filters = QString::fromLatin1("FILTER(%1) .").arg( includeExcludeFilters );
133 
134  if(!filters.isEmpty()) {
135  // 2.1. Data for files which are excluded through filters
136  if(!appRes.isEmpty()) {
137  m_removalQueries << QString::fromLatin1( "select distinct ?r where { "
138  "graph ?g { ?r nie:url ?url . } . "
139  "?r nfo:fileName ?fn . "
140  "?g nao:maintainedBy %1 . "
141  "FILTER(REGEX(STR(?url),\"^file:/\")) . "
142  "%2 } LIMIT %3" )
143  .arg( Soprano::Node::resourceToN3( appRes ),
144  filters )
145  .arg(limit);
146  }
147  }
148 
149  // 2.2. Data for files which have paths that are excluded through exclude filters
150  const QString excludeFiltersFolderFilter = constructExcludeFiltersFolderFilter(Nepomuk2::FileIndexerConfig::self());
151  if(!excludeFiltersFolderFilter.isEmpty()) {
152  m_removalQueries << QString::fromLatin1( "select distinct ?r where { "
153  "graph ?g { ?r nie:url ?url . } . "
154  "?g nao:maintainedBy %1 . "
155  "FILTER(REGEX(STR(?url),\"^file:/\") && %2) . "
156  "} LIMIT %3" )
157  .arg( Soprano::Node::resourceToN3( appRes ),
158  excludeFiltersFolderFilter )
159  .arg(limit);
160  }
161 
162  //
163  // Start the removal
164  //
165  m_query = m_removalQueries.dequeue();
166  if( !m_suspended ) {
167  QTimer::singleShot(m_delay, this, SLOT(clearNextBatch()));
168  }
169 }
170 
171 void Nepomuk2::IndexCleaner::slotRemoveResourcesDone(KJob* job)
172 {
173  if( job->error() ) {
174  kDebug() << job->errorString();
175  }
176 
177  QMutexLocker lock(&m_stateMutex);
178  if( !m_suspended ) {
179  QTimer::singleShot(m_delay, this, SLOT(clearNextBatch()));
180  }
181 }
182 
183 void Nepomuk2::IndexCleaner::clearNextBatch()
184 {
185  QList<QUrl> resources;
186  Soprano::QueryResultIterator it
187  = ResourceManager::instance()->mainModel()->executeQuery( m_query, Soprano::Query::QueryLanguageSparqlNoInference );
188  while( it.next() ) {
189  resources << it[0].uri();
190  }
191 
192  if( !resources.isEmpty() ) {
193  kDebug() << m_query;
194  kDebug() << resources;
195  KJob* job = Nepomuk2::clearIndexedData(resources);
196  connect( job, SIGNAL(finished(KJob*)), this, SLOT(slotRemoveResourcesDone(KJob*)), Qt::QueuedConnection );
197  }
198 
199  else if( !m_removalQueries.isEmpty() ) {
200  m_query = m_removalQueries.dequeue();
201  QTimer::singleShot(m_delay, this, SLOT(clearNextBatch()));
202  }
203 
204  else {
205  emitResult();
206  }
207 }
208 
209 bool Nepomuk2::IndexCleaner::doSuspend()
210 {
211  QMutexLocker locker(&m_stateMutex);
212  m_suspended = true;
213  return true;
214 }
215 
216 bool Nepomuk2::IndexCleaner::doResume()
217 {
218  QMutexLocker locker(&m_stateMutex);
219  if(m_suspended) {
220  m_suspended = false;
221  QTimer::singleShot( 0, this, SLOT(clearNextBatch()) );
222  }
223  return true;
224 }
225 
226 void Nepomuk2::IndexCleaner::setDelay(int msecs)
227 {
228  m_delay = msecs;
229 }
230 
231 
232 namespace {
233  QString constructFolderSubFilter( const QList<QPair<QString, bool> > folders, int& index )
234  {
235  QString path = folders[index].first;
236  if ( !path.endsWith( '/' ) )
237  path += '/';
238  const bool include = folders[index].second;
239 
240  ++index;
241 
242  QStringList subFilters;
243  while ( index < folders.count() &&
244  folders[index].first.startsWith( path ) ) {
245  subFilters << constructFolderSubFilter( folders, index );
246  }
247 
248  QString str = QString::fromLatin1( KUrl(path).toEncoded() );
249  str.replace( '\'', QLatin1String("\\'") );
250  QString thisFilter = QString::fromLatin1( "REGEX(STR(?url),'^%1')" ).arg( str );
251 
252  // we want all folders that should NOT be indexed
253  if ( include ) {
254  thisFilter.prepend( '!' );
255  }
256  subFilters.prepend( thisFilter );
257 
258  if ( subFilters.count() > 1 ) {
259  return '(' + subFilters.join( include ? QLatin1String( " || " ) : QLatin1String( " && " ) ) + ')';
260  }
261  else {
262  return subFilters.first();
263  }
264  }
265 
270  bool alreadyIncluded( const QList<QPair<QString, bool> >& folders, const QString& f )
271  {
272  bool included = false;
273  for ( int i = 0; i < folders.count(); ++i ) {
274  if ( f != folders[i].first &&
275  f.startsWith( KUrl( folders[i].first ).path( KUrl::AddTrailingSlash ) ) ) {
276  included = folders[i].second;
277  }
278  }
279  return included;
280  }
281 
286  void cleanupList( QList<QPair<QString, bool> >& result )
287  {
288  int i = 0;
289  while ( i < result.count() ) {
290  if ( result[i].first.isEmpty() ||
291  (result[i].second &&
292  alreadyIncluded( result, result[i].first ) ))
293  result.removeAt( i );
294  else
295  ++i;
296  }
297  }
298 }
299 
300 // static
301 QString Nepomuk2::IndexCleaner::constructExcludeFolderFilter(FileIndexerConfig *cfg)
302 {
303  //
304  // This filter consists of two parts:
305  // 1. A set of filter terms which exlude the actual include folders themselves from being removed
306  // 2. A set of filter terms which is a recursive sequence of inclusion and exclusion
307  //
308  QStringList subFilters( constructExcludeIncludeFoldersFilter(cfg->includeFolders()) );
309 
310  // now add the actual filters
311  QList<QPair<QString, bool> > folders = cfg->folders();
312  cleanupList(folders);
313  int index = 0;
314  while ( index < folders.count() ) {
315  subFilters << constructFolderSubFilter( folders, index );
316  }
317  QString filters = subFilters.join(" && ");
318  if( !filters.isEmpty() )
319  return QString::fromLatin1("FILTER(%1) .").arg(filters);
320 
321  return QString();
322 }
323 
324 
325 namespace {
326  QString excludeFilterToSparqlRegex(const QString& filter) {
327  QString filterRxStr = QRegExp::escape( filter );
328  filterRxStr.replace( "\\*", QLatin1String( ".*" ) );
329  filterRxStr.replace( "\\?", QLatin1String( "." ) );
330  filterRxStr.replace( '\\',"\\\\" );
331  return filterRxStr;
332  }
333 }
334 
335 // static
336 QString Nepomuk2::IndexCleaner::constructExcludeFiltersFilenameFilter(Nepomuk2::FileIndexerConfig *cfg)
337 {
338  //
339  // This is stright-forward: we convert the filters into SPARQL regex syntax
340  // and then combine them with the || operator.
341  //
342  QStringList fileFilters;
343  foreach( const QString& filter, cfg->excludeFilters() ) {
344  fileFilters << QString::fromLatin1( "REGEX(STR(?fn),\"^%1$\")" ).arg( excludeFilterToSparqlRegex(filter) );
345  }
346  return fileFilters.join(QLatin1String(" || "));
347 }
348 
349 
350 // static
351 QString Nepomuk2::IndexCleaner::constructExcludeFiltersFolderFilter(Nepomuk2::FileIndexerConfig *cfg)
352 {
353  //
354  // In order to find the entries which we should remove based on matching exclude filters in path
355  // components we need to consider two things:
356  // 1. For each exclude filter find entries which contain "/FILTER/" in their URL. The ones which
357  // have it in their file name are already matched in constructExcludeFiltersFilenameFilter.
358  // 2. If there are include folders which have a path component matching one of the exclude filters
359  // we need to add additional filter terms to make sure we do not remove any of the files in them.
360  // 2.1. The exception are URLs that have a path component which matches one of the exclude filters
361  // in the path relative to the include folder.
362  //
363 
364  // build our own cache of the exclude filters
365  const QStringList excludeFilters = cfg->excludeFilters();
366  RegExpCache excludeFilterCache;
367  excludeFilterCache.rebuildCacheFromFilterList(excludeFilters);
368  QList<QRegExp> excludeRegExps = excludeFilterCache.regExps();
369 
370  //
371  // Find all the include folders that have a path component which should normally be excluded through
372  // the exclude filters.
373  // We create a mapping from exclude filter to the include folders in question.
374  //
375  QMultiHash<QString, QString> includeFolders;
376  foreach(const QString& folder, cfg->includeFolders()) {
377  const QStringList components = folder.split('/', QString::SkipEmptyParts);
378  foreach(const QString& c, components) {
379  for(int i = 0; i < excludeRegExps.count(); ++i) {
380  if(excludeRegExps[i].exactMatch(c)) {
381  includeFolders.insert(excludeRegExps[i].pattern(), folder);
382  }
383  }
384  }
385  }
386 
387  //
388  // Build the SPARQL filters that match the urls to remove
389  //
390  QStringList urlFilters;
391  foreach( const QString& filter, excludeFilters ) {
392  QStringList terms;
393 
394  // 1. Create the basic filter term to get all urls that match the exclude filter
395  terms << QString::fromLatin1( "REGEX(STR(?url),'/%1/')" ).arg( excludeFilterToSparqlRegex(filter) );
396 
397  // 2. Create special cases for all include folders that have a matching path component
398  // (the "10000" is just some random value which should make sure we get all the urls)
399  foreach(const QString folder, includeFolders.values(filter)) {
400  const QString encodedUrl = QString::fromAscii( KUrl( folder ).toEncoded() );
401  terms << QString::fromLatin1("(!REGEX(STR(?url),'^%1/') || REGEX(bif:substring(STR(?url),%2,10000),'/%3/'))")
402  .arg(encodedUrl)
403  .arg(encodedUrl.length()+1)
404  .arg(excludeFilterToSparqlRegex(filter));
405  }
406 
407  // 3. Put all together
408  urlFilters << QLatin1String("(") + terms.join(QLatin1String(" && ")) + QLatin1String(")");
409  }
410 
411  //
412  // Combine the generated filter terms with the typical include folder exclusion filter which makes
413  // sure that we do not remove the include folders themselves.
414  //
415  if(!urlFilters.isEmpty()) {
416  QString filter;
417  if(!includeFolders.values().isEmpty()) {
418  filter += constructExcludeIncludeFoldersFilter(includeFolders.values())
419  += QLatin1String(" && ");
420  }
421  filter += QLatin1String("(") + urlFilters.join(QLatin1String(" || ")) + QLatin1String(")");
422  return filter;
423  }
424  else {
425  return QString();
426  }
427 }
428 
429 #include "indexcleaner.moc"
fileindexerconfig.h
QMultiHash
Nepomuk2::IndexCleaner::IndexCleaner
IndexCleaner(QObject *parent=0)
Definition: indexcleaner.cpp:72
Nepomuk2::IndexCleaner::constructExcludeFiltersFolderFilter
static QString constructExcludeFiltersFolderFilter(Nepomuk2::FileIndexerConfig *cfg)
Construct a SPARQL filter which matches all file URLs (variable ?url) that should not be indexed acco...
Definition: indexcleaner.cpp:351
RegExpCache::rebuildCacheFromFilterList
void rebuildCacheFromFilterList(const QStringList &filters)
Definition: regexpcache.cpp:60
Nepomuk2::FileIndexerConfig::excludeFilters
QStringList excludeFilters() const
Definition: fileindexerconfig.cpp:116
util.h
indexcleaner.h
Nepomuk2::FileIndexerConfig::includeFolders
QStringList includeFolders() const
The folders to search for files to analyze.
Definition: fileindexerconfig.cpp:94
Nepomuk2::FileIndexerConfig
Active config class which emits signals if the config was changed, for example if the KCM saved the c...
Definition: fileindexerconfig.h:38
QObject
Nepomuk2::IndexCleaner::start
virtual void start()
Definition: indexcleaner.cpp:81
Nepomuk2::IndexCleaner::constructExcludeFolderFilter
static QString constructExcludeFolderFilter(Nepomuk2::FileIndexerConfig *cfg)
Construct a SPARQL filter which matches all URLs (variable ?url) that should not be indexed according...
Definition: indexcleaner.cpp:301
Nepomuk2::clearIndexedData
KJob * clearIndexedData(const QUrl &url)
remove all indexed data for url the datamanagement way
Definition: util.cpp:42
Nepomuk2::FileIndexerConfig::folders
QList< QPair< QString, bool > > folders() const
A cleaned up list of all include and exclude folders with their respective include/exclude flag sorte...
Definition: fileindexerconfig.cpp:88
Nepomuk2::IndexCleaner::setDelay
void setDelay(int msecs)
Set the delay between the cleanup queries.
Definition: indexcleaner.cpp:226
resource.h
RegExpCache
Definition: regexpcache.h:30
Nepomuk2::ResourceManager::instance
static ResourceManager * instance()
Definition: resourcemanager.cpp:270
resourcemanager.h
Nepomuk2::IndexCleaner::doSuspend
virtual bool doSuspend()
Definition: indexcleaner.cpp:209
Nepomuk2::IndexCleaner::constructExcludeFiltersFilenameFilter
static QString constructExcludeFiltersFilenameFilter(Nepomuk2::FileIndexerConfig *cfg)
Construct a SPARQL filter which matches all filenames (variable ?fn) that match one of the exclude fi...
Definition: indexcleaner.cpp:336
Nepomuk2::FileIndexerConfig::self
static FileIndexerConfig * self()
Get the first created instance of FileIndexerConfig.
Definition: fileindexerconfig.cpp:82
Nepomuk2::ResourceManager::mainModel
Soprano::Model * mainModel()
Retrieve the main data storage model.
Definition: resourcemanager.cpp:363
Nepomuk2::IndexCleaner::doResume
virtual bool doResume()
Definition: indexcleaner.cpp:216
KJob
RegExpCache::regExps
QList< QRegExp > regExps() const
Definition: regexpcache.h:41
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal