• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • cleaner
cleaningjobs.cpp
Go to the documentation of this file.
1 /*
2  <one line to give the library's name and an idea of what it does.>
3  Copyright (C) 2012 Vishesh Handa <me@vhanda.in>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Lesser General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19 
20 
21 #include "cleaningjob.h"
22 #include "cleaningjobs.h"
23 #include "resource.h"
24 #include "variant.h"
25 #include "resourcemanager.h"
26 #include "datamanagement.h"
27 #include "createresourcejob.h"
28 #include "property.h"
29 #include "literal.h"
30 
31 #include <QtCore/QTimer>
32 #include <QtCore/QFile>
33 
34 #include <Soprano/Model>
35 #include <Soprano/QueryResultIterator>
36 #include <Soprano/Vocabulary/NAO>
37 
38 #include "nie.h"
39 #include "nfo.h"
40 #include "nco.h"
41 
42 #include <KDebug>
43 #include <KService>
44 #include <KServiceTypeTrader>
45 
46 using namespace Nepomuk2;
47 using namespace Nepomuk2::Vocabulary;
48 using namespace Soprano::Vocabulary;
49 
50 
51 
52 //
53 // Crappy Inference Data
54 //
55 
56 class CrappyInferenceData : public CleaningJob {
57 public:
58  QString jobName() {
59  return i18n("Removing legacy data");
60  }
61 private:
62  void execute();
63 };
64 
65 void CrappyInferenceData::execute()
66 {
67  Soprano::Model* model = Nepomuk2::ResourceManager::instance()->mainModel();
68 
69  model->removeContext(QUrl::fromEncoded("urn:crappyinference:inferredtriples"));
70  model->removeContext(QUrl::fromEncoded("urn:crappyinference2:inferredtriples"));
71  model->removeContext(QUrl::fromEncoded("nepomuk:/ctx/typevisibility"));
72 }
73 
74 //
75 // Tags
76 //
77 
78 class EmptyTagCleaner : public CleaningJob {
79 public:
80  QString jobName() {
81  return i18n("Removing empty tags");
82  }
83 private:
84  void execute();
85 };
86 
87 void EmptyTagCleaner::execute()
88 {
89  QString query = QString::fromLatin1("select ?r where { ?r a %1 . FILTER NOT EXISTS { ?r %2 ?i . } }")
90  .arg( Soprano::Node::resourceToN3( NAO::Tag() ),
91  Soprano::Node::resourceToN3( NAO::identifier() ) );
92 
93  Soprano::Model* model = Nepomuk2::ResourceManager::instance()->mainModel();
94  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
95  QList<QUrl> deleteList;
96  while( it.next() )
97  deleteList << it[0].uri();
98 
99  if( !deleteList.isEmpty() && !shouldQuit() ) {
100  KJob* job = Nepomuk2::removeResources( deleteList );
101  job->exec();
102  }
103 }
104 
105 //
106 // Duplicates
107 //
108 
109 class DuplicateMergingJob : public CleaningJob {
110 public:
111  explicit DuplicateMergingJob(const QUrl& type, const QUrl& prop, QObject* parent = 0);
112 
113 private:
114  void execute();
115  QUrl m_type;
116  QUrl m_prop;
117 };
118 
119 DuplicateMergingJob::DuplicateMergingJob(const QUrl& type, const QUrl& prop, QObject* parent)
120  : CleaningJob(parent)
121  , m_type(type)
122  , m_prop(prop)
123 {
124 }
125 
126 void DuplicateMergingJob::execute()
127 {
128  QString query = QString::fromLatin1("select distinct ?i where { ?r a %1 . ?r %2 ?i . }")
129  .arg( Soprano::Node::resourceToN3( m_type ),
130  Soprano::Node::resourceToN3( m_prop ) );
131 
132  Soprano::Model* model = Nepomuk2::ResourceManager::instance()->mainModel();
133  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
134  while( it.next() && !shouldQuit() ) {
135  QString query = QString::fromLatin1("select distinct ?r where { ?r a %1 . ?r %2 %3 . }")
136  .arg( Soprano::Node::resourceToN3( m_type ),
137  Soprano::Node::resourceToN3( m_prop ),
138  it[0].toN3() );
139  Soprano::QueryResultIterator iter = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
140 
141  QList<QUrl> resourcesToMerge;
142  while( iter.next() )
143  resourcesToMerge << iter[0].uri();
144 
145  if( resourcesToMerge.size() <= 1 )
146  continue;
147 
148  if( resourcesToMerge.size() > 10 && !shouldQuit() ) {
149  // Splice the first 10 elements
150  QList<QUrl> list = resourcesToMerge.mid( 0, 10 );
151  resourcesToMerge = resourcesToMerge.mid( 10 );
152 
153  KJob* job = Nepomuk2::mergeResources( list );
154  job->exec();
155  if( job->error() )
156  kError() << job->errorString();
157  }
158  }
159 }
160 
161 
162 class DuplicateTagCleaner : public DuplicateMergingJob {
163 public:
164  explicit DuplicateTagCleaner(QObject* parent = 0)
165  : DuplicateMergingJob(NAO::Tag(), NAO::identifier(), parent) {}
166 
167  QString jobName() {
168  return i18n("Merging duplicate tags");
169  }
170 };
171 
172 
173 class DuplicateFileCleaner : public DuplicateMergingJob {
174 public:
175  explicit DuplicateFileCleaner(QObject* parent = 0)
176  : DuplicateMergingJob(NFO::FileDataObject(), NIE::url(), parent) {}
177 
178  QString jobName() {
179  return i18n("Merging duplicate file metadata");
180  }
181 };
182 
183 class DuplicateIconCleaner : public DuplicateMergingJob {
184 public:
185  explicit DuplicateIconCleaner(QObject* parent = 0)
186  : DuplicateMergingJob(NAO::FreeDesktopIcon(), NAO::iconName(), parent) {}
187 
188  QString jobName() {
189  return i18n("Merging duplicate icons");
190  }
191 };
192 
193 //
194 // Akonadi
195 //
196 
197 class AkonadiMigrationJob : public CleaningJob {
198 public:
199  explicit AkonadiMigrationJob(QObject* parent = 0)
200  : CleaningJob(parent) {}
201 
202  QString jobName() {
203  return i18n("Porting legacy Akonadi data");
204  }
205 
206 private:
207  void execute();
208 };
209 
210 void AkonadiMigrationJob::execute()
211 {
212  const QUrl akonadiDataObject("http://akonadi-project.org/ontologies/aneo#AkonadiDataObject");
213 
214  QLatin1String query("select distinct ?r where { ?r ?p ?o. FILTER(REGEX(STR(?r), '^akonadi')). }");
215 
216  Soprano::Model* model = Nepomuk2::ResourceManager::instance()->mainModel();
217  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
218  while( it.next() && !shouldQuit() ) {
219  // FIXME: What about the agent?
220  Nepomuk2::CreateResourceJob* cjob = Nepomuk2::createResource( QList<QUrl>() << akonadiDataObject, QString(), QString() );
221  cjob->exec();
222  if( cjob->error() ) {
223  kDebug() << cjob->errorString();
224  return;
225  }
226 
227  kDebug() << cjob->resourceUri() << " " << it[0].uri();
228  KJob* job = Nepomuk2::mergeResources( cjob->resourceUri(), it[0].uri() );
229  job->exec();
230  if( job->error() ) {
231  kDebug() << job->errorString();
232  return;
233  }
234 
235  job = Nepomuk2::setProperty( QList<QUrl>() << cjob->resourceUri(), NIE::url(), QVariantList() << it[0].uri() );
236  job->exec();
237  if( job->error() ) {
238  kDebug() << job->errorString();
239  return;
240  }
241  }
242 }
243 
244 //
245 // Duplicate contacts
246 //
247 
248 class DuplicateContactJob : public CleaningJob {
249 public:
250  explicit DuplicateContactJob(QObject* parent = 0)
251  : CleaningJob(parent) {}
252 
253  virtual QString jobName() {
254  return i18n("Merging duplicate contacts");
255  }
256 private:
257  virtual void execute();
258 };
259 
260 void DuplicateContactJob::execute()
261 {
262  QLatin1String contactQuery("select distinct ?fn where { ?r a nco:Contact ; nco:fullname ?fn . }");
263 
264  Soprano::Model *model = Nepomuk2::ResourceManager::instance()->mainModel();
265  Soprano::QueryResultIterator it = model->executeQuery( contactQuery, Soprano::Query::QueryLanguageSparql );
266  while( it.next() && !shouldQuit() ) {
267  const QString name( it[0].literal().toString() );
268  // This happens at times, it's weird
269  if( name.isEmpty() )
270  continue;
271 
272  kDebug() << "Looking for " << name;
273  // Get all the contacts with the same first name
274  QString query = QString::fromLatin1("select ?r where { ?r a nco:Contact ; nco:fullname %1 . }")
275  .arg( Soprano::Node::literalToN3( name ) );
276 
277  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
278  QSet<QUrl> contactToMerge;
279  while( it.next() && !shouldQuit() ) {
280  const QUrl resUri = it[0].uri();
281  QString propQuery = QString::fromLatin1("select ?p ?o where { %1 ?p ?o . }")
282  .arg( Soprano::Node::resourceToN3( resUri ) );
283  Soprano::QueryResultIterator qit = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
284  bool isCandiate = true;
285 
286  while( qit.next() && !shouldQuit() ) {
287  const QUrl prop = qit[0].uri();
288 
289  // Ignore meta properties
290  if( prop == NAO::lastModified() || prop == NAO::created() )
291  continue;
292 
293  if( prop == NCO::fullname() )
294  continue;
295 
296  // All other non-resource range properites indicate a failure
297  if( Nepomuk2::Types::Property(prop).literalRangeType().isValid() ) {
298  isCandiate = false;
299  break;
300  }
301  }
302 
303  if( isCandiate ) {
304  contactToMerge << resUri;
305  }
306  }
307 
308 
309  // Merge all the candidates
310  if( contactToMerge.size() > 1 && !shouldQuit() ) {
311  kDebug() << "Merging " << contactToMerge.size() << " contacts for " << name;
312  KJob* job = Nepomuk2::mergeResources( contactToMerge.toList() );
313  job->exec();
314  if( job->error() )
315  kError() << job->errorString();
316  }
317  }
318 }
319 
320 //
321 // Duplicate graphs
322 //
323 
324 class DuplicateStatementJob : public CleaningJob {
325 public:
326  explicit DuplicateStatementJob(QObject* parent = 0)
327  : CleaningJob(parent) {}
328 
329  QString jobName() {
330  return i18n("Removing duplicate metadata");
331  }
332 
333 private:
334  void execute();
335 };
336 
337 void DuplicateStatementJob::execute()
338 {
339  QLatin1String query("select ?r ?p ?o where { graph ?g1 { ?r ?p ?o. } "
340  " graph ?g2 { ?r ?p ?o. } FILTER( ?g1 != ?g2 ) . }");
341 
342  Soprano::Model *model = Nepomuk2::ResourceManager::instance()->mainModel();
343  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
344  while( it.next() && !shouldQuit() ) {
345  Soprano::Statement st( it["r"], it["p"], it["o"] );
346 
347  // List all the graphs that it belongs to
348  QString query = QString::fromLatin1("select ?g where { graph ?g { %1 %2 %3 . } }")
349  .arg( st.subject().toN3(), st.predicate().toN3(), st.object().toN3() );
350  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
351 
352  QList<QUrl> graphs;
353  while( it.next() )
354  graphs << it[0].uri();
355 
356  // Remove all statements apart from the first graph
357  // TODO: Maybe we should be smarter about it?
358  for( int i=1; i<graphs.size() && !shouldQuit(); i++ ) {
359  Soprano::Statement statement( st );
360  statement.setContext( graphs[i] );
361 
362  kDebug() << statement;
363  model->removeAllStatements( statement );
364  }
365  }
366 }
367 
368 class InvalidFileResourcesJob : public CleaningJob {
369 public:
370  explicit InvalidFileResourcesJob(QObject* parent = 0)
371  : CleaningJob(parent) {}
372 
373  virtual QString jobName() {
374  return i18n("Cleaning invalid file metadata");
375  }
376 private:
377  virtual void execute();
378 };
379 
380 void InvalidFileResourcesJob::execute()
381 {
382  //
383  // Delete all the files that do not have a url
384  //
385  QLatin1String query("select distinct ?r where { ?r a nfo:FileDataObject. FILTER NOT EXISTS {"
386  " ?r nie:url ?url . } }");
387 
388  Soprano::Model *model = Nepomuk2::ResourceManager::instance()->mainModel();
389  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
390 
391  QList<QUrl> deleteList;
392  while( it.next() && !shouldQuit() ) {
393  deleteList << it[0].uri();
394 
395  if( deleteList.size() > 10 ) {
396  kDebug() << deleteList;
397  KJob* job = Nepomuk2::removeResources( deleteList );
398  job->exec();
399  deleteList.clear();
400  }
401  }
402 
403  if( !deleteList.isEmpty() ) {
404  KJob* job = Nepomuk2::removeResources( deleteList );
405  job->exec();
406  deleteList.clear();
407  }
408 
409  //
410  // Delete the files whose url does not exist
411  //
412 
413  /*
414  query = QLatin1String( "select distinct ?r ?url where { "
415  "?r a nfo:FileDataObject ; nie:url ?url . }" );
416  it = model->executeQuery( query, Soprano::Query::QueryLanguageSparql );
417 
418  while( it.next() && !shouldQuit() ) {
419  QUrl url( it["url"].uri() );
420  QString file = url.toLocalFile();
421 
422  if( !file.isEmpty() && !QFile::exists(file) ) {
423  deleteList << it["r"].uri();
424  }
425 
426  if( deleteList.size() > 10 ) {
427  KJob* job = Nepomuk2::removeResources( deleteList );
428  job->exec();
429  deleteList.clear();
430  }
431  }
432 
433  if( !deleteList.isEmpty() ) {
434  KJob* job = Nepomuk2::removeResources( deleteList );
435  job->exec();
436  deleteList.clear();
437  }*/
438 }
439 
440 
441 class InvalidResourcesJob : public CleaningJob {
442 public:
443  explicit InvalidResourcesJob(QObject* parent = 0)
444  : CleaningJob(parent) {}
445 
446  virtual QString jobName() {
447  return i18n("Cleaning invalid resources");
448  }
449 private:
450  virtual void execute();
451 };
452 
453 void InvalidResourcesJob::execute()
454 {
455  // Clear all the resources which do not have any rdf:type
456  QLatin1String query("select distinct ?r where { ?r ?p ?o . FILTER NOT EXISTS { ?r a ?t . } }");
457 
458  Soprano::Model *model = Nepomuk2::ResourceManager::instance()->mainModel();
459  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
460 
461  QList<QUrl> deleteList;
462  while( it.next() && !shouldQuit() ) {
463  deleteList << it[0].uri();
464 
465  if( deleteList.size() > 10 ) {
466  kDebug() << deleteList;
467  KJob* job = Nepomuk2::removeResources( deleteList );
468  job->exec();
469  deleteList.clear();
470  }
471  }
472 
473  if( !deleteList.isEmpty() ) {
474  KJob* job = Nepomuk2::removeResources( deleteList );
475  job->exec();
476  deleteList.clear();
477  }
478 }
479 
480 class InvalidStatementsJob : public CleaningJob {
481 public:
482  explicit InvalidStatementsJob(QObject* parent = 0)
483  : CleaningJob(parent) {}
484 
485  virtual QString jobName() {
486  return i18n("Cleaning invalid statements");
487  }
488 private:
489  virtual void execute();
490 };
491 
492 void InvalidStatementsJob::execute()
493 {
494  // Clear all the statements which are violating the nrl:maxCardinality
495  QString query = QString::fromLatin1("select distinct ?r ?p where { ?p nrl:maxCardinality %1 ."
496  " ?r ?p ?o1 , ?o2 . FILTER( ?o1 != ?o2 ) . }")
497  .arg( Soprano::Node::literalToN3( Soprano::LiteralValue(1) ) );
498 
499  Soprano::Model *model = Nepomuk2::ResourceManager::instance()->mainModel();
500  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
501 
502  while( it.next() && !shouldQuit() ) {
503  Soprano::Statement st( it[0], it[1], Soprano::Node() );
504  kDebug() << st;
505 
506  // Keep the oldest statement
507  QString query = QString::fromLatin1("select ?o ?g where { graph ?g { %1 %2 ?o . } "
508  "?g nao:created ?c . } ORDER BY ASC(?c) LIMIT 1")
509  .arg( st.subject().toN3(), st.predicate().toN3() );
510  Soprano::QueryResultIterator iter = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
511  if( iter.next() ) {
512  st.setObject( iter[0] );
513  st.setContext( iter[1] );
514 
515  // FIXME: What about the trailing graphs?
516  kDebug() << st;
517  model->removeAllStatements( st.subject(), st.predicate(), Soprano::Node() );
518  model->addStatement( st );
519  }
520  }
521 
522  // TODO: Figure out how to clear the statements which do not follow the domain and range
523  // restrictions
524 }
525 
526 
527 QList< CleaningJob* > allJobs()
528 {
529  // standard jobs
530  QList<CleaningJob*> list;
531  list << new CrappyInferenceData();
532  list << new EmptyTagCleaner();
533  list << new DuplicateTagCleaner();
534  list << new DuplicateFileCleaner();
535  list << new DuplicateIconCleaner();
536  list << new AkonadiMigrationJob();
537  list << new DuplicateStatementJob();
538  list << new DuplicateContactJob();
539  list << new InvalidFileResourcesJob();
540  list << new InvalidResourcesJob();
541  list << new InvalidStatementsJob();
542 
543  // plugins
544  KService::List plugins = KServiceTypeTrader::self()->query( "NepomukCleaningJob" );
545  for( KService::List::const_iterator it = plugins.constBegin(); it != plugins.constEnd(); it++ ) {
546  KService::Ptr service = *it;
547 
548  QString error;
549  Nepomuk2::CleaningJob* job = service->createInstance<Nepomuk2::CleaningJob>( 0, QVariantList(), &error );
550  if( job ) {
551  list << job;
552  }
553  else {
554  kError() << "Could not create Cleaning job: " << service->library();
555  kError() << error;
556  }
557  }
558 
559  return list;
560 }
561 
562 #include "cleaningjobs.moc"
Nepomuk2::CleaningJob::jobName
virtual QString jobName()=0
Nepomuk2::CreateResourceJob::resourceUri
QUrl resourceUri() const
The returned resource URI.
Definition: createresourcejob.cpp:89
Nepomuk2::mergeResources
KJob * mergeResources(const QUrl &resource1, const QUrl &resource2, const KComponentData &component=KGlobal::mainComponent())
Merge two resources into one.
Definition: datamanagement.cpp:125
QObject
Nepomuk2::Tag
A Tag can be assigned to any Thing.
Definition: tag.h:38
variant.h
createresourcejob.h
resource.h
Nepomuk2::Types::Property
A property is a resource of type rdf:Property which relates a domain with a range.
Definition: libnepomukcore/types/property.h:52
Nepomuk2::ResourceManager::instance
static ResourceManager * instance()
Definition: resourcemanager.cpp:270
resourcemanager.h
cleaningjob.h
datamanagement.h
cleaningjobs.h
Nepomuk2::CreateResourceJob
Job returned by Nepomuk2::createResource().
Definition: createresourcejob.h:47
Nepomuk2::CleaningJob
Definition: cleaningjob.h:47
Nepomuk2::removeResources
KJob * removeResources(const QList< QUrl > &resources, Nepomuk2::RemovalFlags flags=Nepomuk2::NoRemovalFlags, const KComponentData &component=KGlobal::mainComponent())
Completely remove resources from the database.
Nepomuk2::setProperty
KJob * setProperty(const QList< QUrl > &resources, const QUrl &property, const QVariantList &values, const KComponentData &component=KGlobal::mainComponent())
Set the values of a property for one or more resources.
Definition: datamanagement.cpp:48
allJobs
QList< CleaningJob * > allJobs()
Definition: cleaningjobs.cpp:527
Nepomuk2::createResource
CreateResourceJob * createResource(const QList< QUrl > &types, const QString &label, const QString &description, const KComponentData &component=KGlobal::mainComponent())
Create a new resource.
Definition: datamanagement.cpp:85
literal.h
Nepomuk2::ResourceManager::mainModel
Soprano::Model * mainModel()
Retrieve the main data storage model.
Definition: resourcemanager.cpp:363
KJob
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal