• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • services
  • fileindexer
  • indexer
indexer.cpp
Go to the documentation of this file.
1 /*
2  This file is part of the Nepomuk KDE project.
3  Copyright (C) 2010-2011 Sebastian Trueg <trueg@kde.org>
4  Copyright (C) 2011-2012 Vishesh Handa <handa.vish@gmail.com>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 2.1 of the License, or (at your option) version 3, or any
10  later version accepted by the membership of KDE e.V. (or its
11  successor approved by the membership of KDE e.V.), which shall
12  act as a proxy defined in Section 6 of version 3 of the license.
13 
14  This library is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  Lesser General Public License for more details.
18 
19  You should have received a copy of the GNU Lesser General Public
20  License along with this library. If not, see <http://www.gnu.org/licenses/>.
21 */
22 
23 #include "indexer.h"
24 #include "extractorplugin.h"
25 #include "extractorpluginmanager.h"
26 #include "simpleindexer.h"
27 #include "../util.h"
28 #include "kext.h"
29 #include "nie.h"
30 
31 #include "storeresourcesjob.h"
32 #include "resourcemanager.h"
33 
34 #include <Soprano/Model>
35 #include <Soprano/QueryResultIterator>
36 
37 #include <KDebug>
38 #include <KJob>
39 
40 #include <KService>
41 #include <KMimeType>
42 #include <KServiceTypeTrader>
43 
44 #include <QtCore/QDataStream>
45 #include <QtCore/QDateTime>
46 #include <QtCore/QFile>
47 #include <QtCore/QFileInfo>
48 #include <QtCore/QTimer>
49 
50 #include <Soprano/Vocabulary/NRL>
51 #include <Soprano/Vocabulary/RDF>
52 
53 using namespace Soprano::Vocabulary;
54 using namespace Nepomuk2::Vocabulary;
55 
56 Nepomuk2::Indexer::Indexer( QObject* parent )
57  : QObject( parent )
58 {
59  m_extractorManager = new ExtractorPluginManager( this );
60 }
61 
62 Nepomuk2::Indexer::~Indexer()
63 {
64 }
65 
66 
67 bool Nepomuk2::Indexer::indexFile(const KUrl& url)
68 {
69  QFileInfo info( url.toLocalFile() );
70  if( !info.exists() ) {
71  m_lastError = QString::fromLatin1("'%1' does not exist.").arg(info.filePath());
72  return false;
73  }
74 
75  QString query = QString::fromLatin1("select ?r ?mtype ?l where { ?r nie:url %1; nie:mimeType ?mtype ;"
76  " kext:indexingLevel ?l . }")
77  .arg( Soprano::Node::resourceToN3( url ) );
78  Soprano::Model* model = ResourceManager::instance()->mainModel();
79 
80  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
81 
82  QUrl uri;
83  QString mimeType;
84  if( it.next() ) {
85  uri = it[0].uri();
86  mimeType = it[1].literal().toString();
87  int level = it[2].literal().toInt();
88 
89  if( level > 1 ) {
90  clearIndexingData( url );
91  if( !simpleIndex( url, &uri, &mimeType ) )
92  return false;
93  }
94  }
95  else {
96  if( !simpleIndex( url, &uri, &mimeType ) )
97  return false;
98  }
99 
100  kDebug() << uri << mimeType;
101  return fileIndex( uri, url, mimeType );
102 }
103 
104 
105 bool Nepomuk2::Indexer::clearIndexingData(const QUrl& url)
106 {
107  kDebug() << "Starting to clear";
108  KJob* job = Nepomuk2::clearIndexedData( url );
109  kDebug() << "Done";
110 
111  job->exec();
112  if( job->error() ) {
113  m_lastError = job->errorString();
114  kError() << m_lastError;
115 
116  return false;
117  }
118 
119  return true;
120 }
121 
122 bool Nepomuk2::Indexer::simpleIndex(const QUrl& url, QUrl* uri, QString* mimetype)
123 {
124  QScopedPointer<SimpleIndexingJob> job( new SimpleIndexingJob( url ) );
125  job->setAutoDelete(false);
126  job->exec();
127 
128  if( job->error() ) {
129  m_lastError = job->errorString();
130  kError() << m_lastError;
131 
132  return false;
133  }
134 
135  *uri = job->uri();
136  *mimetype = job->mimeType();
137  return true;
138 }
139 
140 bool Nepomuk2::Indexer::fileIndex(const QUrl& uri, const QUrl& url, const QString& mimeType)
141 {
142  SimpleResourceGraph graph;
143 
144  QList<ExtractorPlugin*> extractors = m_extractorManager->fetchExtractors( url, mimeType );
145  foreach( ExtractorPlugin* ex, extractors ) {
146  graph += ex->extract( uri, url, mimeType );
147  }
148 
149  if( !graph.isEmpty() ) {
150  // Do not send the full plain text content with all the other properties.
151  // It is too large
152  QString plainText;
153  QVariantList vl = graph[uri].property( NIE::plainTextContent() );
154  if( vl.size() == 1 ) {
155  plainText = vl.first().toString();
156  graph[uri].remove( NIE::plainTextContent() );
157  // Check that the SimpleResource is still valid:
158  // if it only contained text it may not be.
159  if ( !graph[uri].isValid() )
160  graph.remove(uri);
161  }
162 
163  // Check again that the graph is not empty: it may have only contained text
164  if( !graph.isEmpty() ) {
165  QHash<QUrl, QVariant> additionalMetadata;
166  additionalMetadata.insert( RDF::type(), NRL::DiscardableInstanceBase() );
167 
168  // we do not have an event loop - thus, we need to delete the job ourselves
169  QScopedPointer<StoreResourcesJob> job( Nepomuk2::storeResources( graph, IdentifyNew,
170  NoStoreResourcesFlags, additionalMetadata ) );
171  job->setAutoDelete(false);
172  job->exec();
173  if( job->error() ) {
174  m_lastError = job->errorString();
175  kError() << "SimpleIndexerError: " << m_lastError;
176  return false;
177  }
178  }
179 
180  if( plainText.length() ) {
181  kDebug() << "Saving plain text content";
182  setNiePlainTextContent( uri, plainText );
183  }
184  }
185 
186  // Update the indexing level even if no data has changed
187  kDebug() << "Updating indexing level";
188  Nepomuk2::updateIndexingLevel( uri, 2 );
189 
190  return true;
191 }
192 
193 
194 
195 Nepomuk2::SimpleResourceGraph Nepomuk2::Indexer::indexFileGraph(const QUrl& url)
196 {
197  SimpleResource res;
198 
199  QString mimeType = KMimeType::findByUrl( url )->name();
200  res.addProperty(NIE::mimeType(), mimeType);
201  res.addProperty(NIE::url(), url);
202 
203  SimpleResourceGraph graph;
204  graph << res;
205 
206  QList<ExtractorPlugin*> extractors = m_extractorManager->fetchExtractors( url, mimeType );
207  foreach( ExtractorPlugin* ex, extractors ) {
208  graph += ex->extract( res.uri(), url, mimeType );
209  }
210 
211  kDebug() << graph;
212  return graph;
213 }
214 
215 
216 QString Nepomuk2::Indexer::lastError() const
217 {
218  return m_lastError;
219 }
220 
221 void Nepomuk2::Indexer::setNiePlainTextContent(const QUrl& uri, QString& plainText)
222 {
223  // This number has been experimentally chosen. Virtuoso cannot handle more than this
224  static const int maxSize = ExtractorPlugin::maxPlainTextSize();
225  if( plainText.size() > maxSize ) {
226  kWarning() << "Trimming plain text content from " << plainText.size() << " to " << maxSize;
227  plainText.resize( maxSize );
228  }
229 
230  // We can use the kext:indexingLevel graph because they are both added by the same application
231  QString query = QString::fromLatin1("select ?g where { graph ?g { %1 kext:indexingLevel ?l . } }")
232  .arg ( Soprano::Node::resourceToN3(uri) );
233  Soprano::Model* model = ResourceManager::instance()->mainModel();
234  Soprano::QueryResultIterator it = model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
235 
236  Soprano::Node graph;
237  if( it.next() ) {
238  graph = it[0];
239  it.close();
240  }
241 
242  if( !graph.isEmpty() ) {
243  // We use addStatement so that the virtuoso backend internally uses paramertized
244  // queries to push the plain text. Parameterized queries seem to use less memory in
245  // virtuoso when inserting.
246  model->addStatement( uri, NIE::plainTextContent(), Soprano::LiteralValue(plainText), graph );
247  if( model->lastError() ) {
248  kError() << model->lastError().message();
249  }
250  }
251 }
252 
253 
254 #include "indexer.moc"
storeresourcesjob.h
Nepomuk2::ExtractorPlugin
The ExtractorPlugin is the base class for all file metadata extractors.
Definition: extractorplugin.h:60
Nepomuk2::SimpleResource
Represents a snapshot of one Nepomuk resource.
Definition: simpleresource.h:46
QHash
QObject
Nepomuk2::SimpleResource::addProperty
void addProperty(const QUrl &property, const QVariant &value)
Add a property.
Definition: simpleresource.cpp:206
Nepomuk2::SimpleResourceGraph
Definition: simpleresourcegraph.h:48
Nepomuk2::clearIndexedData
KJob * clearIndexedData(const QUrl &url)
remove all indexed data for url the datamanagement way
Definition: util.cpp:42
indexer.h
Nepomuk2::storeResources
StoreResourcesJob * storeResources(const Nepomuk2::SimpleResourceGraph &resources, Nepomuk2::StoreIdentificationMode identificationMode=Nepomuk2::IdentifyNew, Nepomuk2::StoreResourcesFlags flags=Nepomuk2::NoStoreResourcesFlags, const QHash< QUrl, QVariant > &additionalMetadata=QHash< QUrl, QVariant >(), const KComponentData &component=KGlobal::mainComponent())
Store many resources at once.
Definition: datamanagement.cpp:144
Nepomuk2::ResourceManager::instance
static ResourceManager * instance()
Definition: resourcemanager.cpp:270
resourcemanager.h
Nepomuk2::Indexer::Indexer
Indexer(QObject *parent=0)
Create a new indexer.
Definition: indexer.cpp:56
Nepomuk2::Indexer::lastError
QString lastError() const
Definition: indexer.cpp:216
Nepomuk2::Indexer::indexFileGraph
Nepomuk2::SimpleResourceGraph indexFileGraph(const QUrl &url)
Extracts the SimpleResourceGraph of the local file or folder and returns it.
Definition: indexer.cpp:195
Nepomuk2::Indexer::indexFile
bool indexFile(const KUrl &url)
Index a single local file or folder (files in a folder will not be indexed recursively).
Definition: indexer.cpp:67
Nepomuk2::IdentifyNew
This is the default mode.
Definition: datamanagement.h:352
Nepomuk2::Indexer::~Indexer
~Indexer()
Destructor.
Definition: indexer.cpp:62
Nepomuk2::ExtractorPluginManager
Definition: extractorpluginmanager.h:30
Nepomuk2::ExtractorPlugin::maxPlainTextSize
static int maxPlainTextSize()
Virtuoso does not support streaming operators, and does not accept queries above a certain size...
Definition: extractorplugin.cpp:124
Nepomuk2::ExtractorPlugin::extract
virtual SimpleResourceGraph extract(const QUrl &resUri, const QUrl &fileUrl, const QString &mimeType)=0
The main function of the plugin that is responsible for extracting the data from the file url and ret...
simpleindexer.h
extractorplugin.h
Nepomuk2::NoStoreResourcesFlags
No flags - default behaviour.
Definition: datamanagement.h:364
Nepomuk2::ResourceManager::mainModel
Soprano::Model * mainModel()
Retrieve the main data storage model.
Definition: resourcemanager.cpp:363
KJob
extractorpluginmanager.h
Nepomuk2::updateIndexingLevel
void updateIndexingLevel(const QUrl &uri, int level)
Update the kext:indexingLevel of the resource uri to level.
Definition: util.cpp:71
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal