• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • services
  • storage
syncresourceidentifier.cpp
Go to the documentation of this file.
1 /*
2  This file is part of the Nepomuk KDE project.
3  Copyright (C) 2010 Vishesh Handa <handa.vish@gmail.com>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) version 3, or any
9  later version accepted by the membership of KDE e.V. (or its
10  successor approved by the membership of KDE e.V.), which shall
11  act as a proxy defined in Section 6 of version 3 of the license.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library. If not, see <http://www.gnu.org/licenses/>.
20 */
21 
22 
23 #include "syncresourceidentifier.h"
24 #include "syncresource.h"
25 
26 #include <QtCore/QSet>
27 
28 #include <Soprano/Statement>
29 #include <Soprano/Graph>
30 #include <Soprano/Node>
31 #include <Soprano/BindingSet>
32 #include <Soprano/StatementIterator>
33 #include <Soprano/QueryResultIterator>
34 #include <Soprano/Model>
35 
36 #include <Soprano/Vocabulary/RDF>
37 #include <Soprano/Vocabulary/RDFS>
38 #include <Soprano/Vocabulary/NAO>
39 #include "nie.h"
40 
41 #include "resource.h"
42 #include "resourcemanager.h"
43 #include "variant.h"
44 
45 #include <KDebug>
46 #include <KUrl>
47 
48 using namespace Nepomuk2::Vocabulary;
49 using namespace Soprano::Vocabulary;
50 
51 Nepomuk2::Sync::ResourceIdentifier::ResourceIdentifier(Soprano::Model * model)
52 {
53  m_model = model;
54 }
55 
56 Nepomuk2::Sync::ResourceIdentifier::~ResourceIdentifier()
57 {
58 
59 }
60 
61 
62 void Nepomuk2::Sync::ResourceIdentifier::addSyncResource(const Nepomuk2::Sync::SyncResource& res)
63 {
64  Q_ASSERT( !res.uri().isEmpty() );
65  QHash<KUrl, SyncResource>::iterator it = m_resourceHash.find( res.uri() );
66  if( it == m_resourceHash.end() ) {
67  m_resourceHash.insert( res.uri(), res );
68  m_notIdentified.insert( res.uri() );
69  }
70  else {
71  it.value().unite( res );
72  }
73 }
74 
75 
76 //
77 // Identification
78 //
79 
80 void Nepomuk2::Sync::ResourceIdentifier::identifyAll()
81 {
82  return identify( m_notIdentified.toList() );
83 }
84 
85 
86 bool Nepomuk2::Sync::ResourceIdentifier::identify(const KUrl& uri)
87 {
88  // If already identified
89  if( m_hash.contains( uri ) )
90  return true;
91 
92  // Avoid recursive calls
93  if( m_beingIdentified.contains( uri ) )
94  return false;
95 
96  bool result = runIdentification( uri );
97  m_beingIdentified.remove( uri );
98 
99  if( result )
100  m_notIdentified.remove( uri );
101 
102  return result;
103 }
104 
105 
106 void Nepomuk2::Sync::ResourceIdentifier::identify(const KUrl::List& uriList)
107 {
108  foreach( const KUrl & uri, uriList ) {
109  identify( uri );
110  }
111 }
112 
113 bool Nepomuk2::Sync::ResourceIdentifier::runIdentification(const KUrl& uri)
114 {
115  Sync::SyncResource res = simpleResource( uri );
116 
117  // Make sure that the res has some rdf:type statements
118  if( !res.contains( RDF::type() ) ) {
119  kDebug() << "No rdf:type statements - Not identifying";
120  return false;
121  }
122 
123  // Remove the types
124  QList<Soprano::Node> requiredTypes = res.values( RDF::type() );
125  res.remove( RDF::type() );
126 
127  QStringList identifyingProperties;
128  QHash<KUrl, Soprano::Node> identifyingPropertiesHash;
129 
130  QHash< KUrl, Soprano::Node >::const_iterator it = res.constBegin();
131  QHash< KUrl, Soprano::Node >::const_iterator constEnd = res.constEnd();
132  for( ; it != constEnd; it++ ) {
133  const QUrl & prop = it.key();
134 
135  if( !isIdentifyingProperty( prop ) ) {
136  continue;
137  }
138 
139  identifyingProperties << Soprano::Node::resourceToN3( prop );
140 
141  // For the case when the property has a resource range, and is still identifying
142  Soprano::Node object = it.value();
143  // vHanda: Should we really be identifying nepomuk uris?
144  if( object.isBlank()
145  || ( object.isResource() && object.uri().scheme() == QLatin1String("nepomuk") ) ) {
146 
147  QUrl objectUri = object.isResource() ? object.uri() : QString( "_:" + object.identifier() );
148  if( !identify( objectUri ) ) {
149  //kDebug() << "Identification of object " << objectUri << " failed";
150  continue;
151  }
152 
153  object = m_hash.value( objectUri );
154  }
155 
156  identifyingPropertiesHash.insert(prop, object);
157  }
158 
159  if( identifyingPropertiesHash.isEmpty() ) {
160  //kDebug() << "No identification properties found!";
161  return false;
162  }
163 
164 
165  // construct the identification query
166  QString query = QLatin1String("select distinct ?r where { ");
167 
168  //
169  // Optimization:
170  // If there is only one identifying property using all that optional and filter stuff
171  // slows the queries down incredibly. Thus, we make it a special case.
172  //
173  if(identifyingPropertiesHash.count() > 1) {
174  int numIdentifyingProperties = 0;
175  for(QHash<KUrl, Soprano::Node>::const_iterator it = identifyingPropertiesHash.constBegin();
176  it != identifyingPropertiesHash.constEnd(); ++it) {
177  query += QString::fromLatin1(" optional { ?r %1 ?o%3 . } . filter(!bound(?o%3) || ?o%3=%2). ")
178  .arg( Soprano::Node::resourceToN3( it.key() ),
179  it.value().toN3(),
180  QString::number( numIdentifyingProperties++ ) );
181  }
182 
183  // Make sure at least one of the identification properties has been matched
184  // by adding filter( bound(?o1) || bound(?o2) ... )
185  query += QString::fromLatin1("filter( ");
186  for( int i=0; i<numIdentifyingProperties-1; i++ ) {
187  query += QString::fromLatin1(" bound(?o%1) || ").arg( QString::number( i ) );
188  }
189  query += QString::fromLatin1(" bound(?o%1) ) . ").arg( QString::number( numIdentifyingProperties - 1 ) );
190  }
191  else {
192  query += QString::fromLatin1("?r %1 %2 . ").arg(Soprano::Node::resourceToN3(identifyingPropertiesHash.constBegin().key()),
193  identifyingPropertiesHash.constBegin().value().toN3());
194  }
195 
196  //
197  // For performance reasons we add a limit even though this could mean that we
198  // miss a resource to identify since we check the types below.
199  //
200  query += QLatin1String("} LIMIT 100");
201 
202 
203  //
204  // Fetch a score for each result.
205  // We do this in a separate query for performance reasons.
206  //
207  QMultiHash<int, KUrl> resultsScoreHash;
208  int maxScore = -1;
209  Soprano::QueryResultIterator qit = m_model->executeQuery( query, Soprano::Query::QueryLanguageSparqlNoInference );
210  while( qit.next() ) {
211  const Soprano::Node r(qit["r"]);
212 
213  //
214  // Check the type requirements. Experiments have shown this to mean a substantial
215  // performance boost as compared to doing it in the main query.
216  //
217  if(!requiredTypes.isEmpty() ) {
218  query = QLatin1String("ask where { ");
219  foreach(const Soprano::Node& type, requiredTypes) {
220  query += QString::fromLatin1("%1 a %2 . ").arg(r.toN3(), type.toN3());
221  }
222  query += QLatin1String("}");
223  if(!m_model->executeQuery(query, Soprano::Query::QueryLanguageSparql).boolValue()) {
224  continue;
225  }
226  }
227 
228 
229  QList<Soprano::BindingSet> bindings = m_model->executeQuery(QString::fromLatin1("select count(?p) as ?cnt where { "
230  "%1 ?p ?o. filter( ?p in (%2) ) . }")
231  .arg( r.toN3(),
232  identifyingProperties.join(",") ),
233  Soprano::Query::QueryLanguageSparqlNoInference).allBindings();
234  if(bindings.isEmpty())
235  continue;
236 
237  const int score = bindings.first()["cnt"].literal().toInt();
238 
239  if( maxScore < score ) {
240  maxScore = score;
241  }
242 
243  resultsScoreHash.insert(score, r.uri());
244  }
245 
246  //
247  // Only get the results which have the maximum score
248  //
249  QSet<KUrl> results = QSet<KUrl>::fromList(resultsScoreHash.values(maxScore));
250 
251 
252  //kDebug() << "Got " << results.size() << " results";
253  if( results.empty() )
254  return false;
255 
256  KUrl newUri;
257  if( results.size() == 1 ) {
258  newUri = *results.begin();
259  }
260  else {
261  kDebug() << "DUPLICATE RESULTS!";
262  newUri = duplicateMatch( res.uri(), results );
263  }
264 
265  if( !newUri.isEmpty() ) {
266  kDebug() << uri << " --> " << newUri;
267  manualIdentification( uri, newUri );
268  return true;
269  }
270 
271  return false;
272 }
273 
274 
275 //
276 // Getting the info
277 //
278 
279 QHash<QUrl, QUrl> Nepomuk2::Sync::ResourceIdentifier::mappings() const
280 {
281  return m_hash;
282 }
283 
284 Nepomuk2::Sync::SyncResource Nepomuk2::Sync::ResourceIdentifier::simpleResource(const KUrl& uri)
285 {
286  QHash< KUrl, SyncResource >::const_iterator it = m_resourceHash.constFind( uri );
287  if( it != m_resourceHash.constEnd() ) {
288  return it.value();
289  }
290 
291  return SyncResource();
292 }
293 
294 Nepomuk2::Sync::ResourceHash Nepomuk2::Sync::ResourceIdentifier::resourceHash() const
295 {
296  return m_resourceHash;
297 }
298 
299 KUrl Nepomuk2::Sync::ResourceIdentifier::duplicateMatch(const KUrl& uri, const QSet< KUrl >& matchedUris)
300 {
301  Q_UNUSED( uri );
302  Q_UNUSED( matchedUris );
303 
304  // By default - Identification fails
305  return KUrl();
306 }
307 
308 void Nepomuk2::Sync::ResourceIdentifier::manualIdentification(const KUrl& oldUri, const KUrl& newUri)
309 {
310  m_hash[ oldUri ] = newUri;
311  m_notIdentified.remove( oldUri );
312 }
QMultiHash
syncresourceidentifier.h
Nepomuk2::Sync::ResourceIdentifier::mappings
QHash< QUrl, QUrl > mappings() const
Returns mappings of the identified uri.
Definition: syncresourceidentifier.cpp:279
Nepomuk2::Sync::ResourceIdentifier::~ResourceIdentifier
virtual ~ResourceIdentifier()
Definition: syncresourceidentifier.cpp:56
QHash
Nepomuk2::Sync::ResourceIdentifier::identifyAll
void identifyAll()
Definition: syncresourceidentifier.cpp:80
variant.h
Nepomuk2::Sync::ResourceIdentifier::resourceHash
ResourceHash resourceHash() const
Definition: syncresourceidentifier.cpp:294
Nepomuk2::Sync::ResourceIdentifier::ResourceIdentifier
ResourceIdentifier(Soprano::Model *model)
Definition: syncresourceidentifier.cpp:51
Nepomuk2::Sync::ResourceIdentifier::manualIdentification
void manualIdentification(const KUrl &oldUri, const KUrl &newUri)
Sets oldUri -> newUri in the mappings.
Definition: syncresourceidentifier.cpp:308
Nepomuk2::Sync::ResourceIdentifier::identify
bool identify(const KUrl &uri)
Definition: syncresourceidentifier.cpp:86
resource.h
Nepomuk2::Sync::ResourceIdentifier::addSyncResource
void addSyncResource(const SyncResource &res)
Definition: syncresourceidentifier.cpp:62
Nepomuk2::Sync::SyncResource::uri
KUrl uri() const
Definition: syncresource.cpp:127
resourcemanager.h
Nepomuk2::Sync::ResourceIdentifier::duplicateMatch
virtual KUrl duplicateMatch(const KUrl &uri, const QSet< KUrl > &matchedUris)
Called during identification if there is more than one match for one resource.
Definition: syncresourceidentifier.cpp:299
Nepomuk2::Sync::ResourceIdentifier::runIdentification
virtual bool runIdentification(const KUrl &uri)
This function returns true if identification was successful, and false if it was not.
Definition: syncresourceidentifier.cpp:113
Nepomuk2::Sync::ResourceIdentifier::simpleResource
SyncResource simpleResource(const KUrl &uri)
Definition: syncresourceidentifier.cpp:284
syncresource.h
Nepomuk2::Sync::SyncResource
A SyncResource is a convenient way of storing a set of properties and objects for a common subject...
Definition: syncresource.h:53
Nepomuk2::Sync::ResourceHash
A ResourceHash is a convenient way of representing a list of Soprano::Statements or a Soprano::Graph...
Definition: syncresource.h:109
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:09 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal