• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • libnepomukcore
  • query
literalterm.cpp
Go to the documentation of this file.
1 /*
2  This file is part of the Nepomuk KDE project.
3  Copyright (C) 2009-2012 Sebastian Trueg <trueg@kde.org>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) version 3, or any
9  later version accepted by the membership of KDE e.V. (or its
10  successor approved by the membership of KDE e.V.), which shall
11  act as a proxy defined in Section 6 of version 3 of the license.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library. If not, see <http://www.gnu.org/licenses/>.
20 */
21 
22 #include "literalterm.h"
23 #include "literalterm_p.h"
24 #include "querybuilderdata_p.h"
25 #include "query_p.h"
26 
27 #include <Soprano/Node>
28 #include <Soprano/Vocabulary/RDFS>
29 
30 Nepomuk2::Query::LiteralTermPrivate::LiteralTermPrivate()
31 {
32  m_type = Term::Literal;
33 }
34 
35 
36 bool Nepomuk2::Query::LiteralTermPrivate::equals( const TermPrivate* other ) const
37 {
38  if ( other->m_type == m_type ) {
39  const LiteralTermPrivate* rtp = static_cast<const LiteralTermPrivate*>( other );
40  return rtp->m_value == m_value;
41  }
42  else {
43  return false;
44  }
45 }
46 
47 
48 //
49 // A LiteralTerm not used in a ComparisonTerm is a "classical" plain text search term. That would mean "?r ?p ?v . ?v bif:contains 'foobar' . "
50 // But since many relations like nao:hasTag or nmm:performer or similar are considered as plain text fields we extend the pattern by adding
51 // relations to resources that have labels containing the query text.
52 //
53 QString Nepomuk2::Query::LiteralTermPrivate::toSparqlGraphPattern( const QString& resourceVarName, const TermPrivate* parentTerm, const QString &additionalFilters, QueryBuilderData *qbd ) const
54 {
55  Q_UNUSED(parentTerm);
56 
57  if( m_value.toString().isEmpty() )
58  return QString();
59 
60  const QString p1 = qbd->uniqueVarName();
61  const QString v1 = qbd->uniqueVarName();
62  const QString r2 = qbd->uniqueVarName();
63  const QString containsPattern = createContainsPattern( v1, m_value.toString(), qbd );
64 
65  // { ?r ?p1 ?v1 . containsPattern(v1) }
66  // UNION
67  // { ?r ?p1 ?r2 . ?r2 rdfs:label ?v1 . containsPattern(v1) } .
68  return QString::fromLatin1( "{ %1 %2 %3 . %4 } "
69  "UNION "
70  "{ %1 %2 %5 . %5 %6 %3 . %4 } . " )
71  .arg( resourceVarName,
72  p1,
73  v1,
74  containsPattern + additionalFilters,
75  r2,
76  qbd->uniqueVarName() ); //Soprano::Node::resourceToN3(Soprano::Vocabulary::RDFS::label()) );
77 // FIXME: Change back to rdfs:label when virtuoso inferencing bug is fixed
78 // BUG: 3591024 - https://sourceforge.net/tracker/?func=detail&aid=3591024&group_id=161622&atid=820574
79 }
80 
81 
82 namespace {
83 QString prepareRegexText( const QString& text )
84 {
85  QString filterRxStr = QRegExp::escape( text );
86  filterRxStr.replace( "\\*", QLatin1String( ".*" ) );
87  filterRxStr.replace( "\\?", QLatin1String( "." ) );
88  filterRxStr.replace( '\\',"\\\\" );
89  return filterRxStr;
90 }
91 }
92 
93 
94 QString Nepomuk2::Query::LiteralTermPrivate::createContainsPattern( const QString& varName, const QString& text, Nepomuk2::Query::QueryBuilderData* qbd )
95 {
96  if( text.isEmpty() )
97  return QString();
98 
99  // each token with a negation flag
100  QList<QPair<QString, bool> > containsTokens;
101  QList<QPair<QString, bool> > regexTokens;
102 
103  // we only support AND or OR, not both at the same time
104  // TODO: Fix this. Virtuoso supports a combination of both
105  bool isUnion = false;
106 
107  // gather all the tokens
108  bool inQuotes = false;
109  QString currentToken;
110  bool nextIsNegated = false;
111  int i = 0;
112  while( i < text.length() ) {
113  const QChar& c = text[i];
114  bool tokenEnd = false;
115 
116  if( c == QChar('"') || c == QChar('\'') ) {
117  inQuotes = !inQuotes;
118  tokenEnd = !inQuotes;
119  }
120  else if( c.isSpace() && !inQuotes ) {
121  tokenEnd = true;
122  }
123  else {
124  currentToken.append(c);
125  }
126 
127  if( i == text.length()-1 ) {
128  tokenEnd = true;
129  }
130 
131  if( tokenEnd && !currentToken.isEmpty() ) {
132  //
133  // Handle the three special tokens supported in Virtuoso's full text search engine we support (there is also "near" which we do not handle yet)
134  //
135  if( currentToken.toLower() == QLatin1String("and") ) {
136  isUnion = false;
137  }
138  else if( currentToken.toLower() == QLatin1String("or") ) {
139  isUnion = true;
140  }
141  else if( currentToken.toLower() == QLatin1String("not") ) {
142  nextIsNegated = true;
143  }
144  else {
145  QPair<QString, bool> currentTokenPair = qMakePair( currentToken, nextIsNegated );
146 
147  //
148  // Virtuoso needs four leading chars when using wildcards. Thus, if there is less (this includes 0) we fall back to the slower regex filter
149  //
150  const QStringList subTokens = currentToken.split( QLatin1Char(' '), QString::SkipEmptyParts );
151  bool needsRegex = false;
152  QRegExp regex(QLatin1String("[\\?\\*]")); // The regex used to check if we needs a regex
153  Q_FOREACH( const QString& subToken, subTokens ) {
154  const int i = subToken.indexOf( regex );
155  if( i >= 0 && i < 4 ) {
156  needsRegex = true;
157  break;
158  }
159  }
160  if( !needsRegex ) {
161  containsTokens << currentTokenPair;
162  }
163  else {
164  regexTokens << currentTokenPair;
165  }
166  }
167 
168  nextIsNegated = false;
169  currentToken.clear();
170  }
171 
172  ++i;
173  }
174 
175  // add optional NOT terms to the contains tokens and build the search excerpts terms
176  QStringList containsFilterTokens;
177  QStringList fullTextTerms;
178  for( int i = 0; i < containsTokens.count(); ++i ) {
179  QString containsFilterToken;
180  if( containsTokens[i].second )
181  containsFilterToken += QLatin1String("NOT ");
182  containsFilterToken += QString::fromLatin1("'%1'").arg(containsTokens[i].first);
183  containsFilterTokens << containsFilterToken;
184 
185  // we only want to show excerpt with the actually searched terms
186  if( !containsTokens[i].second )
187  fullTextTerms << containsTokens[i].first;
188  }
189  if( !fullTextTerms.isEmpty() ) {
190  qbd->addFullTextSearchTerms( varName, fullTextTerms );
191  }
192 
193  const QString finalContainsToken = containsFilterTokens.join( isUnion ? QLatin1String(" OR ") : QLatin1String(" AND "));
194 
195  // convert the regex tokens into SPARQL filters
196  QStringList filters;
197  for( int i = 0; i < regexTokens.count(); ++i ) {
198  QString regexFilter;
199  if( regexTokens[i].second )
200  regexFilter += QLatin1Char('!');
201  regexFilter += QString::fromLatin1( "REGEX(%1, \"%2\", 'i')" )
202  .arg( varName,
203  prepareRegexText(regexTokens[i].first) );
204  filters << regexFilter;
205  }
206 
207  //
208  // with the current filter design we can only support full-text scoring if we either
209  // only have contains pattern or if isUnion is false. In the latter case we can simply
210  // use a normal graph pattern and a FILTER.
211  //
212  QString containsPattern;
213  if( !containsFilterTokens.isEmpty() &&
214  qbd->query()->m_fullTextScoringEnabled &&
215  (regexTokens.isEmpty() || !isUnion) ) {
216  containsPattern = QString::fromLatin1("%1 bif:contains \"%2\" OPTION (score %3) . ")
217  .arg( varName,
218  finalContainsToken,
219  qbd->createScoringVariable() );
220  }
221 
222  //
223  // The fallback is to add bif:contains as a filter. This syntax does not support scoring though.
224  //
225  else if( !containsFilterTokens.isEmpty() ) {
226  filters << QString::fromLatin1("bif:contains(%1, \"%2\")").arg( varName, finalContainsToken );
227  }
228 
229  QString filterPattern;
230  if(!filters.isEmpty())
231  filterPattern = QString( QLatin1String("FILTER(") + filters.join( isUnion ? QLatin1String(" || ") : QLatin1String(" && ") ) + QLatin1String(") . ") );
232 
233  return filterPattern + containsPattern;
234 }
235 
236 
237 Nepomuk2::Query::LiteralTerm::LiteralTerm( const LiteralTerm& term )
238  : Term( term )
239 {
240 }
241 
242 
243 Nepomuk2::Query::LiteralTerm::LiteralTerm( const Soprano::LiteralValue& value )
244  : Term( new LiteralTermPrivate() )
245 {
246  setValue( value );
247 }
248 
249 
250 Nepomuk2::Query::LiteralTerm::~LiteralTerm()
251 {
252 }
253 
254 
255 Nepomuk2::Query::LiteralTerm& Nepomuk2::Query::LiteralTerm::operator=( const LiteralTerm& term )
256 {
257  d_ptr = term.d_ptr;
258  return *this;
259 }
260 
261 
262 Soprano::LiteralValue Nepomuk2::Query::LiteralTerm::value() const
263 {
264  N_D_CONST( LiteralTerm );
265  return d->m_value;
266 }
267 
268 
269 void Nepomuk2::Query::LiteralTerm::setValue( const Soprano::LiteralValue& value )
270 {
271  N_D( LiteralTerm );
272  d->m_value = value;
273 }
Nepomuk2::Query::LiteralTerm::setValue
void setValue(const Soprano::LiteralValue &value)
Set the value this LiteralTerm should match to.
Definition: literalterm.cpp:269
Nepomuk2::Query::Term
The base class for all term types.
Definition: term.h:64
Nepomuk2::Query::LiteralTerm::~LiteralTerm
~LiteralTerm()
Destructor.
Definition: literalterm.cpp:250
Nepomuk2::Query::LiteralTerm::LiteralTerm
LiteralTerm(const LiteralTerm &term)
Copy constructor.
Definition: literalterm.cpp:237
literalterm.h
Nepomuk2::Query::LiteralTerm::operator=
LiteralTerm & operator=(const LiteralTerm &term)
Assignment operator.
Definition: literalterm.cpp:255
Nepomuk2::Query::LiteralTerm
Match literal properties via full text.
Definition: literalterm.h:86
Nepomuk2::Query::LiteralTerm::value
Soprano::LiteralValue value() const
The value this LiteralTerm should match to.
Definition: literalterm.cpp:262
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal