• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

Nepomuk-Core

  • sources
  • kde-4.12
  • kdelibs
  • nepomuk-core
  • services
  • fileindexer
  • indexer
odfextractor.cpp
Go to the documentation of this file.
1 /*
2  ODF extractor plugin for the Nepomuk file indexer: extracts document metadata and plain text from OpenDocument files.
3  Copyright (C) 2013 Vishesh Handa <me@vhanda.in>
4  Copyright (C) 2012 Jörg Ehrichs <joerg.ehrichs@gmx.de>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 2.1 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with this library; if not, write to the Free Software
18  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 
21 
22 #include "odfextractor.h"
23 
24 #include "nco.h"
25 #include "nie.h"
26 #include "nfo.h"
27 
28 #include <KDE/KDebug>
29 #include <KDE/KZip>
30 
31 #include <QtXml/QDomDocument>
32 #include <QtXml/QXmlStreamReader>
33 #include <Soprano/Vocabulary/NAO>
34 
35 using namespace Soprano::Vocabulary;
36 using namespace Nepomuk2::Vocabulary;
37 using namespace Nepomuk2;
38 
39 OdfExtractor::OdfExtractor(QObject* parent, const QVariantList& ): ExtractorPlugin(parent)
40 {
41 
42 }
43 
44 QStringList OdfExtractor::mimetypes()
45 {
46  QStringList list;
47  list << QLatin1String("application/vnd.oasis.opendocument.text")
48  << QLatin1String("application/vnd.oasis.opendocument.presentation")
49  << QLatin1String("application/vnd.oasis.opendocument.spreadsheet");
50 
51  return list;
52 }
53 
54 SimpleResourceGraph OdfExtractor::extract(const QUrl& resUri, const QUrl& fileUrl, const QString& mimeType)
55 {
56  Q_UNUSED(mimeType);
57 
58  KZip zip(fileUrl.toLocalFile());
59  if (!zip.open(QIODevice::ReadOnly)) {
60  qWarning() << "Document is not a valid ZIP archive";
61  return SimpleResourceGraph();
62  }
63 
64  const KArchiveDirectory *directory = zip.directory();
65  if (!directory) {
66  qWarning() << "Invalid document structure (main directory is missing)";
67  return SimpleResourceGraph();
68  }
69 
70  const QStringList entries = directory->entries();
71  if (!entries.contains("meta.xml")) {
72  qWarning() << "Invalid document structure (meta.xml is missing)";
73  return SimpleResourceGraph();
74  }
75 
76  SimpleResourceGraph graph;
77  SimpleResource fileRes( resUri );
78 
79  QDomDocument metaData("metaData");
80  const KArchiveFile *file = static_cast<const KArchiveFile*>(directory->entry("meta.xml"));
81  metaData.setContent(file->data());
82 
83  // parse metadata ...
84  QDomElement docElem = metaData.documentElement();
85 
86  QDomNode n = docElem.firstChild().firstChild(); // <office:document-meta> ... <office:meta> ... content
87  while (!n.isNull()) {
88  QDomElement e = n.toElement();
89  if (!e.isNull()) {
90  const QString tagName = e.tagName();
91 
92  // Dublin Core
93  if( tagName == QLatin1String("dc:description") ) {
94  fileRes.addProperty( NAO::description(), e.text() );
95  }
96  else if( tagName == QLatin1String("dc:subject") ) {
97  fileRes.addProperty( NIE::subject(), e.text() );
98  }
99  else if( tagName == QLatin1String("dc:title") ) {
100  fileRes.setProperty( NIE::title(), e.text() );
101  }
102  else if( tagName == QLatin1String("dc:creator") ) {
103  SimpleResource creator;
104  creator.addType( NCO::Contact() );
105  creator.addProperty( NCO::fullname(), e.text() );
106  graph << creator;
107 
108  fileRes.setProperty( NCO::creator(), creator );
109  }
110  else if( tagName == QLatin1String("dc:langauge") ) {
111  fileRes.setProperty( NIE::language(), e.text() );
112  }
113 
114  // Meta Properties
115  else if( tagName == QLatin1String("meta:document-statistic")) {
116  bool ok = false;
117  int pageCount = e.attribute("meta:page-count").toInt(&ok);
118  if( ok ) {
119  fileRes.setProperty( NFO::pageCount(), pageCount );
120  }
121 
122  int wordCount = e.attribute("meta:word-count").toInt(&ok);
123  if( ok ) {
124  fileRes.setProperty( NFO::wordCount(), wordCount );
125  }
126  }
127  else if( tagName == QLatin1String("meta:keyword") ) {
128  QString keywords = e.text();
129  fileRes.addProperty( NIE::keyword(), keywords );
130  }
131  else if( tagName == QLatin1String("meta:generator") ) {
132  fileRes.addProperty( NIE::generator(), e.text() );
133  }
134  else if( tagName == QLatin1String("meta:creation-date") ) {
135  QDateTime dt = ExtractorPlugin::dateTimeFromString( e.text() );
136  if( !dt.isNull() )
137  fileRes.addProperty( NIE::contentCreated(), dt );
138  }
139  }
140  n = n.nextSibling();
141  }
142 
143  const KArchiveFile *contentsFile = static_cast<const KArchiveFile*>(directory->entry("content.xml"));
144  QXmlStreamReader xml( contentsFile->createDevice() );
145 
146  QString plainText;
147  QTextStream stream(&plainText);
148 
149  int size = 0;
150  while( !xml.atEnd() ) {
151  if( size >= maxPlainTextSize() )
152  break;
153 
154  xml.readNext();
155  if( xml.isCharacters() ) {
156  QString str = xml.text().toString();
157  stream << str;
158  size += str.size();
159 
160  if( !str.at(str.length()-1).isSpace() )
161  stream << QLatin1Char(' ');
162  }
163 
164  if( xml.hasError() || xml.isEndDocument() )
165  break;
166  }
167 
168  if( !plainText.isEmpty() )
169  fileRes.addProperty( NIE::plainTextContent(), plainText );
170 
171  if( fileRes.properties().isEmpty() )
172  return SimpleResourceGraph();
173 
174  graph << fileRes;
175  return graph;
176 }
177 
178 
// Export OdfExtractor as a Nepomuk file extractor; the macro's second
// argument is the library name, "nepomukodfextractor".
NEPOMUK_EXPORT_EXTRACTOR( Nepomuk2::OdfExtractor, "nepomukodfextractor" )
Nepomuk2::SimpleResource::setProperty
void setProperty(const QUrl &property, const QVariant &value)
Set a property overwriting existing values.
Definition: simpleresource.cpp:186
Nepomuk2::ExtractorPlugin
The ExtractorPlugin is the base class for all file metadata extractors.
Definition: extractorplugin.h:60
Nepomuk2::SimpleResource
Represents a snapshot of one Nepomuk resource.
Definition: simpleresource.h:46
Nepomuk2::SimpleResource::properties
PropertyHash properties() const
Definition: simpleresource.cpp:155
QObject
Nepomuk2::SimpleResource::addProperty
void addProperty(const QUrl &property, const QVariant &value)
Add a property.
Definition: simpleresource.cpp:206
Nepomuk2::SimpleResourceGraph
Definition: simpleresourcegraph.h:48
Nepomuk2::OdfExtractor::mimetypes
virtual QStringList mimetypes()
Provide a list of mimetypes which are supported by this plugin.
Definition: odfextractor.cpp:44
Nepomuk2::OdfExtractor
Definition: odfextractor.h:28
NEPOMUK_EXPORT_EXTRACTOR
#define NEPOMUK_EXPORT_EXTRACTOR(classname, libname)
Export a Nepomuk file extractor.
Definition: extractorplugin.h:163
Nepomuk2::ExtractorPlugin::maxPlainTextSize
static int maxPlainTextSize()
Virtuoso does not support streaming operators, and does not accept queries above a certain size...
Definition: extractorplugin.cpp:124
Nepomuk2::ExtractorPlugin::dateTimeFromString
static QDateTime dateTimeFromString(const QString &dateString)
Tries to extract a valid date time from the string provided.
Definition: extractorplugin.cpp:59
Nepomuk2::SimpleResource::addType
void addType(const QUrl &type)
A convenience method which adds a property of type rdf:type.
Definition: simpleresource.cpp:257
Nepomuk2::OdfExtractor::extract
virtual SimpleResourceGraph extract(const QUrl &resUri, const QUrl &fileUrl, const QString &mimeType)
The main function of the plugin that is responsible for extracting the data from the file url and ret...
Definition: odfextractor.cpp:54
odfextractor.h
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:48:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Nepomuk-Core

Skip menu "Nepomuk-Core"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal