• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDE Support
  • Sitemap
  • Contact Us
 

strigi/src/streams

archivereader.cpp

Go to the documentation of this file.
00001 /* This file is part of Strigi Desktop Search
00002  *
00003  * Copyright (C) 2006,2009 Jos van den Oever <jos@vandenoever.info>
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Library General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Library General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Library General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
00020 #include "archivereader.h"
00021 #include "sdfinputstream.h"
00022 #include "tarinputstream.h"
00023 #include "mailinputstream.h"
00024 #include "rpminputstream.h"
00025 #include "arinputstream.h"
00026 #include "zipinputstream.h"
00027 #include "archiveentrycache.h"
00028 #include "listinginprogress.h"
00029 #include <iostream>
00030 #include <set>
00031 
00032 using namespace std;
00033 using namespace Strigi;
00034 
00035 class ArchiveReader::DirLister::Private {
00036 private:
00037     int pos;
00038     vector<EntryInfo> entries;
00039     ListingInProgress* listing;
00040     const ArchiveEntryCache::SubEntry* entry;
00041     set<std::string> done;
00042 public:
00043     const std::string url;
00044     explicit Private(const vector<EntryInfo>& v, int p = 0)
00045         :pos(p), entries(v), listing(NULL), entry(NULL) {
00046     }
00047     explicit Private(ListingInProgress* l, const std::string& u)
00048             :pos(0), listing(l), entry(NULL), url(u) {
00049         listing->ref();
00050     }
00051     ~Private() {
00052         if (listing && listing->unref()) delete listing;
00053     }
00054     void operator=(const Private& a) {
00055         pos = a.pos;
00056         entries = a.entries;
00057         if (listing && listing->unref()) delete listing;
00058         listing = a.listing;
00059         if (listing) listing->ref();
00060         entry = a.entry;
00061         done = a.done;
00062     }
00063     bool
00064     nextEntry(EntryInfo& e) {
00065         if (pos < 0 || (listing == NULL && pos >= (int)entries.size())) {
00066             // there are no (more) valid entries
00067             pos = -1;
00068         } else if (listing == NULL) {
00069             // get the next entry from the static list
00070             e = entries[pos++];
00071         } else if (entry == NULL) {
00072             // acquire a dynamic list and get the first entry
00073             entry = listing->nextEntry(url);
00074             if (entry && entry->entries.size()) {
00075                 e = nextEntry();
00076                 pos = 1;
00077             } else {
00078                 pos = -1;
00079             }
00080         } else if (pos < (int)entry->entries.size()
00081                 || listing->nextEntry(entry)) {
00082             // e is now the next entry from the dynamic list
00083             e = nextEntry();
00084         } else {
00085             // there are no more entries
00086             pos = -1;
00087         }
00088         return pos != -1;
00089     }
00090 private:
00095     EntryInfo
00096     nextEntry() {
00097         // we can make this more efficient by sorting a set with the entries
00098         // that have already been returned. A set is faster than a vector for
00099         // lookups by value.
00100         ArchiveEntryCache::SubEntryMap::const_iterator i
00101             = entry->entries.begin();
00102         do {
00103             if (done.find(i->first) == done.end()) {
00104                 done.insert(i->first);
00105                 pos++;
00106                 return i->second->entry;
00107             }
00108         } while (++i != entry->entries.end());
00109         // this should never happen!
00110         return i->second->entry;
00111     }
00112 };
00113 
00114 ArchiveReader::DirLister::DirLister(Private* d) :p(d) {
00115     assert(d);
00116 }
00117 
00118 ArchiveReader::DirLister::DirLister(const DirLister& dl)
00119     :p(new Private(*dl.p)) {
00120 }
00121 
00122 ArchiveReader::DirLister::~DirLister() {
00123     delete p;
00124 }
00125 
00126 bool
00127 ArchiveReader::DirLister::nextEntry(EntryInfo& e) {
00128     return p->nextEntry(e);
00129 }
00130 const ArchiveReader::DirLister&
00131 ArchiveReader::DirLister::operator=(const DirLister& a) {
00132     *p = *a.p;
00133     return a;
00134 }
00135 
00137 class ArchiveReader::ArchiveReaderPrivate {
00138 public:
00140     typedef std::map<InputStream*, std::list<StreamPtr> > OpenstreamsType;
00141     OpenstreamsType openstreams;
00143     std::list<StreamOpener*> openers;
00145     ArchiveEntryCache cache;
00148     std::map<bool (*)(const char*, int32_t),
00149         SubStreamProvider* (*)(InputStream*)> subs;
00154     std::map<std::string, ListingInProgress*> listingsInProgress;
00155 
00156     std::vector<size_t> cullName(const std::string& url,
00157         InputStream*& stream) const;
00158     SubStreamProvider* positionedProvider(const std::string& url);
00159     InputStream* open(const std::string& url) const;
00169     int localStat(const std::string& url, EntryInfo& e);
00170     ArchiveReaderPrivate();
00171     ~ArchiveReaderPrivate();
00172     ListingInProgress* findListingInProgress(const string& url) const;
00173 };
00174 ArchiveReader::ArchiveReaderPrivate::ArchiveReaderPrivate() {
00175     typedef std::pair<bool (*)(const char*, int32_t),
00176         SubStreamProvider* (*)(InputStream*)> SubsPair;
00177 
00178     subs.insert(SubsPair(MailInputStream::checkHeader,
00179         MailInputStream::factory));
00180     subs.insert(SubsPair(RpmInputStream::checkHeader,
00181         RpmInputStream::factory));
00182     subs.insert(SubsPair(ArInputStream::checkHeader,
00183         ArInputStream::factory));
00184     subs.insert(SubsPair(ZipInputStream::checkHeader,
00185         ZipInputStream::factory));
00186     subs.insert(SubsPair(TarInputStream::checkHeader,
00187         TarInputStream::factory));
00188     subs.insert(SubsPair(SdfInputStream::checkHeader,
00189         SdfInputStream::factory));
00190 }
00191 ArchiveReader::ArchiveReaderPrivate::~ArchiveReaderPrivate() {
00192     if (openstreams.size() > 0) {
00193         cerr << openstreams.size() << " streams were not closed." << endl;
00194         OpenstreamsType::iterator i;
00195         for (i = openstreams.begin(); i != openstreams.end(); ++i) {
00196             free(i->second);
00197         }
00198     }
00199     map<string, ListingInProgress*>::const_iterator end
00200         = listingsInProgress.end();
00201     for (map<string, ListingInProgress*>::const_iterator i
00202             = listingsInProgress.begin(); i != end; ++i) {
00203         if (i->second->unref()) delete i->second;
00204     }
00205 }
00212 vector<size_t>
00213 ArchiveReader::ArchiveReaderPrivate::cullName(const string& url,
00214         InputStream*& stream) const {
00215     vector<size_t> partpos;
00216     size_t p = url.rfind('/');
00217     stream = open(url);
00218     while (p != string::npos && p != 0 && !stream) {
00219         stream = open(url.substr(0, p));
00220         partpos.push_back(p+1);
00221         p = url.rfind('/', p-1);
00222     }
00223     return partpos;
00224 }
00230 SubStreamProvider*
00231 ArchiveReader::ArchiveReaderPrivate::positionedProvider(const string& url) {
00232     InputStream* stream = 0;
00233 
00234     // cull the url until a stream can be opened
00235     vector<size_t> partpos = cullName(url, stream);
00236     if (!stream) {
00237         return 0;
00238     }
00239 
00240     // open the substreams until the complete path has been opened
00241     SubStreamProvider* provider;
00242     InputStream* substream = stream;
00243     vector<size_t>::reverse_iterator i;
00244     list<StreamPtr> streams;
00245     streams.push_back(stream);
00246     for (i = partpos.rbegin(); i != partpos.rend(); ++i) {
00247         // try to open the stream as a SubStreamProvider
00248         provider = subStreamProvider(subs, substream, streams);
00249         if (provider == 0) {
00250             free(streams);
00251             return 0;
00252         }
00253         // let sn point to the trailing part of the url
00254         const char* sn = url.c_str() + *i;
00255         size_t len = url.length();
00256         bool nextstream = false;
00257         // try to open the first substream of the current SubStreamProvider
00258         substream = provider->currentEntry();
00259         do {
00260             const EntryInfo& e = provider->entryInfo();
00261             // check that the filename matches at least one entry
00262             if (e.type == EntryInfo::File
00263                     && e.filename.length() < len
00264                     && strncmp(e.filename.c_str(), sn,
00265                            e.filename.length()) == 0) {
00266                 nextstream = true;
00267                 // skip the number of entries that are matched
00268                 uint end = *i + e.filename.length();
00269                 do {
00270                     ++i;
00271                 } while (i != partpos.rend() && *i < end);
00272                 if (i == partpos.rend()) {
00273                     // success!
00274                     openstreams[substream] = streams;
00275                     return provider;
00276                 }
00277                 // no match: rewind
00278                 --i;
00279             } else {
00280                 substream = provider->nextEntry();
00281             }
00282         } while(substream && !nextstream);
00283     }
00284     if (substream) {
00285         openstreams[substream] = streams;
00286     } else {
00287         free(streams);
00288     }
00289     return 0;
00290 }
00294 InputStream*
00295 ArchiveReader::ArchiveReaderPrivate::open(const string& url) const {
00296     InputStream* stream = 0;
00297     list<StreamOpener*>::const_iterator i;
00298     for (i = openers.begin(); i != openers.end() && stream == 0; ++i) {
00299         stream = (*i)->openStream(url);
00300     }
00301     return stream;
00302 }
00303 int
00304 ArchiveReader::ArchiveReaderPrivate::localStat(const std::string& url,
00305         EntryInfo& e) {
00306     // try with the supplied streamOpeners
00307     list<StreamOpener*>::const_iterator i;
00308     for (i = openers.begin(); i != openers.end(); ++i) {
00309         if ((*i)->stat(url, e) == 0) {
00310             if (!(e.type & EntryInfo::File)) {
00311                 return 0;
00312             }
00313 
00314             // check if a ListingInProgress points to this url
00315             map<string, ListingInProgress*>::const_iterator li =
00316                 listingsInProgress.find(url);
00317             if (li != listingsInProgress.end()) {
00318                 // use the information in this entry
00319                 e = li->second->root->entry;
00320                 return 0;
00321             }
00322  
00323             // check if this file is in the cache
00324             map<string, ArchiveEntryCache::RootSubEntry*>::const_iterator se
00325                 = cache.cache.find(url);
00326             if (se != cache.cache.end()) {
00327                 if (se->second->entry.mtime == e.mtime) {
00328                     e.type = se->second->entry.type;
00329                     return 0;
00330                 }
00331                 // the file has changed: it is removed from the cache
00332                 ArchiveEntryCache::RootSubEntry* rse = se->second;
00333                 cache.cache.erase(se->second->entry.filename);
00334                 delete rse;
00335             }
00336 
00337             // The file exists, but is it an archive?
00338             InputStream* s = (*i)->openStream(url);
00339             list<StreamPtr> streams;
00340             SubStreamProvider* provider = subStreamProvider(subs, s, streams);
00341             if (provider) {
00342                 // this file contains substreams
00343                 e.type = (EntryInfo::Type)(EntryInfo::Dir|EntryInfo::File);
00344                 free(streams);
00345 /*
00346                 // create an empty entry in the cache
00347                 ArchiveEntryCache::RootSubEntry* rse = cache.cache[url];
00348                 if (rse == NULL) {
00349                     rse = new ArchiveEntryCache::RootSubEntry();
00350                     cache.cache[url] = rse;
00351                 }
00352                 rse->indexed = false;
00353                 rse->entry = e;*/
00354             }
00355             delete s;
00356             return 0;
00357         }
00358     }
00359     return -1;
00360 }
00361 ArchiveReader::ArchiveReader() :p(new ArchiveReaderPrivate()) {
00362 }
00363 ArchiveReader::~ArchiveReader() {
00364     delete p;
00365 }
00366 int
00367 ArchiveReader::stat(const std::string& url, EntryInfo& e) {
00368     // try to stat the url as a physical file
00369     if (p->localStat(url, e) == 0) return 0;
00370 
00371     // check the cache (this assumes dirEntries was already called)
00372     const ArchiveEntryCache::SubEntry* subentry = p->cache.findEntry(url);
00373     if (subentry) {
00374         e = subentry->entry;
00375         return 0;
00376     }
00377     // try reading the entries from the collection to which this file belongs
00378     size_t pos = url.rfind('/');
00379     if (pos == string::npos) return -1;
00380     std::string parenturl(url, 0, pos);
00381     ArchiveReader::DirLister dirlister(dirEntries(parenturl));
00382     while (dirlister.nextEntry(e)) {
00383         if (e.filename == url.c_str()+pos+1) {
00384             return 0;
00385         }
00386     }
00387     return -1;
00388 }
00389 InputStream*
00390 ArchiveReader::openStream(const string& url) {
00391     InputStream* stream = p->open(url);
00392     if (stream) return stream;
00393 
00394     // open the substreams until the complete path has been opened
00395     SubStreamProvider* provider = p->positionedProvider(url);
00396     if (provider) {
00397         stream = provider->currentEntry();
00398     }
00399     return stream;
00400 }
00401 void
00402 ArchiveReader::addStreamOpener(StreamOpener* opener) {
00403     p->openers.push_back(opener);
00404 }
00405 void
00406 ArchiveReader::closeStream(InputStream* s) {
00407     ArchiveReaderPrivate::OpenstreamsType::iterator i(
00408         p->openstreams.find(s));
00409     if (i == p->openstreams.end()) {
00410         delete s;
00411         return;
00412     }
00413     free(i->second);
00414     p->openstreams.erase(i);
00415 }
00416 bool
00417 ArchiveReader::isArchive(const std::string& url) {
00418     EntryInfo e;
00419     if (p->localStat(url, e) != 0) {
00420         return false;
00421     }
00422     return ((e.type & (EntryInfo::File | EntryInfo::Dir)) != 0);
00423 }
00424 std::vector<EntryInfo>
00425 convert(const ArchiveEntryCache::SubEntry* entry) {
00426     std::vector<EntryInfo> v;
00427     if (entry == NULL) return v;
00428     ArchiveEntryCache::SubEntryMap::const_iterator i;
00429     for (i = entry->entries.begin(); i != entry->entries.end(); ++i) {
00430         v.push_back(i->second->entry);
00431     }
00432     return v;
00433 }
00434 ArchiveReader::DirLister
00435 ArchiveReader::dirEntries(const std::string& url) {
00436     // find the entry in the cache
00437     const ArchiveEntryCache::SubEntry* subentry = p->cache.findEntry(url);
00438 
00439     // look for a ListingInProgress
00440     ListingInProgress* lip = NULL;
00441     if (subentry == NULL) {
00442         lip = p->findListingInProgress(url);
00443     }
00444     std::vector<EntryInfo> v;
00445     if (subentry == NULL && lip == NULL) {
00446         // this entry is not in the cache, we try to open it
00447         InputStream* s = 0;
00448         vector<size_t> l = p->cullName(url, s);
00449         // no entries were found: we return an empty dirlister
00450         // we have no other way of signaling failure
00451         // the caller should have checked with stat if the entry is valid
00452         if (!s) return DirLister(new DirLister::Private(v));
00453 
00454         string name(url);
00455         if (l.size()) {
00456             // let name be the name of physical file
00457             name.resize(l[l.size()-1]-1);
00458         }
00459         EntryInfo e;
00460         // get the properties of the physical file
00461         p->localStat(name, e);
00462         lip = new ListingInProgress(p->subs, e, name, s);
00463         lip->ref();
00464         p->listingsInProgress[name] = lip;
00465     }
00466 
00467     if (lip) {
00468         if (lip->isDone()) {
00469             p->cache.cache[lip->url] = lip->root;
00470             lip->root = 0;
00471             p->listingsInProgress.erase(lip->url);
00472             if (lip->unref()) delete lip;
00473         } else {
00474             return DirLister(new DirLister::Private(lip, url));
00475         }
00476     }
00477 
00478     if (subentry == NULL) {
00479         subentry = p->cache.findEntry(url);
00480     }
00481     if (subentry) {
00482         v = convert(subentry);
00483     }
00484     return DirLister(new DirLister::Private(v));
00485 }
00486 bool
00487 ArchiveReader::canHandle(const std::string& url) {
00488     // remove parts from the back of url until url matches a physical file
00489     // return true of the physical file is an archive, i.e. contains
00490     // substreams
00491     size_t pos = url.rfind('/');
00492     EntryInfo e;
00493     int r = p->localStat(url, e);
00494     while (pos != string::npos && pos != 0 && r == -1) {
00495         r = p->localStat(url.substr(0, pos), e);
00496         pos = url.rfind('/', pos-1);
00497     }
00498     return r == 0 && e.type & EntryInfo::File && e.type & EntryInfo::Dir;
00499 }
00500 ListingInProgress*
00501 ArchiveReader::ArchiveReaderPrivate::findListingInProgress(const string& url)
00502         const {
00503     string n(url);
00504     size_t p = n.size();
00505     do {
00506         map<string, ListingInProgress*>::const_iterator i
00507             = listingsInProgress.find(n);
00508         if (i != listingsInProgress.end()) {
00509             // the root entry is in the map - we are done
00510             return i->second;
00511         }
00512         // remove the last element in the path, and look for that
00513         p = n.rfind('/');
00514         if (p != string::npos) {
00515             n.resize(p);
00516         }
00517     } while (p != string::npos);
00518     // couldn't find it
00519     return 0;
00520 }

strigi/src/streams

Skip menu "strigi/src/streams"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

KDE Support

Skip menu "KDE Support"
  • akonadi
  • Decibel
  • grantlee
  • kdewin
  • phonon
  •     Backend
  • polkit-qt
  • qca
  • qimageblitz
  • soprano
  • strigi
  •     searchclient
  •     streamanalyzer
  •     streams
Generated for KDE Support by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal