00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "archivereader.h"
00021 #include "sdfinputstream.h"
00022 #include "tarinputstream.h"
00023 #include "mailinputstream.h"
00024 #include "rpminputstream.h"
00025 #include "arinputstream.h"
00026 #include "zipinputstream.h"
00027 #include "archiveentrycache.h"
00028 #include "listinginprogress.h"
00029 #include <iostream>
00030 #include <set>
00031
00032 using namespace std;
00033 using namespace Strigi;
00034
00035 class ArchiveReader::DirLister::Private {
00036 private:
00037 int pos;
00038 vector<EntryInfo> entries;
00039 ListingInProgress* listing;
00040 const ArchiveEntryCache::SubEntry* entry;
00041 set<std::string> done;
00042 public:
00043 const std::string url;
00044 explicit Private(const vector<EntryInfo>& v, int p = 0)
00045 :pos(p), entries(v), listing(NULL), entry(NULL) {
00046 }
00047 explicit Private(ListingInProgress* l, const std::string& u)
00048 :pos(0), listing(l), entry(NULL), url(u) {
00049 listing->ref();
00050 }
00051 ~Private() {
00052 if (listing && listing->unref()) delete listing;
00053 }
00054 void operator=(const Private& a) {
00055 pos = a.pos;
00056 entries = a.entries;
00057 if (listing && listing->unref()) delete listing;
00058 listing = a.listing;
00059 if (listing) listing->ref();
00060 entry = a.entry;
00061 done = a.done;
00062 }
00063 bool
00064 nextEntry(EntryInfo& e) {
00065 if (pos < 0 || (listing == NULL && pos >= (int)entries.size())) {
00066
00067 pos = -1;
00068 } else if (listing == NULL) {
00069
00070 e = entries[pos++];
00071 } else if (entry == NULL) {
00072
00073 entry = listing->nextEntry(url);
00074 if (entry && entry->entries.size()) {
00075 e = nextEntry();
00076 pos = 1;
00077 } else {
00078 pos = -1;
00079 }
00080 } else if (pos < (int)entry->entries.size()
00081 || listing->nextEntry(entry)) {
00082
00083 e = nextEntry();
00084 } else {
00085
00086 pos = -1;
00087 }
00088 return pos != -1;
00089 }
00090 private:
00095 EntryInfo
00096 nextEntry() {
00097
00098
00099
00100 ArchiveEntryCache::SubEntryMap::const_iterator i
00101 = entry->entries.begin();
00102 do {
00103 if (done.find(i->first) == done.end()) {
00104 done.insert(i->first);
00105 pos++;
00106 return i->second->entry;
00107 }
00108 } while (++i != entry->entries.end());
00109
00110 return i->second->entry;
00111 }
00112 };
00113
00114 ArchiveReader::DirLister::DirLister(Private* d) :p(d) {
00115 assert(d);
00116 }
00117
00118 ArchiveReader::DirLister::DirLister(const DirLister& dl)
00119 :p(new Private(*dl.p)) {
00120 }
00121
00122 ArchiveReader::DirLister::~DirLister() {
00123 delete p;
00124 }
00125
00126 bool
00127 ArchiveReader::DirLister::nextEntry(EntryInfo& e) {
00128 return p->nextEntry(e);
00129 }
00130 const ArchiveReader::DirLister&
00131 ArchiveReader::DirLister::operator=(const DirLister& a) {
00132 *p = *a.p;
00133 return a;
00134 }
00135
00137 class ArchiveReader::ArchiveReaderPrivate {
00138 public:
00140 typedef std::map<InputStream*, std::list<StreamPtr> > OpenstreamsType;
00141 OpenstreamsType openstreams;
00143 std::list<StreamOpener*> openers;
00145 ArchiveEntryCache cache;
00148 std::map<bool (*)(const char*, int32_t),
00149 SubStreamProvider* (*)(InputStream*)> subs;
00154 std::map<std::string, ListingInProgress*> listingsInProgress;
00155
00156 std::vector<size_t> cullName(const std::string& url,
00157 InputStream*& stream) const;
00158 SubStreamProvider* positionedProvider(const std::string& url);
00159 InputStream* open(const std::string& url) const;
00169 int localStat(const std::string& url, EntryInfo& e);
00170 ArchiveReaderPrivate();
00171 ~ArchiveReaderPrivate();
00172 ListingInProgress* findListingInProgress(const string& url) const;
00173 };
00174 ArchiveReader::ArchiveReaderPrivate::ArchiveReaderPrivate() {
00175 typedef std::pair<bool (*)(const char*, int32_t),
00176 SubStreamProvider* (*)(InputStream*)> SubsPair;
00177
00178 subs.insert(SubsPair(MailInputStream::checkHeader,
00179 MailInputStream::factory));
00180 subs.insert(SubsPair(RpmInputStream::checkHeader,
00181 RpmInputStream::factory));
00182 subs.insert(SubsPair(ArInputStream::checkHeader,
00183 ArInputStream::factory));
00184 subs.insert(SubsPair(ZipInputStream::checkHeader,
00185 ZipInputStream::factory));
00186 subs.insert(SubsPair(TarInputStream::checkHeader,
00187 TarInputStream::factory));
00188 subs.insert(SubsPair(SdfInputStream::checkHeader,
00189 SdfInputStream::factory));
00190 }
00191 ArchiveReader::ArchiveReaderPrivate::~ArchiveReaderPrivate() {
00192 if (openstreams.size() > 0) {
00193 cerr << openstreams.size() << " streams were not closed." << endl;
00194 OpenstreamsType::iterator i;
00195 for (i = openstreams.begin(); i != openstreams.end(); ++i) {
00196 free(i->second);
00197 }
00198 }
00199 map<string, ListingInProgress*>::const_iterator end
00200 = listingsInProgress.end();
00201 for (map<string, ListingInProgress*>::const_iterator i
00202 = listingsInProgress.begin(); i != end; ++i) {
00203 if (i->second->unref()) delete i->second;
00204 }
00205 }
00212 vector<size_t>
00213 ArchiveReader::ArchiveReaderPrivate::cullName(const string& url,
00214 InputStream*& stream) const {
00215 vector<size_t> partpos;
00216 size_t p = url.rfind('/');
00217 stream = open(url);
00218 while (p != string::npos && p != 0 && !stream) {
00219 stream = open(url.substr(0, p));
00220 partpos.push_back(p+1);
00221 p = url.rfind('/', p-1);
00222 }
00223 return partpos;
00224 }
00230 SubStreamProvider*
00231 ArchiveReader::ArchiveReaderPrivate::positionedProvider(const string& url) {
00232 InputStream* stream = 0;
00233
00234
00235 vector<size_t> partpos = cullName(url, stream);
00236 if (!stream) {
00237 return 0;
00238 }
00239
00240
00241 SubStreamProvider* provider;
00242 InputStream* substream = stream;
00243 vector<size_t>::reverse_iterator i;
00244 list<StreamPtr> streams;
00245 streams.push_back(stream);
00246 for (i = partpos.rbegin(); i != partpos.rend(); ++i) {
00247
00248 provider = subStreamProvider(subs, substream, streams);
00249 if (provider == 0) {
00250 free(streams);
00251 return 0;
00252 }
00253
00254 const char* sn = url.c_str() + *i;
00255 size_t len = url.length();
00256 bool nextstream = false;
00257
00258 substream = provider->currentEntry();
00259 do {
00260 const EntryInfo& e = provider->entryInfo();
00261
00262 if (e.type == EntryInfo::File
00263 && e.filename.length() < len
00264 && strncmp(e.filename.c_str(), sn,
00265 e.filename.length()) == 0) {
00266 nextstream = true;
00267
00268 uint end = *i + e.filename.length();
00269 do {
00270 ++i;
00271 } while (i != partpos.rend() && *i < end);
00272 if (i == partpos.rend()) {
00273
00274 openstreams[substream] = streams;
00275 return provider;
00276 }
00277
00278 --i;
00279 } else {
00280 substream = provider->nextEntry();
00281 }
00282 } while(substream && !nextstream);
00283 }
00284 if (substream) {
00285 openstreams[substream] = streams;
00286 } else {
00287 free(streams);
00288 }
00289 return 0;
00290 }
00294 InputStream*
00295 ArchiveReader::ArchiveReaderPrivate::open(const string& url) const {
00296 InputStream* stream = 0;
00297 list<StreamOpener*>::const_iterator i;
00298 for (i = openers.begin(); i != openers.end() && stream == 0; ++i) {
00299 stream = (*i)->openStream(url);
00300 }
00301 return stream;
00302 }
00303 int
00304 ArchiveReader::ArchiveReaderPrivate::localStat(const std::string& url,
00305 EntryInfo& e) {
00306
00307 list<StreamOpener*>::const_iterator i;
00308 for (i = openers.begin(); i != openers.end(); ++i) {
00309 if ((*i)->stat(url, e) == 0) {
00310 if (!(e.type & EntryInfo::File)) {
00311 return 0;
00312 }
00313
00314
00315 map<string, ListingInProgress*>::const_iterator li =
00316 listingsInProgress.find(url);
00317 if (li != listingsInProgress.end()) {
00318
00319 e = li->second->root->entry;
00320 return 0;
00321 }
00322
00323
00324 map<string, ArchiveEntryCache::RootSubEntry*>::const_iterator se
00325 = cache.cache.find(url);
00326 if (se != cache.cache.end()) {
00327 if (se->second->entry.mtime == e.mtime) {
00328 e.type = se->second->entry.type;
00329 return 0;
00330 }
00331
00332 ArchiveEntryCache::RootSubEntry* rse = se->second;
00333 cache.cache.erase(se->second->entry.filename);
00334 delete rse;
00335 }
00336
00337
00338 InputStream* s = (*i)->openStream(url);
00339 list<StreamPtr> streams;
00340 SubStreamProvider* provider = subStreamProvider(subs, s, streams);
00341 if (provider) {
00342
00343 e.type = (EntryInfo::Type)(EntryInfo::Dir|EntryInfo::File);
00344 free(streams);
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354 }
00355 delete s;
00356 return 0;
00357 }
00358 }
00359 return -1;
00360 }
00361 ArchiveReader::ArchiveReader() :p(new ArchiveReaderPrivate()) {
00362 }
00363 ArchiveReader::~ArchiveReader() {
00364 delete p;
00365 }
00366 int
00367 ArchiveReader::stat(const std::string& url, EntryInfo& e) {
00368
00369 if (p->localStat(url, e) == 0) return 0;
00370
00371
00372 const ArchiveEntryCache::SubEntry* subentry = p->cache.findEntry(url);
00373 if (subentry) {
00374 e = subentry->entry;
00375 return 0;
00376 }
00377
00378 size_t pos = url.rfind('/');
00379 if (pos == string::npos) return -1;
00380 std::string parenturl(url, 0, pos);
00381 ArchiveReader::DirLister dirlister(dirEntries(parenturl));
00382 while (dirlister.nextEntry(e)) {
00383 if (e.filename == url.c_str()+pos+1) {
00384 return 0;
00385 }
00386 }
00387 return -1;
00388 }
00389 InputStream*
00390 ArchiveReader::openStream(const string& url) {
00391 InputStream* stream = p->open(url);
00392 if (stream) return stream;
00393
00394
00395 SubStreamProvider* provider = p->positionedProvider(url);
00396 if (provider) {
00397 stream = provider->currentEntry();
00398 }
00399 return stream;
00400 }
00401 void
00402 ArchiveReader::addStreamOpener(StreamOpener* opener) {
00403 p->openers.push_back(opener);
00404 }
00405 void
00406 ArchiveReader::closeStream(InputStream* s) {
00407 ArchiveReaderPrivate::OpenstreamsType::iterator i(
00408 p->openstreams.find(s));
00409 if (i == p->openstreams.end()) {
00410 delete s;
00411 return;
00412 }
00413 free(i->second);
00414 p->openstreams.erase(i);
00415 }
00416 bool
00417 ArchiveReader::isArchive(const std::string& url) {
00418 EntryInfo e;
00419 if (p->localStat(url, e) != 0) {
00420 return false;
00421 }
00422 return ((e.type & (EntryInfo::File | EntryInfo::Dir)) != 0);
00423 }
00424 std::vector<EntryInfo>
00425 convert(const ArchiveEntryCache::SubEntry* entry) {
00426 std::vector<EntryInfo> v;
00427 if (entry == NULL) return v;
00428 ArchiveEntryCache::SubEntryMap::const_iterator i;
00429 for (i = entry->entries.begin(); i != entry->entries.end(); ++i) {
00430 v.push_back(i->second->entry);
00431 }
00432 return v;
00433 }
00434 ArchiveReader::DirLister
00435 ArchiveReader::dirEntries(const std::string& url) {
00436
00437 const ArchiveEntryCache::SubEntry* subentry = p->cache.findEntry(url);
00438
00439
00440 ListingInProgress* lip = NULL;
00441 if (subentry == NULL) {
00442 lip = p->findListingInProgress(url);
00443 }
00444 std::vector<EntryInfo> v;
00445 if (subentry == NULL && lip == NULL) {
00446
00447 InputStream* s = 0;
00448 vector<size_t> l = p->cullName(url, s);
00449
00450
00451
00452 if (!s) return DirLister(new DirLister::Private(v));
00453
00454 string name(url);
00455 if (l.size()) {
00456
00457 name.resize(l[l.size()-1]-1);
00458 }
00459 EntryInfo e;
00460
00461 p->localStat(name, e);
00462 lip = new ListingInProgress(p->subs, e, name, s);
00463 lip->ref();
00464 p->listingsInProgress[name] = lip;
00465 }
00466
00467 if (lip) {
00468 if (lip->isDone()) {
00469 p->cache.cache[lip->url] = lip->root;
00470 lip->root = 0;
00471 p->listingsInProgress.erase(lip->url);
00472 if (lip->unref()) delete lip;
00473 } else {
00474 return DirLister(new DirLister::Private(lip, url));
00475 }
00476 }
00477
00478 if (subentry == NULL) {
00479 subentry = p->cache.findEntry(url);
00480 }
00481 if (subentry) {
00482 v = convert(subentry);
00483 }
00484 return DirLister(new DirLister::Private(v));
00485 }
00486 bool
00487 ArchiveReader::canHandle(const std::string& url) {
00488
00489
00490
00491 size_t pos = url.rfind('/');
00492 EntryInfo e;
00493 int r = p->localStat(url, e);
00494 while (pos != string::npos && pos != 0 && r == -1) {
00495 r = p->localStat(url.substr(0, pos), e);
00496 pos = url.rfind('/', pos-1);
00497 }
00498 return r == 0 && e.type & EntryInfo::File && e.type & EntryInfo::Dir;
00499 }
00500 ListingInProgress*
00501 ArchiveReader::ArchiveReaderPrivate::findListingInProgress(const string& url)
00502 const {
00503 string n(url);
00504 size_t p = n.size();
00505 do {
00506 map<string, ListingInProgress*>::const_iterator i
00507 = listingsInProgress.find(n);
00508 if (i != listingsInProgress.end()) {
00509
00510 return i->second;
00511 }
00512
00513 p = n.rfind('/');
00514 if (p != string::npos) {
00515 n.resize(p);
00516 }
00517 } while (p != string::npos);
00518
00519 return 0;
00520 }