• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDE Support
  • Sitemap
  • Contact Us
 

strigi/src/streams

oleinputstream.cpp

Go to the documentation of this file.
00001 /* This file is part of Strigi Desktop Search
00002  *
00003  * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Library General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Library General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Library General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
00020 #include "oleinputstream.h"
00021 #include "textutils.h"
00022 #include "bufferedstream.h"
00023 #include <iostream>
00024 #include <set>
00025 using namespace Strigi;
00026 using namespace std;
00027 
00028 namespace {
00029 class OleEntryStream;
00030 }
00031 class OleInputStream::Private {
00032 public:
00033     const char* data;
00034     std::vector<int32_t> batIndex;
00035     std::vector<int32_t> sbatIndex;
00036     std::vector<int32_t> sbatbIndex;
00037     std::set<int32_t> usedSBlocks;
00038     std::set<int32_t> usedBlocks;
00039     int32_t size;
00040     int32_t maxindex;
00041     int32_t maxsindex;
00042     int32_t currentTableBlock;
00043     int32_t currentTableIndex;
00044     int32_t currentDataBlock;
00045     int32_t currentStreamSize;
00046     OleEntryStream* const entrystream;
00047     OleInputStream* stream;
00048 
00049     Private(OleInputStream* s, InputStream* input);
00050     ~Private();
00051     void readEntryInfo();
00052     int32_t nextBlock(int32_t);
00053     int32_t nextSmallBlock(int32_t);
00054     const char* getCurrentSmallBlock();
00055     InputStream* nextEntry();
00056 };
00057 
00058 namespace {
00059 class OleEntryStream : public BufferedInputStream {
00060 public:
00061     OleInputStream::Private* const parent;
00062     int64_t done;
00063     int32_t blockoffset;
00064     int32_t blocksize;
00065 
00066     OleEntryStream(OleInputStream::Private* f) :parent(f), blockoffset(0) {
00067         setMinBufSize(512);
00068         done = 0;
00069         blockoffset = 0;
00070         m_size = parent->currentStreamSize;
00071     }
00072     void reset() {
00073         resetBuffer();
00074         done = 0;
00075         blockoffset = 0;
00076         m_size = parent->currentStreamSize;
00077         blocksize = (m_size < 4096) ?64 : 512;
00078     }
00079     int32_t fillBuffer(char* start, int32_t space);
00080 };
00081 void
00082 printEntry(const char* d) {
00083     char type = d[0x42];
00084     string name;
00085     for (int i=0; i< d[0x40]; ++i) {
00086         name.append(d+2*i,1);
00087     }
00088     int32_t prevIndex = readLittleEndianInt32(d+0x44);
00089     int32_t nextIndex = readLittleEndianInt32(d+0x48);
00090     int32_t firstIndex = readLittleEndianInt32(d+0x4C);
00091     int32_t blockStart = readLittleEndianInt32(d+0x74);
00092     int32_t blockSize = readLittleEndianInt32(d+0x78);
00093     printf("entry %i %s: %i %i %i %i %i %i\n", d[0x40],name.c_str(), type, prevIndex, nextIndex, firstIndex,
00094         blockStart, blockSize);
00095 }
00096 }
00097 int32_t
00098 OleEntryStream::fillBuffer(char* start, int32_t space) {
00099     if (done == m_size) return -1;
00100 
00101     int32_t n = space;
00102     int32_t avail = blocksize-blockoffset;
00103     if (avail > m_size-done) {
00104         avail = m_size-done;
00105     }
00106     if (n > avail) {
00107         n = avail;
00108     }
00109     const char* d;
00110     if (blocksize == 64) {
00111         d = parent->getCurrentSmallBlock();
00112         if (d == 0) {
00113             m_status = Error;
00114             fprintf(stderr, "error in small blocks\n");
00115         return -1;
00116         }
00117     } else {
00118         d = parent->data+(1+parent->currentDataBlock)*512;
00119     }
00120     if (d < parent->data || parent->data + parent->size < d + n) {
00121         m_status = Error;
00122         m_error = "Invalid OLE stream.";
00123         cerr << "not 0 < " << d-parent->data << " < " << m_size << " "
00124             << blocksize << endl;
00125         return -1;
00126     }
00127     memcpy(start, d+blockoffset, n);
00128     done += n;
00129     blockoffset += n;
00130     if (blockoffset == blocksize) {
00131         if (blocksize == 64) {
00132             parent->currentDataBlock
00133                 = parent->nextSmallBlock(parent->currentDataBlock);
00134         } else {
00135             parent->currentDataBlock
00136                 = parent->nextBlock(parent->currentDataBlock);
00137         }
00138         blockoffset = 0;
00139         if (parent->currentDataBlock < 0) {
00140             if (parent->currentDataBlock != -2 || done != m_size) {
00141                 fprintf(stderr, "error: %i\n", parent->currentDataBlock);
00142                 m_status = Error;
00143                 n = -1;
00144             }
00145         }
00146     }
00147     return n;
00148 }
00149 
00150 OleInputStream::OleInputStream(InputStream* input) :SubStreamProvider(input),
00151     p(new Private(this, input)) {
00152 }
00153 OleInputStream::Private::Private(OleInputStream* s, InputStream* input)
00154         :entrystream(new OleEntryStream(this)), stream(s) {
00155     currentTableBlock = -1;
00156     // read start
00157     size = input->read(data, 512, 512);
00158     if (size != 512) {
00159         stream->m_status = Error;
00160         stream->m_error = "File is too small.";
00161         return;
00162     }
00163     input->reset(0);
00164 
00165     int32_t nBat = readLittleEndianInt32(data+0x2c);
00166     int32_t ptOffset = readLittleEndianInt32(data+0x30);
00167     int32_t sBatOffset = readLittleEndianInt32(data+0x3c);
00168     int32_t xBatOffset = readLittleEndianInt32(data+0x44);
00169     int32_t nXBat = readLittleEndianInt32(data+0x48);
00170     if (!checkHeader(data, size) || nBat < 0 || nBat > 128*nXBat+109
00171             || nXBat < 0) {
00172         stream->m_status = Error;
00173         stream->m_error = "Invalid OLE file.";
00174         return;
00175     }
00176     int32_t max = 0;
00177     batIndex.reserve(nBat);
00178     data += 76;
00179     for (int i = 0; i < ::min(109, nBat); ++i) {
00180         int32_t p = readLittleEndianInt32(data+4*i);
00181         batIndex.push_back(p);
00182         if (p > max) max = p;
00183     }
00184     if (ptOffset > max) max = ptOffset;
00185     if (128*(nBat-1) > max) max = 128*(nBat-1);
00186 
00187     int32_t toread = (max+2)*512;
00188     if (input->size() >= 0 && input->size() < toread) {
00189         stream->m_status = Error;
00190         stream->m_error = "File is incomplete.";
00191         return;
00192     }
00193     toread = (input->size() > 0) ?input->size() :10000000;
00194     size = input->read(data, toread, toread);
00195     input->reset(0);
00196     if (size != input->size()) {
00197         stream->m_status = Error;
00198         stream->m_error
00199             = string("File cannot be read completely: ")+input->error();
00200         return;
00201     }
00202     maxindex = size/512-2;
00203 
00204     // read any remaining BAT entries from XBAT blocks
00205     xBatOffset = 512 + 512 * xBatOffset;
00206     for (int j = 0; j < nXBat; ++j) {
00207         for (int i = 0; i<127 && (int)batIndex.size() < nBat; ++i) {
00208             int32_t p = readLittleEndianInt32(data + 4*i + xBatOffset);
00209             batIndex.push_back(p);
00210         }
00211         xBatOffset = 512+512*readLittleEndianInt32(data + 508 + xBatOffset);
00212     }
00213 
00214     // print all bat blocks
00215 /*    for (size_t i = 0; i<batIndex.size(); ++i) {
00216         const char* b = data+(1+batIndex[i])*512;
00217         fprintf(stderr, "%4.i %4.i\n", i, batIndex[i]);
00218         for (int j=0; j<128;++j) {
00219             int32_t p = readLittleEndianInt32(b+4*j);
00220             fprintf(stderr, "%4.i ", p);
00221             if (j%16 == 15) {fprintf(stderr, "\n");}
00222         }
00223     }*/
00224 
00225     // collect all sbat blocks
00226 //    fprintf(stderr, "sbat blocks\n");
00227     while (sBatOffset >= 0 && sbatIndex.size() < 1000) {
00228         sbatIndex.push_back(sBatOffset);
00229 /*        const char* b = data+(1+sBatOffset)*512;
00230         fprintf(stderr, "%4.i\n", sBatOffset);
00231         for (int j=0; j<128;++j) {
00232             int32_t p = readLittleEndianInt32(b+4*j);
00233             fprintf(stderr, "%4.i ", p);
00234             if (j%16 == 15) {fprintf(stderr, "\n");}
00235         }*/
00236         sBatOffset = nextBlock(sBatOffset);
00237     }
00238 
00239     sbatbIndex.reserve(sbatIndex.size()*16);
00240     // read the info for the root entry
00241     currentDataBlock = (1+ptOffset)*512 + 0x74;
00242     if (currentDataBlock + 4 > size) {
00243         stream->m_status = Error;
00244         stream->m_error = "Invalid header.";
00245         return;
00246     }
00247     currentDataBlock = readLittleEndianInt32(data + currentDataBlock);
00248     while (currentDataBlock >= 0 && sbatbIndex.size() < 16000) {
00249         sbatbIndex.push_back(currentDataBlock);
00250         currentDataBlock = nextBlock(currentDataBlock);
00251     }
00252     maxsindex = sbatbIndex.size()*8;
00253 
00254     currentTableBlock = ptOffset;
00255     currentTableIndex = 0;
00256 }
00257 OleInputStream::~OleInputStream() {
00258     delete p;
00259 }
00260 OleInputStream::Private::~Private() {
00261     delete entrystream;
00262 }
00263 int32_t
00264 OleInputStream::Private::nextBlock(int32_t in) {
00265     // get the number of the bat block we need
00266     int32_t bid = in/128;
00267     if (bid < 0 || bid >= (int32_t)batIndex.size()) {
00268         fprintf(stderr, "error 5: input block out of range %i\n", in);
00269         return -4;
00270     }
00271     bid = batIndex[bid]+1;
00272     int32_t next = in%128*4;
00273     next += 512*bid;
00274     if (next < 0 || size - 4 < next) {
00275         fprintf(stderr, "error 3: output block out of range %i\n", next);
00276         return -4;
00277     }
00278     bid = next;
00279     next = readLittleEndianInt32(data+bid);
00280     bool error = next < -2 || next == -1 || next > maxindex
00281         || usedBlocks.count(next) > 0;
00282     if (error) {
00283         fprintf(stderr, "error 4: output block out of range %i\n", next);
00284         next = -4;
00285     } else if (next >= 0) {
00286         // mark block as read
00287         usedBlocks.insert(next);
00288     }
00289     return next;
00290 }
00291 int32_t
00292 OleInputStream::Private::nextSmallBlock(int32_t in) {
00293     // get the number of the sbat block we need
00294     int32_t bid = in/128;
00295     if (bid < 0 || bid >= (int32_t)sbatIndex.size()) {
00296         fprintf(stderr, "error 6: input block out of range %i\n", in);
00297         return -4;
00298     }
00299     bid = sbatIndex[bid]+1;
00300     int32_t next = in%128*4;
00301     next += 512*bid;
00302     if (next < 0 || size - 4 < next) {
00303         fprintf(stderr, "error 1: output block out of range %i\n", next);
00304         return -4;
00305     }
00306     next = readLittleEndianInt32(data+next);
00307     bool error = next < -2 || next == -1 || next > maxsindex
00308         || usedSBlocks.count(next) > 0;
00309     if (error) {
00310         fprintf(stderr, "error 2: output block out of range %i\n", next);
00311         next = -4;
00312     } else if (next >= 0) {
00313         // mark block as read
00314         usedSBlocks.insert(next);
00315     }
00316     return next;
00317 }
00318 const char*
00319 OleInputStream::Private::getCurrentSmallBlock() {
00320     const char* d = data;
00321     // each block of 512 has 8 blocks of 64
00322     int32_t i = currentDataBlock/8;
00323     if (i < 0 || i >= (int32_t)sbatbIndex.size()) {
00324         return 0;
00325     }
00326     i = 512*(1+sbatbIndex[i]) + (currentDataBlock%8)*64;
00327     return (i > size-64) ?0 :d+i;
00328 }
00329 void
00330 OleInputStream::Private::readEntryInfo() {
00331     const char* d = data + (1+currentTableBlock)*512 + 128*currentTableIndex;
00332     char entryType = d[0x42];
00333     if (entryType != 2) {
00334         currentDataBlock = -1;
00335         return;
00336     }
00337     string name;
00338     int32_t namesize = d[0x40];
00339     if (namesize < 2) namesize = 2;
00340     if (namesize > 0x40) namesize = 0x40;
00341     namesize = namesize/2 - 1;
00342     name.resize(namesize);
00343     bool badname = false;
00344     for (int i=0; i < namesize; ++i) {
00345         badname = badname || d[2*i+1];
00346         name[i] = d[2*i];
00347     }
00348     if (badname) {
00349         name.assign("");
00350     }
00351     // only allow valid Utf8 names or names that start with the value 5
00352     // TODO: handle names that start with 0x1
00353 /*    if (namesize == 0 || (name[0] != 5 && !checkUtf8(name))) {
00354         fprintf(stderr, "Invalid entry name in OLE: '%s' of length %i\n",
00355         name.c_str(), namesize);
00356         currentDataBlock = -1;
00357         return;
00358     }*/
00359     
00360     stream->m_entryinfo.filename.assign(name);
00361     currentDataBlock = readLittleEndianInt32(d+0x74);
00362     currentStreamSize = readLittleEndianInt32(d+0x78);
00363     stream->m_entryinfo.size = currentStreamSize;
00364     if (currentDataBlock > maxindex || currentStreamSize <= 0) {
00365         currentDataBlock = -1;
00366     }
00367 }
00368 InputStream*
00369 OleInputStream::nextEntry() {
00370     return p->nextEntry();
00371 }
00372 InputStream*
00373 OleInputStream::Private::nextEntry() {
00374     if (currentTableBlock < 0) return 0;
00375     do {
00376         if (++currentTableIndex == 4) {
00377             currentTableBlock = nextBlock(currentTableBlock);
00378             if (currentTableBlock < 0) {
00379                 return 0;
00380             }
00381             currentTableIndex = 0;
00382         }
00383         readEntryInfo();
00384     } while (currentDataBlock < 0);
00385     //printEntry(data+(currentTableBlock+1)*512+128*currentTableIndex);
00386 
00387     entrystream->reset();
00388 
00389     return entrystream;
00390 }
00391 bool
00392 OleInputStream::checkHeader(const char* data, int32_t datasize) {
00393     static const unsigned char ole_magic[] = {
00394         0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
00395     return datasize > 8 && memcmp(data, ole_magic, 8) == 0;
00396 }

strigi/src/streams

Skip menu "strigi/src/streams"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

KDE Support

Skip menu "KDE Support"
  • akonadi
  • Decibel
  • grantlee
  • kdewin
  • phonon
  •     Backend
  • polkit-qt
  • qca
  • qimageblitz
  • soprano
  • strigi
  •     searchclient
  •     streamanalyzer
  •     streams
Generated for KDE Support by doxygen 1.5.9-20090814
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal