00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "oleinputstream.h"
00021 #include "textutils.h"
00022 #include "bufferedstream.h"
00023 #include <iostream>
00024 #include <set>
00025 using namespace Strigi;
00026 using namespace std;
00027
00028 namespace {
00029 class OleEntryStream;
00030 }
00031 class OleInputStream::Private {
00032 public:
00033 const char* data;
00034 std::vector<int32_t> batIndex;
00035 std::vector<int32_t> sbatIndex;
00036 std::vector<int32_t> sbatbIndex;
00037 std::set<int32_t> usedSBlocks;
00038 std::set<int32_t> usedBlocks;
00039 int32_t size;
00040 int32_t maxindex;
00041 int32_t maxsindex;
00042 int32_t currentTableBlock;
00043 int32_t currentTableIndex;
00044 int32_t currentDataBlock;
00045 int32_t currentStreamSize;
00046 OleEntryStream* const entrystream;
00047 OleInputStream* stream;
00048
00049 Private(OleInputStream* s, InputStream* input);
00050 ~Private();
00051 void readEntryInfo();
00052 int32_t nextBlock(int32_t);
00053 int32_t nextSmallBlock(int32_t);
00054 const char* getCurrentSmallBlock();
00055 InputStream* nextEntry();
00056 };
00057
00058 namespace {
00059 class OleEntryStream : public BufferedInputStream {
00060 public:
00061 OleInputStream::Private* const parent;
00062 int64_t done;
00063 int32_t blockoffset;
00064 int32_t blocksize;
00065
00066 OleEntryStream(OleInputStream::Private* f) :parent(f), blockoffset(0) {
00067 setMinBufSize(512);
00068 done = 0;
00069 blockoffset = 0;
00070 m_size = parent->currentStreamSize;
00071 }
00072 void reset() {
00073 resetBuffer();
00074 done = 0;
00075 blockoffset = 0;
00076 m_size = parent->currentStreamSize;
00077 blocksize = (m_size < 4096) ?64 : 512;
00078 }
00079 int32_t fillBuffer(char* start, int32_t space);
00080 };
00081 void
00082 printEntry(const char* d) {
00083 char type = d[0x42];
00084 string name;
00085 for (int i=0; i< d[0x40]; ++i) {
00086 name.append(d+2*i,1);
00087 }
00088 int32_t prevIndex = readLittleEndianInt32(d+0x44);
00089 int32_t nextIndex = readLittleEndianInt32(d+0x48);
00090 int32_t firstIndex = readLittleEndianInt32(d+0x4C);
00091 int32_t blockStart = readLittleEndianInt32(d+0x74);
00092 int32_t blockSize = readLittleEndianInt32(d+0x78);
00093 printf("entry %i %s: %i %i %i %i %i %i\n", d[0x40],name.c_str(), type, prevIndex, nextIndex, firstIndex,
00094 blockStart, blockSize);
00095 }
00096 }
00097 int32_t
00098 OleEntryStream::fillBuffer(char* start, int32_t space) {
00099 if (done == m_size) return -1;
00100
00101 int32_t n = space;
00102 int32_t avail = blocksize-blockoffset;
00103 if (avail > m_size-done) {
00104 avail = m_size-done;
00105 }
00106 if (n > avail) {
00107 n = avail;
00108 }
00109 const char* d;
00110 if (blocksize == 64) {
00111 d = parent->getCurrentSmallBlock();
00112 if (d == 0) {
00113 m_status = Error;
00114 fprintf(stderr, "error in small blocks\n");
00115 return -1;
00116 }
00117 } else {
00118 d = parent->data+(1+parent->currentDataBlock)*512;
00119 }
00120 if (d < parent->data || parent->data + parent->size < d + n) {
00121 m_status = Error;
00122 m_error = "Invalid OLE stream.";
00123 cerr << "not 0 < " << d-parent->data << " < " << m_size << " "
00124 << blocksize << endl;
00125 return -1;
00126 }
00127 memcpy(start, d+blockoffset, n);
00128 done += n;
00129 blockoffset += n;
00130 if (blockoffset == blocksize) {
00131 if (blocksize == 64) {
00132 parent->currentDataBlock
00133 = parent->nextSmallBlock(parent->currentDataBlock);
00134 } else {
00135 parent->currentDataBlock
00136 = parent->nextBlock(parent->currentDataBlock);
00137 }
00138 blockoffset = 0;
00139 if (parent->currentDataBlock < 0) {
00140 if (parent->currentDataBlock != -2 || done != m_size) {
00141 fprintf(stderr, "error: %i\n", parent->currentDataBlock);
00142 m_status = Error;
00143 n = -1;
00144 }
00145 }
00146 }
00147 return n;
00148 }
00149
00150 OleInputStream::OleInputStream(InputStream* input) :SubStreamProvider(input),
00151 p(new Private(this, input)) {
00152 }
00153 OleInputStream::Private::Private(OleInputStream* s, InputStream* input)
00154 :entrystream(new OleEntryStream(this)), stream(s) {
00155 currentTableBlock = -1;
00156
00157 size = input->read(data, 512, 512);
00158 if (size != 512) {
00159 stream->m_status = Error;
00160 stream->m_error = "File is too small.";
00161 return;
00162 }
00163 input->reset(0);
00164
00165 int32_t nBat = readLittleEndianInt32(data+0x2c);
00166 int32_t ptOffset = readLittleEndianInt32(data+0x30);
00167 int32_t sBatOffset = readLittleEndianInt32(data+0x3c);
00168 int32_t xBatOffset = readLittleEndianInt32(data+0x44);
00169 int32_t nXBat = readLittleEndianInt32(data+0x48);
00170 if (!checkHeader(data, size) || nBat < 0 || nBat > 128*nXBat+109
00171 || nXBat < 0) {
00172 stream->m_status = Error;
00173 stream->m_error = "Invalid OLE file.";
00174 return;
00175 }
00176 int32_t max = 0;
00177 batIndex.reserve(nBat);
00178 data += 76;
00179 for (int i = 0; i < ::min(109, nBat); ++i) {
00180 int32_t p = readLittleEndianInt32(data+4*i);
00181 batIndex.push_back(p);
00182 if (p > max) max = p;
00183 }
00184 if (ptOffset > max) max = ptOffset;
00185 if (128*(nBat-1) > max) max = 128*(nBat-1);
00186
00187 int32_t toread = (max+2)*512;
00188 if (input->size() >= 0 && input->size() < toread) {
00189 stream->m_status = Error;
00190 stream->m_error = "File is incomplete.";
00191 return;
00192 }
00193 toread = (input->size() > 0) ?input->size() :10000000;
00194 size = input->read(data, toread, toread);
00195 input->reset(0);
00196 if (size != input->size()) {
00197 stream->m_status = Error;
00198 stream->m_error
00199 = string("File cannot be read completely: ")+input->error();
00200 return;
00201 }
00202 maxindex = size/512-2;
00203
00204
00205 xBatOffset = 512 + 512 * xBatOffset;
00206 for (int j = 0; j < nXBat; ++j) {
00207 for (int i = 0; i<127 && (int)batIndex.size() < nBat; ++i) {
00208 int32_t p = readLittleEndianInt32(data + 4*i + xBatOffset);
00209 batIndex.push_back(p);
00210 }
00211 xBatOffset = 512+512*readLittleEndianInt32(data + 508 + xBatOffset);
00212 }
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227 while (sBatOffset >= 0 && sbatIndex.size() < 1000) {
00228 sbatIndex.push_back(sBatOffset);
00229
00230
00231
00232
00233
00234
00235
00236 sBatOffset = nextBlock(sBatOffset);
00237 }
00238
00239 sbatbIndex.reserve(sbatIndex.size()*16);
00240
00241 currentDataBlock = (1+ptOffset)*512 + 0x74;
00242 if (currentDataBlock + 4 > size) {
00243 stream->m_status = Error;
00244 stream->m_error = "Invalid header.";
00245 return;
00246 }
00247 currentDataBlock = readLittleEndianInt32(data + currentDataBlock);
00248 while (currentDataBlock >= 0 && sbatbIndex.size() < 16000) {
00249 sbatbIndex.push_back(currentDataBlock);
00250 currentDataBlock = nextBlock(currentDataBlock);
00251 }
00252 maxsindex = sbatbIndex.size()*8;
00253
00254 currentTableBlock = ptOffset;
00255 currentTableIndex = 0;
00256 }
00257 OleInputStream::~OleInputStream() {
00258 delete p;
00259 }
00260 OleInputStream::Private::~Private() {
00261 delete entrystream;
00262 }
00263 int32_t
00264 OleInputStream::Private::nextBlock(int32_t in) {
00265
00266 int32_t bid = in/128;
00267 if (bid < 0 || bid >= (int32_t)batIndex.size()) {
00268 fprintf(stderr, "error 5: input block out of range %i\n", in);
00269 return -4;
00270 }
00271 bid = batIndex[bid]+1;
00272 int32_t next = in%128*4;
00273 next += 512*bid;
00274 if (next < 0 || size - 4 < next) {
00275 fprintf(stderr, "error 3: output block out of range %i\n", next);
00276 return -4;
00277 }
00278 bid = next;
00279 next = readLittleEndianInt32(data+bid);
00280 bool error = next < -2 || next == -1 || next > maxindex
00281 || usedBlocks.count(next) > 0;
00282 if (error) {
00283 fprintf(stderr, "error 4: output block out of range %i\n", next);
00284 next = -4;
00285 } else if (next >= 0) {
00286
00287 usedBlocks.insert(next);
00288 }
00289 return next;
00290 }
00291 int32_t
00292 OleInputStream::Private::nextSmallBlock(int32_t in) {
00293
00294 int32_t bid = in/128;
00295 if (bid < 0 || bid >= (int32_t)sbatIndex.size()) {
00296 fprintf(stderr, "error 6: input block out of range %i\n", in);
00297 return -4;
00298 }
00299 bid = sbatIndex[bid]+1;
00300 int32_t next = in%128*4;
00301 next += 512*bid;
00302 if (next < 0 || size - 4 < next) {
00303 fprintf(stderr, "error 1: output block out of range %i\n", next);
00304 return -4;
00305 }
00306 next = readLittleEndianInt32(data+next);
00307 bool error = next < -2 || next == -1 || next > maxsindex
00308 || usedSBlocks.count(next) > 0;
00309 if (error) {
00310 fprintf(stderr, "error 2: output block out of range %i\n", next);
00311 next = -4;
00312 } else if (next >= 0) {
00313
00314 usedSBlocks.insert(next);
00315 }
00316 return next;
00317 }
00318 const char*
00319 OleInputStream::Private::getCurrentSmallBlock() {
00320 const char* d = data;
00321
00322 int32_t i = currentDataBlock/8;
00323 if (i < 0 || i >= (int32_t)sbatbIndex.size()) {
00324 return 0;
00325 }
00326 i = 512*(1+sbatbIndex[i]) + (currentDataBlock%8)*64;
00327 return (i > size-64) ?0 :d+i;
00328 }
00329 void
00330 OleInputStream::Private::readEntryInfo() {
00331 const char* d = data + (1+currentTableBlock)*512 + 128*currentTableIndex;
00332 char entryType = d[0x42];
00333 if (entryType != 2) {
00334 currentDataBlock = -1;
00335 return;
00336 }
00337 string name;
00338 int32_t namesize = d[0x40];
00339 if (namesize < 2) namesize = 2;
00340 if (namesize > 0x40) namesize = 0x40;
00341 namesize = namesize/2 - 1;
00342 name.resize(namesize);
00343 bool badname = false;
00344 for (int i=0; i < namesize; ++i) {
00345 badname = badname || d[2*i+1];
00346 name[i] = d[2*i];
00347 }
00348 if (badname) {
00349 name.assign("");
00350 }
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360 stream->m_entryinfo.filename.assign(name);
00361 currentDataBlock = readLittleEndianInt32(d+0x74);
00362 currentStreamSize = readLittleEndianInt32(d+0x78);
00363 stream->m_entryinfo.size = currentStreamSize;
00364 if (currentDataBlock > maxindex || currentStreamSize <= 0) {
00365 currentDataBlock = -1;
00366 }
00367 }
00368 InputStream*
00369 OleInputStream::nextEntry() {
00370 return p->nextEntry();
00371 }
00372 InputStream*
00373 OleInputStream::Private::nextEntry() {
00374 if (currentTableBlock < 0) return 0;
00375 do {
00376 if (++currentTableIndex == 4) {
00377 currentTableBlock = nextBlock(currentTableBlock);
00378 if (currentTableBlock < 0) {
00379 return 0;
00380 }
00381 currentTableIndex = 0;
00382 }
00383 readEntryInfo();
00384 } while (currentDataBlock < 0);
00385
00386
00387 entrystream->reset();
00388
00389 return entrystream;
00390 }
00391 bool
00392 OleInputStream::checkHeader(const char* data, int32_t datasize) {
00393 static const unsigned char ole_magic[] = {
00394 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
00395 return datasize > 8 && memcmp(data, ole_magic, 8) == 0;
00396 }