00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 # include "config.h"
00023 #endif
00024
00025
00026 #include "mailinputstream.h"
00027 #include "subinputstream.h"
00028 #include "stringterminatedsubstream.h"
00029 #include "base64inputstream.h"
00030 #include "iconv.h"
00031 #include <cstring>
00032 #include <sstream>
00033 #include <iostream>
00034
00035 #ifdef HAVE_STRINGS_H
00036 #include <strings.h>
00037 #endif
00038
00039 #ifdef ICONV_SECOND_ARGUMENT_IS_CONST
00040 #define ICONV_CONST const
00041 #else
00042 #define ICONV_CONST
00043 #endif
00044
00045 using namespace std;
00046 using namespace Strigi;
00047
/**
 * Convert one hexadecimal digit to its numeric value.
 *
 * @param h character to convert; expected to be one of 0-9, a-f or A-F
 * @return the value 0..15 for a hexadecimal digit. Other characters are
 *         not rejected: they simply yield h - '0', so callers have to
 *         validate the input (for instance with isxdigit()) beforehand.
 */
char
decodeHex(char h) {
    if ('a' <= h && h <= 'f') {
        return h - 'a' + 10;
    }
    if ('A' <= h && h <= 'F') {
        return h - 'A' + 10;
    }
    return h - '0';
}
00054
/**
 * Converts strings from a named character set to UTF-8 with iconv.
 *
 * One conversion descriptor per character set name is opened on first
 * use and kept until the Decoder is destroyed. The result of a failed
 * iconv_open() is cached as well, so an unsupported character set is
 * only looked up once. A single scratch buffer is reused between calls.
 */
class Decoder {
private:
    char* buffer;       // scratch output buffer, grown on demand
    size_t bufferlen;   // number of bytes allocated for buffer
    std::map<std::string, iconv_t> iconvs; // charset name -> descriptor

    // The object owns malloc'ed memory and iconv descriptors; copying
    // it would release them twice, so copying is disabled.
    Decoder(const Decoder&);
    Decoder& operator=(const Decoder&);
public:
    Decoder() :buffer(0), bufferlen(0) {}
    ~Decoder() {
        free(buffer);
        std::map<std::string, iconv_t>::const_iterator i;
        for (i = iconvs.begin(); i != iconvs.end(); ++i) {
            // failed iconv_open() results are cached too; they must not
            // be handed to iconv_close()
            if (i->second != (iconv_t)-1) {
                iconv_close(i->second);
            }
        }
    }
    /**
     * Convert data in place from character set enc to UTF-8.
     *
     * data is left untouched when it is empty, when enc is not known to
     * iconv, when memory for the output cannot be allocated or when the
     * conversion itself fails.
     *
     * @param enc  name of the character set data is currently in
     * @param data text to convert; replaced by the UTF-8 text on success
     */
    void decode(const std::string& enc, std::string& data);
};
void
Decoder::decode(const std::string& enc, std::string& data) {
    // look the descriptor up once; open and cache it on first use
    std::map<std::string, iconv_t>::iterator i = iconvs.find(enc);
    if (i == iconvs.end()) {
        const iconv_t opened = iconv_open("UTF-8", enc.c_str());
        i = iconvs.insert(
            std::map<std::string, iconv_t>::value_type(enc, opened)).first;
    }
    const iconv_t conv = i->second;
    if (conv == (iconv_t)-1) return;

    size_t ilen = data.length();
    if (ilen == 0) return;

    // reserve four output bytes for every input byte
    size_t olen = 4*ilen;
    if (olen > bufferlen) {
        // keep the old buffer and its recorded size if realloc fails
        char* const grown = (char*)realloc(buffer, olen);
        if (grown == 0) return;
        buffer = grown;
        bufferlen = olen;
    }

    // The descriptor is shared between calls: bring it back to its
    // initial state so an earlier, possibly failed, conversion cannot
    // influence this one.
    iconv(conv, 0, 0, 0, 0);

    ICONV_CONST char* in = (char*)data.c_str();
    char* out = buffer;
    const size_t r = iconv(conv, &in, &ilen, &out, &olen);
    if (r != (size_t)-1) {
        data.assign(buffer, out - buffer);
    }
}
00096
/**
 * Decoder for '='-escaped text that keeps its result in a member string,
 * so the same storage is reused from one call to the next.
 */
class QuotedPrintableDecoder {
public:
    /**
     * Decode len bytes starting at v.
     *
     * @return reference to the internal result string; its content is
     *         replaced by the next call
     */
    std::string& decodeQuotedPrintable(const char* v, uint32_t len);
private:
    std::string decoded;    // result of the most recent call
};
00103 class HeaderDecoder : public QuotedPrintableDecoder, Decoder {
00104 private:
00105 string decoded;
00106 public:
00107 const string& decodedHeaderValue(const char* v, uint32_t len);
00108 };
00109
00110 string&
00111 QuotedPrintableDecoder::decodeQuotedPrintable(const char* v, uint32_t len) {
00112 if (decoded.size() < len) {
00113 decoded.reserve(len);
00114 }
00115 decoded.resize(0);
00116 const char* pos = v;
00117 const char* end = v + len;
00118 char c;
00119 while (v < end) {
00120 if (*v == '=' && end - v > 2 && isxdigit(v[1]) && isxdigit(v[2])) {
00121 decoded.append(pos, v - pos);
00122 c = decodeHex(v[1])*16 + decodeHex(v[2]);
00123 decoded.append(&c, 1);
00124 pos = v = v + 3;
00125 } else if (*v == '_') {
00126 decoded.append(pos, v - pos);
00127 decoded.append(" ");
00128 pos = v = v + 1;
00129 } else {
00130 v++;
00131 }
00132 }
00133 if (pos < end) {
00134 decoded.append(pos, end-pos);
00135 }
00136 return decoded;
00137 }
00141 const string&
00142 HeaderDecoder::decodedHeaderValue(const char* v, uint32_t len) {
00143 if (decoded.size() < len) {
00144 decoded.reserve(len*2);
00145 }
00146 decoded.resize(0);
00147 const char* s = v;
00148 const char* p = v;
00149 const char* e = s + len;
00150 while (s < e) {
00151 if (e-s > 8 && *s == '=' && s[1] == '?') {
00152
00153 const char *q1, *q2, *end;
00154 q1 = s+2;
00155 while (q1 < e && *q1 != '?') q1++;
00156 q2 = q1+1;
00157 while (q2 < e && *q2 != '?') q2++;
00158 end = q2+1;
00159 while (end < e && *end != '?') end++;
00160 if (e - end < 1 || end[1] != '=') {
00161 s++;
00162 continue;
00163 }
00164
00165 decoded.append(p, s-p);
00166 s += 2;
00167 q1++;
00168 q2++;
00169
00170 if (*q1 == 'b' || *q1 == 'B') {
00171 string str(Base64InputStream::decode(q2, end-q2));
00172 if (strncasecmp("utf-8", s, 5)) {
00173 string encoding(s, q1-s-1);
00174 decode(encoding, str);
00175 }
00176 decoded.append(str);
00177 } else if (*q1 == 'q' || *q1 =='Q') {
00178 string& str(decodeQuotedPrintable(q2, end-q2));
00179 if (strncasecmp("utf-8", s, 5) != 0) {
00180 string encoding(s, q1-s-1);
00181 decode(encoding, str);
00182 }
00183 decoded.append(str);
00184 } else {
00185 s -= 1;
00186 }
00187
00188
00189 s = p = end + 2;
00190 } else if (e-s > 3 && s[0] == 13 && s[1] == 10
00191 && (s[2] == 9 || s[2] == 32)) {
00192
00193 decoded.append(p, s-p);
00194 s = p = s + 4;
00195 } else {
00196 s++;
00197 }
00198 }
00199 if (p < e) {
00200 decoded.append(p, e-p);
00201 }
00202 return decoded;
00203 }
/**
 * Check whether data starts with one of the header field names
 * "Received:" or "From:", compared case-insensitively.
 *
 * @param data start of the text to inspect; need not be NUL terminated
 * @param left number of bytes available at data
 * @return true if data starts with one of the two field names
 */
bool
checkHeaderKey(const char* data, int32_t left) {
    const bool isReceived
        = left >= 9 && strncasecmp("Received:", data, 9) == 0;
    const bool isFrom
        = left >= 5 && strncasecmp("From:", data, 5) == 0;
    return isReceived || isFrom;
}
00214
00220 bool
00221 MailInputStream::checkHeader(const char* data, int32_t datasize) {
00222
00223
00224
00225 int linecount = 1;
00226 bool key = true;
00227 bool slashr = false;
00228 int32_t pos = 0;
00229 bool reqheader = checkHeaderKey(data, datasize);
00230 char prevc = 0;
00231 while (pos < datasize) {
00232 unsigned char c = data[pos++];
00233 if (slashr) {
00234 slashr = false;
00235 if (c == '\n') {
00236 if (!reqheader) {
00237 reqheader = checkHeaderKey(data+pos, datasize-pos);
00238 }
00239 continue;
00240 }
00241 }
00242 if (key) {
00243 if (c == ':' || (isblank(c) && isspace(prevc))) {
00244
00245
00246 key = false;
00247 } else if ((c == '\n' || c == '\r') && reqheader && linecount >= 5
00248 && (prevc == '\n' || prevc == '\r')) {
00249
00250
00251 return true;
00252 } else if (c != '-' && c != '.' && c != '_' && !isalnum(c)
00253 && c != '#') {
00254
00255 return false;
00256 }
00257 } else {
00258
00259 if (c == '\n' || c == '\r') {
00260
00261 key = true;
00262 linecount++;
00263
00264 if (c == '\r') {
00265 slashr = true;
00266 } else if (!reqheader) {
00267 reqheader = checkHeaderKey(data+pos, datasize-pos);
00268 }
00269 }
00270 }
00271 prevc = c;
00272 }
00273 return reqheader && linecount >= 5;
00274 }
00275 class MailInputStream::Private {
00276 public:
00277 MailInputStream* const m;
00278 int64_t nextLineStartPosition;
00279
00280 int32_t entrynumber;
00281 int maxlinesize;
00282 const char* linestart;
00283 const char* lineend;
00284
00285 StringTerminatedSubStream* substream;
00286 std::string m_contenttransferencoding;
00287 std::string m_contentdisposition;
00288
00289 std::stack<std::string> boundary;
00290
00291 HeaderDecoder decoder;
00292
00293 void readHeaderLine();
00294 void readHeader();
00295 void scanBody();
00296 void handleHeaderLine();
00297 bool handleBodyLine();
00298 bool lineIsEndOfBlock();
00299 bool checkHeaderLine() const;
00300 void clearHeaders();
00301 void ensureFileName();
00302 std::string value(const char* n, const std::string& headerline) const;
00303
00304 Private(MailInputStream* mail);
00305 ~Private();
00306 };
00307 MailInputStream::Private::Private(MailInputStream* mail) :m(mail) {
00308 substream = 0;
00309 entrynumber = 0;
00310 nextLineStartPosition = 0;
00311 }
00312 MailInputStream::Private::~Private() {
00313 if (substream && substream != m->m_entrystream) {
00314 delete substream;
00315 }
00316 }
00317 MailInputStream::MailInputStream(InputStream* input)
00318 : SubStreamProvider(input), p(new Private(this)) {
00319
00320 p->readHeader();
00321 if (m_status != Ok) {
00322 fprintf(stderr, "no valid header\n");
00323 return;
00324 }
00325 }
00326 MailInputStream::~MailInputStream() {
00327 delete p;
00328 }
00337 void
00338 MailInputStream::Private::readHeaderLine() {
00339
00340 char state = 0;
00341 int32_t nread;
00342 int32_t linepos = 0;
00343 bool completeLine = false;
00344 char c = 0;
00345
00346 m->m_input->reset(nextLineStartPosition);
00347 do {
00348 nread = m->m_input->read(linestart, linepos+1, maxlinesize);
00349 if (nread < linepos+1) {
00350 completeLine = true;
00351 lineend = linestart + nread;
00352 m->m_status = Eof;
00353 return;
00354 }
00355 m->m_input->reset(nextLineStartPosition);
00356 if (m->m_input->status() == Error) {
00357 m->m_status = Error;
00358 m->m_error = m->m_input->error();
00359 return;
00360 } else if (linepos >= maxlinesize) {
00361
00362 m->m_status = Error;
00363 ostringstream out;
00364 out << "mail header line is too long: more than " << linepos
00365 << " bytes.";
00366 m->m_error = out.str();
00367 return;
00368 } else {
00369 while (linepos < nread) {
00370 c = linestart[linepos];
00371 if (state == 0) {
00372 if (c == '\r') {
00373 state = 1;
00374 } else if (c == '\n') {
00375 state = 2;
00376 }
00377 } else if (state == 1) {
00378 if (c == '\n') {
00379 state = 3;
00380 } else if (c == '\r' || !isspace(c)) {
00381 completeLine = true;
00382 lineend = linestart + linepos - 1;
00383 break;
00384 } else {
00385 state = 0;
00386 }
00387 } else if (state == 2) {
00388 if (c == '\n' || !isspace(c)) {
00389 completeLine = true;
00390 lineend = linestart + linepos - 1;
00391 break;
00392 } else {
00393 state = 0;
00394 }
00395 } else {
00396 if (c == '\r' || linepos == 2 || !isspace(c)) {
00397 completeLine = true;
00398 lineend = linestart + linepos - 2;
00399 break;
00400 } else {
00401 state = 0;
00402 }
00403 }
00404 linepos++;
00405 }
00406 }
00407 } while (!completeLine);
00408 nextLineStartPosition += linepos;
00409 }
00410 string
00411 MailInputStream::Private::value(const char* n, const string& headerline) const {
00412 size_t nl = strlen(n);
00413 string value;
00414
00415 const char* hl = headerline.c_str();
00416 const char* v = strcasestr(hl, n);
00417 if (v == 0) {
00418
00419 return value;
00420 }
00421 v += nl;
00422 v += strspn(v, "= \n\r");
00423 const char* vend = strchr(v, ';');
00424 if (vend == 0) {
00425 vend = hl + headerline.length();
00426 }
00427 if (*v == '"' && vend-v > 2) {
00428 value.assign(v+1, vend-v-2);
00429 } else {
00430 value.assign(v, vend-v);
00431 }
00432 return value;
00433 }
00434 void
00435 MailInputStream::Private::readHeader() {
00436 maxlinesize = 1024*1024;
00437
00438 readHeaderLine();
00439 while (m->m_status == Ok && linestart != lineend) {
00440 handleHeaderLine();
00441 readHeaderLine();
00442 }
00443 }
00448 void
00449 MailInputStream::Private::scanBody() {
00450 while (m->m_status == Ok) {
00451 readHeaderLine();
00452 int32_t len = lineend - linestart;
00453 if (len > 2 && strncmp("--", linestart, 2) == 0) {
00454 int32_t blen = boundary.top().length();
00455 if (len == blen + 4 && strncmp(linestart + 2 + blen, "--", 2) == 0
00456 && strncmp(linestart + 2, boundary.top().c_str(), blen)
00457 == 0) {
00458
00459 boundary.pop();
00460 if (boundary.size() == 0) {
00461 m->m_status = Eof;
00462 }
00463 } else if (len == blen + 2
00464 && strncmp(linestart + 2, boundary.top().c_str(), blen)
00465 == 0) {
00466 if (handleBodyLine()) {
00467 break;
00468 }
00469 }
00470 }
00471 }
00472 }
00473 void
00474 MailInputStream::Private::handleHeaderLine() {
00475 static const char* subject = "Subject:";
00476 static const char* contenttype = "Content-Type:";
00477 static const char* to = "To:";
00478 static const char* from = "From:";
00479 static const char* cc = "Cc:";
00480 static const char* bcc = "Bcc:";
00481 static const char* messageid = "Message-ID:";
00482 static const char* inreplyto = "In-Reply-To:";
00483 static const char* references = "References:";
00484 static const char* contenttransferencoding = "Content-Transfer-Encoding:";
00485 static const char* contentdisposition = "Content-Disposition:";
00486 int32_t len = lineend - linestart;
00487 if (len < 2) return;
00488 if (len < 8) {
00489 return;
00490 } else if (strncasecmp(linestart, subject, 8) == 0) {
00491 int32_t offset = 8;
00492 while (offset < len && isspace(linestart[offset])) offset++;
00493 m->m_subject = decoder.decodedHeaderValue(linestart+offset, len-offset);
00494 } else if (strncasecmp(linestart, to, 3) == 0) {
00495 int32_t offset = 3;
00496
00497 while (offset < len && isspace(linestart[offset])) offset++;
00498 m->m_to = decoder.decodedHeaderValue(linestart+offset, len-offset);
00499 } else if (strncasecmp(linestart, from, 5) == 0) {
00500 int32_t offset = 5;
00501 while (offset < len && isspace(linestart[offset])) offset++;
00502 m->m_from = decoder.decodedHeaderValue(linestart+offset, len-offset);
00503 } else if (strncasecmp(linestart, cc, 3) == 0) {
00504 int32_t offset = 3;
00505 while (offset < len && isspace(linestart[offset])) offset++;
00506 m->m_cc = decoder.decodedHeaderValue(linestart+offset, len-offset);
00507 } else if (strncasecmp(linestart, bcc, 4) == 0) {
00508 int32_t offset = 4;
00509 while (offset < len && isspace(linestart[offset])) offset++;
00510 m->m_bcc = decoder.decodedHeaderValue(linestart+offset, len-offset);
00511 } else if (strncasecmp(linestart, messageid, 11) == 0) {
00512 int32_t offset = 11;
00513 while (offset < len && isspace(linestart[offset])) offset++;
00514 m->m_messageid = decoder.decodedHeaderValue(linestart+offset, len-offset);
00515 } else if (strncasecmp(linestart, inreplyto, 12) == 0) {
00516 int32_t offset = 12;
00517 while (offset < len && isspace(linestart[offset])) offset++;
00518 m->m_inreplyto = decoder.decodedHeaderValue(linestart+offset, len-offset);
00519 } else if (strncasecmp(linestart, references, 11) == 0) {
00520 int32_t offset = 11;
00521 while (offset < len && isspace(linestart[offset])) offset++;
00522 m->m_references = decoder.decodedHeaderValue(linestart+offset, len-offset);
00523 } else if (strncasecmp(linestart, contenttype, 13) == 0) {
00524 int32_t offset = 13;
00525 while (offset < len && isspace(linestart[offset])) offset++;
00526 m->m_contenttype = std::string(linestart+offset, len-offset);
00527
00528 string b = value("boundary", m->m_contenttype);
00529 if (b.size()) {
00530 boundary.push(b);
00531 }
00532 } else if (strncasecmp(linestart, contenttransferencoding, 26) == 0) {
00533 m_contenttransferencoding = std::string(linestart, len);
00534 } else if (strncasecmp(linestart, contentdisposition, 20) == 0) {
00535 m_contentdisposition = std::string(linestart, len);
00536 }
00537 }
00538 bool
00539 MailInputStream::Private::checkHeaderLine() const {
00540 assert(lineend - linestart >= 0);
00541 bool validheader = linestart < lineend;
00542 if (validheader) {
00543 const char* colpos = linestart;
00544 while (*colpos != ':' && ++colpos != lineend) {}
00545 validheader = colpos != lineend || isblank(*linestart);
00546 }
00547 return validheader;
00548 }
00552 bool
00553 MailInputStream::Private::handleBodyLine() {
00554 clearHeaders();
00555
00556
00557
00558 bool validheader;
00559 size_t n = boundary.size();
00560 do {
00561 readHeaderLine();
00562 validheader = m->m_status == Ok && checkHeaderLine();
00563 if (validheader) {
00564 handleHeaderLine();
00565 }
00566 } while (m->m_status == Ok && validheader);
00567 if (boundary.size() > n) {
00568 return false;
00569 }
00570 readHeaderLine();
00571 if (m->m_status != Ok) {
00572 return false;
00573 }
00574
00575
00576 m->m_entryinfo.filename = value("filename", m_contentdisposition);
00577 if (m->m_entryinfo.filename.length() == 0) {
00578 m->m_entryinfo.filename = value("name", m->m_contenttype);
00579 }
00580
00581
00582 substream = new StringTerminatedSubStream(m->m_input, "--"+boundary.top());
00583
00584 if (strcasestr(m_contenttransferencoding.c_str(), "base64")) {
00585 m->m_entrystream = new Base64InputStream(substream);
00586 } else {
00587 m->m_entrystream = substream;
00588 }
00589 return true;
00590 }
00596 void
00597 MailInputStream::Private::ensureFileName() {
00598 entrynumber++;
00599 if (m->m_entryinfo.filename.length() == 0) {
00600 ostringstream o;
00601 o << entrynumber;
00602 m->m_entryinfo.filename = o.str();
00603 }
00604 m->m_entryinfo.type = EntryInfo::File;
00605 }
00606 InputStream*
00607 MailInputStream::nextEntry() {
00608 if (m_status != Ok) return 0;
00609
00610
00611 if (p->boundary.size() == 0) {
00612
00613 m_status = Eof;
00614 m_entrystream = new SubInputStream(m_input);
00615 m_entryinfo.filename = "body";
00616 return m_entrystream;
00617 }
00618
00619 if (p->substream) {
00620 const char* dummy;
00621 while (p->substream->status() == Ok) {
00622 p->substream->read(dummy, 1, 0);
00623 }
00624 if (p->substream->status() == Error) {
00625 m_status = Error;
00626 } else {
00627 p->nextLineStartPosition = p->substream->offset()
00628 + p->substream->size();
00629 }
00630 if (p->substream && p->substream != m_entrystream) {
00631 delete p->substream;
00632 }
00633 p->substream = 0;
00634 delete m_entrystream;
00635 m_entrystream = 0;
00636
00637 if (m_status != Ok) {
00638 return 0;
00639 }
00640 }
00641 p->scanBody();
00642
00643 if (m_entrystream == 0) {
00644 m_status = Eof;
00645 }
00646 p->ensureFileName();
00647 return m_entrystream;
00648 }
00649 void
00650 MailInputStream::Private::clearHeaders() {
00651 m->m_contenttype.resize(0);
00652 m_contenttransferencoding.resize(0);
00653 m_contentdisposition.resize(0);
00654 }