strigi/src/streams
encodinginputstream.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifdef HAVE_CONFIG_H
00021 # include "config.h"
00022 #endif
00023
00024 #include "encodinginputstream.h"
00025 #include <iconv.h>
00026 #include <cerrno>
00027 using namespace Strigi;
00028
// ICONV_CONST expands to `const` when the build reports that iconv() takes a
// `const char**` as its second argument, and to nothing otherwise, so the
// input pointer handed to iconv() can be declared with the matching type.
#if defined(ICONV_SECOND_ARGUMENT_IS_CONST)
#  define ICONV_CONST const
#else
#  define ICONV_CONST
#endif
00034
00035 class EncodingInputStream::Private {
00036 public:
00037 StreamBuffer<char> charbuf;
00038 EncodingInputStream* const p;
00039 InputStream* input;
00040 iconv_t converter;
00041 int32_t charsLeft;
00042 bool finishedEncoding;
00043
00044 Private(EncodingInputStream* eis, InputStream* i, const char* inenc,
00045 const char* outenc) :p(eis), input(i),
00046 converter((iconv_t)-1), charsLeft(0), finishedEncoding(false) {
00047 if (outenc == 0) {
00048 outenc = "UTF-8";
00049 }
00050 converter = iconv_open(outenc, inenc);
00051 }
00052 ~Private() {
00053 if (converter != (iconv_t) -1) {
00054 iconv_close(converter);
00055 }
00056 }
00057 int32_t decode(char* start, int32_t space);
00058 };
00059
00060 EncodingInputStream::EncodingInputStream(InputStream* s, const char* inenc,
00061 const char* outenc)
00062 :p(new Private(this, s, inenc, outenc)) {
00063 if (inenc == 0) {
00064 m_status = Error;
00065 m_error = "No input encoding provided.";
00066 return;
00067 }
00068 m_status = Ok;
00069
00070
00071 if (p->converter == (iconv_t) -1) {
00072 m_error = "conversion from '";
00073 m_error.append(inenc);
00074 m_error.append("' to '");
00075 m_error.append(outenc ? outenc : "(null)");
00076 m_error.append(" not available.");
00077 m_status = Error;
00078 return;
00079 }
00080 p->charbuf.setSize(262);
00081 p->charsLeft = 0;
00082 }
00083 EncodingInputStream::~EncodingInputStream() {
00084 delete p;
00085 }
00086 int32_t
00087 EncodingInputStream::Private::decode(char* start, int32_t space) {
00088
00089 ICONV_CONST char *inbuf = charbuf.readPos;
00090 size_t inbytesleft = charbuf.avail;
00091 size_t outbytesleft = space;
00092 char *outbuf = start;
00093 size_t r = iconv(converter, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00094 int32_t nwritten;
00095 if (r == (size_t)-1) {
00096 switch (errno) {
00097 case EILSEQ:
00098 p->m_error = "Invalid multibyte sequence.";
00099 p->m_status = Error;
00100 return -1;
00101 case EINVAL:
00102
00103
00104 std::memmove(charbuf.start, inbuf, inbytesleft);
00105 charbuf.readPos = charbuf.start;
00106 charbuf.avail = inbytesleft;
00107 nwritten = outbuf - start;
00108 break;
00109 case E2BIG:
00110 charbuf.readPos += charbuf.avail - inbytesleft;
00111 charbuf.avail = inbytesleft;
00112 nwritten = space;
00113 break;
00114 default:
00115 char tmp[10];
00116 snprintf(tmp, 10, "%i", errno);
00117 p->m_error = "inputstreamreader error: ";
00118 p->m_error.append(tmp);
00119 fprintf(stderr, "inputstreamreader::error %d\n", errno);
00120 p->m_status = Error;
00121 return -1;
00122 }
00123 } else {
00124 charbuf.readPos = charbuf.start;
00125 charbuf.avail = 0;
00126 nwritten = outbuf - start;
00127 if (input == 0) {
00128 finishedEncoding = true;
00129 }
00130 }
00131 return nwritten;
00132 }
00133 int32_t
00134 EncodingInputStream::fillBuffer(char* start, int32_t space) {
00135
00136 if (p->input && p->charbuf.readPos == p->charbuf.start) {
00137 const char *begin;
00138 int32_t numRead;
00139 numRead = p->input->read(begin, 1, p->charbuf.size - p->charbuf.avail);
00140
00141 if (numRead < -1) {
00142 m_error = p->input->error();
00143 m_status = Error;
00144 p->input = 0;
00145 return numRead;
00146 }
00147 if (numRead < 1) {
00148
00149 p->input = 0;
00150 if (p->charbuf.avail) {
00151 m_error = "stream ends on incomplete character";
00152 m_status = Error;
00153 }
00154 return -1;
00155 }
00156
00157 std::memmove(p->charbuf.start + p->charbuf.avail, begin, numRead);
00158 p->charbuf.avail = numRead + p->charbuf.avail;
00159 }
00160
00161 int32_t n = p->decode(start, space);
00162
00163 return n;
00164 }