strigi/src/streams
inputstreamreader.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef HAVE_CONFIG_H
00022 # include "config.h"
00023 #endif
00024
00025 #include "inputstreamreader.h"
00026 #include <strigi/strigiconfig.h>
00027 #include <cerrno>
00028
00029 #ifdef ICONV_SECOND_ARGUMENT_IS_CONST
00030 #define ICONV_CONST const
00031 #else
00032 #define ICONV_CONST
00033 #endif
00034
00035 using namespace Strigi;
00036
00037 InputStreamReader::InputStreamReader(InputStream* i, const char* enc) {
00038 m_status = Ok;
00039 finishedDecoding = false;
00040 input = i;
00041 if (enc == 0) enc = "UTF-8";
00042 #ifdef _LIBICONV_H
00043 if (sizeof(wchar_t) == 4) {
00044 converter = iconv_open("UCS-4-INTERNAL", enc);
00045 } if (sizeof(wchar_t) == 2) {
00046 converter = iconv_open("UCS-2-INTERNAL", enc);
00047 #else
00048 if (sizeof(wchar_t) > 1) {
00049 converter = iconv_open("WCHAR_T", enc);
00050 #endif
00051 } else {
00052 converter = iconv_open("ASCII", enc);
00053 }
00054
00055
00056 if (converter == (iconv_t) -1) {
00057 m_error = "conversion from '";
00058 m_error += enc;
00059 m_error += "' not available.";
00060 m_status = Error;
00061 return;
00062 }
00063 charbuf.setSize(262);
00064
00065 charsLeft = 0;
00066 }
00067 InputStreamReader::~InputStreamReader() {
00068 if (converter != (iconv_t) -1) {
00069 iconv_close(converter);
00070 }
00071 }
00072 int32_t
00073 InputStreamReader::decode(wchar_t* start, int32_t space) {
00074
00075 ICONV_CONST char *inbuf = charbuf.readPos;
00076 size_t inbytesleft = charbuf.avail;
00077 size_t outbytesleft = sizeof(wchar_t)*space;
00078 char *outbuf = (char*)start;
00079 size_t r = iconv(converter, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00080 int32_t nwritten;
00081 if (r == (size_t)-1) {
00082 switch (errno) {
00083 case EILSEQ:
00084 m_error = "Invalid multibyte sequence.";
00085 m_status = Error;
00086 return -1;
00087 case EINVAL:
00088
00089
00090 std::memmove(charbuf.start, inbuf, inbytesleft);
00091 charbuf.readPos = charbuf.start;
00092 charbuf.avail = inbytesleft;
00093 nwritten = ((wchar_t*)outbuf) - start;
00094 break;
00095 case E2BIG:
00096 charbuf.readPos += charbuf.avail - inbytesleft;
00097 charbuf.avail = inbytesleft;
00098 nwritten = space;
00099 break;
00100 default:
00101 char tmp[10];
00102 snprintf(tmp, 10, "%i", errno);
00103 m_error = "inputstreamreader error: ";
00104 m_error.append(tmp);
00105 fprintf(stderr, "inputstreamreader::error %d\n", errno);
00106 m_status = Error;
00107 return -1;
00108 }
00109 } else {
00110 charbuf.readPos = charbuf.start;
00111 charbuf.avail = 0;
00112 nwritten = ((wchar_t*)outbuf) - start;
00113 if (input == 0) {
00114 finishedDecoding = true;
00115 }
00116 }
00117 return nwritten;
00118 }
00119 int32_t
00120 InputStreamReader::fillBuffer(wchar_t* start, int32_t space) {
00121
00122 if (input && charbuf.readPos == charbuf.start) {
00123 const char *begin;
00124 int32_t numRead;
00125 numRead = input->read(begin, 1, charbuf.size - charbuf.avail);
00126
00127 if (numRead < -1) {
00128 m_error = input->error();
00129 m_status = Error;
00130 input = 0;
00131 return numRead;
00132 }
00133 if (numRead < 1) {
00134
00135 input = 0;
00136 if (charbuf.avail) {
00137 m_error = "stream ends on incomplete character";
00138 m_status = Error;
00139 }
00140 return -1;
00141 }
00142
00143 std::memmove(charbuf.start + charbuf.avail, begin, numRead);
00144 charbuf.avail = numRead + charbuf.avail;
00145 }
00146
00147 int32_t n = decode(start, space);
00148
00149 return n;
00150 }