00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 #ifdef HAVE_CONFIG_H
00058 # include "config.h"
00059 #endif
00060 #if NEED_GNUG_PRAGMAS
00061 # pragma implementation
00062 #endif
00063
00064 #include "UnicodeByteStream.h"
00065 #include "ByteStream.h"
00066
00067
00068 #ifdef HAVE_NAMESPACES
00069 namespace DJVU {
00070 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
00071 }
00072 #endif
00073 #endif
00074
00075 UnicodeByteStream::UnicodeByteStream(const UnicodeByteStream &uni)
00076 : bs(uni.bs), buffer(uni.buffer), bufferpos(uni.bufferpos), linesread(0)
00077 {
00078 startpos=bs->tell();
00079 }
00080
00081 UnicodeByteStream::UnicodeByteStream(
00082 GP<ByteStream> ibs,const GStringRep::EncodeType et)
00083 : bs(ibs), bufferpos(0), linesread(0)
00084 {
00085 buffer=GUTF8String::create(0,0,et);
00086 startpos=bs->tell();
00087 }
00088
00089 UnicodeByteStream::~UnicodeByteStream()
00090 {}
00091
00092 static int
00093 CountLines(const GUTF8String &str)
00094 {
00095 int retval=0;
00096 static const unsigned long lf='\n';
00097 for(int pos=0;(pos=str.search(lf,pos)+1)>0;++retval)
00098 EMPTY_LOOP;
00099 return retval;
00100 }
00101
00102 void
00103 UnicodeByteStream::set_encodetype(const GStringRep::EncodeType et)
00104 {
00105 seek(startpos,SEEK_SET);
00106 bufferpos=0;
00107 buffer=GUTF8String::create(0,0,et);
00108 }
00109
00110 void
00111 UnicodeByteStream::set_encoding(const GUTF8String &xencoding)
00112 {
00113 seek(startpos,SEEK_SET);
00114 bufferpos=0;
00115 buffer=GUTF8String::create(0,0,xencoding);
00116 }
00117
00118 size_t
00119 UnicodeByteStream::read(void *buf, size_t size)
00120 {
00121 bufferpos=0;
00122 const int retval=bs->read(buf,size);
00123 if(retval)
00124 {
00125 buffer=GUTF8String::create(
00126 (unsigned char const *)buf,retval,buffer.get_remainder());
00127 }else
00128 {
00129 buffer=GUTF8String::create(0,0,buffer.get_remainder());
00130 }
00131 return retval;
00132 }
00133
00134 size_t
00135 UnicodeByteStream::write(const void *buf, size_t size)
00136 {
00137 bufferpos=0;
00138 buffer=GUTF8String::create(0,0,buffer.get_remainder());
00139 return bs->write(buf,size);
00140 }
00141
00142 long
00143 UnicodeByteStream::tell(void) const
00144 {
00145 return bs->tell();
00146 }
00147
00148 UnicodeByteStream &
00149 UnicodeByteStream::operator=(UnicodeByteStream &uni)
00150 {
00151 bs=uni.bs;
00152 bufferpos=uni.bufferpos;
00153 buffer=uni.buffer;
00154 return *this;
00155 }
00156
00157 int
00158 UnicodeByteStream::seek
00159 (long offset, int whence, bool nothrow)
00160 {
00161 int retval=bs->seek(offset,whence,nothrow);
00162 bufferpos=0;
00163 buffer=GUTF8String::create(0,0,buffer.get_remainder());
00164 return retval;
00165 }
00166
00167 void
00168 UnicodeByteStream::flush(void)
00169 {
00170 bs->flush();
00171 bufferpos=0;
00172 buffer=GUTF8String::create(0,0,buffer.get_remainder());
00173 }
00174
00175
00176
00177 GUTF8String
00178 UnicodeByteStream::gets(
00179 size_t const t,unsigned long const stopat,bool const inclusive)
00180 {
00181 GUTF8String retval;
00182 unsigned int len=buffer.length()-bufferpos;
00183 if(!len)
00184 {
00185 int i;
00186 char *buf;
00187 static const size_t bufsize=327680;
00188 GPBuffer<char> gbuf(buf,bufsize);
00189 while((i=read(buf,bufsize)>0))
00190 {
00191 if((len=buffer.length()-bufferpos))
00192 break;
00193 }
00194 }
00195 if(len)
00196 {
00197 int i=buffer.search((char)stopat,bufferpos);
00198 if(i>=0)
00199 {
00200 if(inclusive)
00201 {
00202 ++i;
00203 }
00204 if(t&&(i>(int)t+bufferpos))
00205 {
00206 i=t+bufferpos;
00207 }
00208 if(i>bufferpos)
00209 {
00210 retval=buffer.substr(bufferpos,i-bufferpos);
00211 }
00212 bufferpos=i;
00213 linesread+=CountLines(retval);
00214 }else
00215 {
00216 retval=buffer.substr(bufferpos,len);
00217 bufferpos=buffer.length();
00218 linesread+=CountLines(retval);
00219 retval+=gets(t?(t-(i-bufferpos)):0,stopat,inclusive);
00220 }
00221 }
00222 return retval;
00223 }
00224
00225 XMLByteStream::XMLByteStream(UnicodeByteStream &uni)
00226 : UnicodeByteStream(uni) {}
00227
00228 XMLByteStream::XMLByteStream(GP<ByteStream> &ibs)
00229 : UnicodeByteStream(ibs,GStringRep::XOTHER)
00230 {}
00231
00232 GP<XMLByteStream>
00233 XMLByteStream::create(GP<ByteStream> ibs)
00234 {
00235 XMLByteStream *xml=new XMLByteStream(ibs);
00236 GP<XMLByteStream> retval=xml;
00237 xml->init();
00238 return retval;
00239 }
00240
00241 void
00242 XMLByteStream::init(void)
00243 {
00244 unsigned char buf[4];
00245 GP<ByteStream> ibs=bs;
00246 bufferpos=0;
00247 bs->readall(buf,sizeof(buf));
00248 const unsigned int i=(buf[0]<<8)+buf[1];
00249 switch(i)
00250 {
00251 case 0x0000:
00252 {
00253 const unsigned int j=(buf[2]<<8)+buf[3];
00254 switch(j)
00255 {
00256 case 0x003C:
00257 {
00258 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4BE);
00259 break;
00260 }
00261 case 0x3C00:
00262 {
00263 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4_2143);
00264 break;
00265 }
00266 case 0xFEFF:
00267 {
00268 buffer=GUTF8String::create(0,0,GStringRep::XUCS4BE);
00269 startpos+=sizeof(buf);
00270 break;
00271 }
00272 case 0xFFFE:
00273 {
00274 buffer=GUTF8String::create(0,0,GStringRep::XUCS4_2143);
00275 startpos+=sizeof(buf);
00276 break;
00277 }
00278 default:
00279 {
00280 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
00281 break;
00282 }
00283 }
00284 }
00285 case 0x003C:
00286 {
00287 const unsigned int j=(buf[2]<<8)+buf[3];
00288 switch(j)
00289 {
00290 case 0x0000:
00291 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4_3412);
00292 break;
00293 case 0x003F:
00294 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF16BE);
00295 break;
00296 default:
00297 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
00298 break;
00299 }
00300 break;
00301 }
00302 case 0x3C00:
00303 {
00304 const unsigned int j=(buf[2]<<8)+buf[3];
00305 switch(j)
00306 {
00307 case 0x0000:
00308 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUCS4LE);
00309 break;
00310 case 0x3F00:
00311 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF16LE);
00312 break;
00313 default:
00314 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
00315 break;
00316 }
00317 break;
00318 }
00319 case 0x4C6F:
00320 {
00321 const unsigned int j=(buf[2]<<8)+buf[3];
00322 buffer=GUTF8String::create(buf,sizeof(buf),
00323 (j == 0xA794)?(GStringRep::XEBCDIC):(GStringRep::XUTF8));
00324 break;
00325 }
00326 case 0xFFFE:
00327 {
00328 buffer=GUTF8String::create(buf+2,sizeof(buf)-2,GStringRep::XUTF16LE);
00329 startpos+=2;
00330 break;
00331 }
00332 case 0xFEFF:
00333 {
00334 buffer=GUTF8String::create(buf+2,sizeof(buf)-2,GStringRep::XUTF16BE);
00335 startpos+=2;
00336 break;
00337 }
00338 case 0xEFBB:
00339 {
00340 if(buf[2] == 0xBF)
00341 {
00342 buffer=GUTF8String::create(buf+3,sizeof(buf)-3,GStringRep::XUTF8);
00343 startpos+=3;
00344 }else
00345 {
00346 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
00347 }
00348 break;
00349 }
00350 case 0x3C3F:
00351 default:
00352 {
00353 buffer=GUTF8String::create(buf,sizeof(buf),GStringRep::XUTF8);
00354 }
00355 }
00356 bs=ibs;
00357 }
00358
00359 XMLByteStream::~XMLByteStream()
00360 {}
00361
00362
00363 #ifdef HAVE_NAMESPACES
00364 }
00365 # ifndef NOT_USING_DJVU_NAMESPACE
00366 using namespace DJVU;
00367 # endif
00368 #endif