00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066 #ifdef HAVE_CONFIG_H
00067 # include "config.h"
00068 #endif
00069 #if NEED_GNUG_PRAGMAS
00070 # pragma implementation
00071 #endif
00072
00073 #include "GString.h"
00074 #include "GThreads.h"
00075 #include "debug.h"
00076
00077 #include <stdlib.h>
00078 #include <stdio.h>
00079 #include <string.h>
00080 #if HAS_WCHAR
00081 # include <locale.h>
00082 # if !defined(AUTOCONF) || HAVE_WCHAR_H
00083 # include <wchar.h>
00084 # endif
00085 # if HAS_WCTYPE
00086 # include <wctype.h>
00087 # endif
00088 #endif
00089 #include <ctype.h>
00090
00091 #ifndef DO_CHANGELOCALE
00092 #define DO_CHANGELOCALE 1
00093 #ifdef UNIX
00094 #if THREADMODEL != COTHREADS
00095 #if THREADMODEL != NOTHREADS
00096 #undef DO_CHANGELOCALE
00097 #define DO_CHANGELOCALE 0
00098 #endif
00099 #endif
00100 #endif
00101 #endif
00102
00103
00104 #ifdef HAVE_NAMESPACES
00105 namespace DJVU {
00106 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
00107 }
00108 #endif
00109 #endif
00110
00111
00112 GBaseString::~GBaseString() {}
00113 GNativeString::~GNativeString() {}
00114 GUTF8String::~GUTF8String() {}
00115
00116 #if !HAS_MBSTATE && HAS_WCHAR
00117
00118
00119
00120
00121
// Fallbacks for platforms without the restartable (mbstate_t based)
// conversion functions.  The standard names are redirected to local
// wrappers which ignore the state argument and forward to the
// non-restartable C library functions.
#define wcrtomb MYwcrtomb
#define mbrtowc MYmbrtowc
#define mbrlen MYmbrlen

// Encode the wide character wc into out using wctomb().
static inline int
wcrtomb(char *out, wchar_t wc, mbstate_t *)
{
  const int written = wctomb(out, wc);
  return written;
}

// Decode at most n bytes of source into *w using mbtowc().
static inline int
mbrtowc(wchar_t *w, const char *source, size_t n, mbstate_t *)
{
  const int consumed = mbtowc(w, source, n);
  return consumed;
}

// Length in bytes of the multibyte character at s, using mblen().
static inline size_t
mbrlen(const char *s, size_t n, mbstate_t *)
{
  const size_t length = mblen(s, n);
  return length;
}
#endif // !HAS_MBSTATE && HAS_WCHAR
00144
00145
00146 GP<GStringRep>
00147 GStringRep::upcase(void) const
00148 { return tocase(giswupper,gtowupper); }
00149
00150 GP<GStringRep>
00151 GStringRep::downcase(void) const
00152 { return tocase(giswlower,gtowlower); }
00153
00154 GP<GStringRep>
00155 GStringRep::UTF8::create(const unsigned int sz)
00156 {
00157 return GStringRep::create(sz,(GStringRep::UTF8 *)0);
00158 }
00159
00160 GP<GStringRep>
00161 GStringRep::UTF8::create(const char *s)
00162 {
00163 GStringRep::UTF8 dummy;
00164 return dummy.strdup(s);
00165 }
00166
00167 GP<GStringRep>
00168 GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
00169 {
00170 GStringRep::UTF8 dummy;
00171 return dummy.concat(s1,s2);
00172 }
00173
00174 GP<GStringRep>
00175 GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2)
00176 {
00177 GStringRep::UTF8 dummy;
00178 return dummy.concat(s1,s2);
00179 }
00180
00181 GP<GStringRep>
00182 GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2)
00183 {
00184 GStringRep::UTF8 dummy;
00185 return dummy.concat(s1,s2);
00186 }
00187
00188 GP<GStringRep>
00189 GStringRep::UTF8::create( const char *s1,const char *s2)
00190 {
00191 GStringRep::UTF8 dummy;
00192 return dummy.concat(s1,s2);
00193 }
00194
00195 GP<GStringRep>
00196 GStringRep::UTF8::create(const char *s,const int start,const int length)
00197 {
00198 GStringRep::UTF8 dummy;
00199 return dummy.substr(s,start,length);
00200 }
00201
00202 GP<GStringRep>
00203 GStringRep::UTF8::create(
00204 const unsigned short *s,const int start,const int length)
00205 {
00206 GStringRep::UTF8 dummy;
00207 return dummy.substr(s,start,length);
00208 }
00209
00210 GP<GStringRep>
00211 GStringRep::UTF8::create(
00212 const unsigned long *s,const int start,const int length)
00213 {
00214 GStringRep::UTF8 dummy;
00215 return dummy.substr(s,start,length);
00216 }
00217
00218 GP<GStringRep>
00219 GStringRep::UTF8::blank(const unsigned int sz) const
00220 {
00221 return GStringRep::create(sz,(GStringRep::UTF8 *)0);
00222 }
00223
00224 bool
00225 GStringRep::UTF8::isUTF8(void) const
00226 {
00227 return true;
00228 }
00229
00230 GP<GStringRep>
00231 GStringRep::UTF8::toThis(
00232 const GP<GStringRep> &rep,const GP<GStringRep> &) const
00233 {
00234 return rep?(rep->toUTF8(true)):rep;
00235 }
00236
00237 GP<GStringRep>
00238 GStringRep::UTF8::create(const char fmt[],va_list& args)
00239 {
00240 const GP<GStringRep> s(create(fmt));
00241 return (s?(s->vformat(args)):s);
00242 }
00243
00244 #if !HAS_WCHAR
00245
00246 #define NATIVE_CREATE(x) UTF8::create( x );
00247
00248 #ifdef LC_ALL
00249 #undef LC_ALL
00250 #endif
00251 #define LC_ALL 0
00252
00253 class GStringRep::ChangeLocale
00254 {
00255 public:
00256 ChangeLocale(const int,const char *) {}
00257 ~ChangeLocale() {};
00258 };
00259
00260 GP<GStringRep>
00261 GStringRep::NativeToUTF8( const char *s )
00262 {
00263 return GStringRep::UTF8::create(s);
00264 }
00265
00266 #else
00267
00268 #define NATIVE_CREATE(x) Native::create( x );
00269
00270
00271
00272
00273 class GStringRep::ChangeLocale
00274 {
00275 public:
00276 ChangeLocale(const int category,const char locale[]);
00277 ~ChangeLocale();
00278 private:
00279 GUTF8String locale;
00280 int category;
00281 };
00282
00283 class GStringRep::Native : public GStringRep
00284 {
00285 public:
00286
00287 Native(void);
00288
00289 virtual ~Native();
00290
00291
00292
00293 virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
00294
00295 virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
00296
00297 virtual bool isNative(void) const;
00298
00299 virtual GP<GStringRep> toNative(
00300 const EscapeMode escape=UNKNOWN_ESCAPED) const;
00301
00302 virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
00303
00304 virtual GP<GStringRep> toThis(
00305 const GP<GStringRep> &rep,const GP<GStringRep> &) const;
00306
00307 virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const;
00308
00309
00310 virtual int toInt(void) const;
00311 virtual long toLong(
00312 const int pos, int &endpos, const int base=10) const;
00313 virtual unsigned long toULong(
00314 const int pos, int &endpos, const int base=10) const;
00315 virtual double toDouble(
00316 const int pos, int &endpos) const;
00317
00318
00319 static GP<GStringRep> create(const unsigned int sz = 0);
00320
00321
00322 static GP<GStringRep> create(const char *s);
00323
00324
00325
00326
00327 static GP<GStringRep> create(
00328 const GP<GStringRep> &s1,const GP<GStringRep> &s2);
00329 static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
00330 static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
00331 static GP<GStringRep> create(const char *s1,const char *s2);
00332
00333
00334 static GP<GStringRep> create(
00335 const char *s,const int start,const int length=(-1));
00336 static GP<GStringRep> create(
00337 const unsigned short *s,const int start,const int length=(-1));
00338 static GP<GStringRep> create(
00339 const unsigned long *s,const int start,const int length=(-1));
00340
00341
00342 static GP<GStringRep> create_format(const char fmt[],...);
00343 static GP<GStringRep> create(const char fmt[],va_list &args);
00344
00345 virtual unsigned char *UCS4toString(
00346 const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const;
00347
00348
00349 virtual bool is_valid(void) const;
00350
00351 virtual int ncopy(wchar_t * const buf, const int buflen) const;
00352
00353 friend class GBaseString;
00354 protected:
00355
00356 virtual unsigned long getValidUCS4(const char *&source) const;
00357 };
00358
00359 GP<GStringRep>
00360 GStringRep::Native::create(const unsigned int sz)
00361 {
00362 return GStringRep::create(sz,(GStringRep::Native *)0);
00363 }
00364
00365
00366 GP<GStringRep>
00367 GStringRep::Native::create(const char *s)
00368 {
00369 GStringRep::Native dummy;
00370 return dummy.strdup(s);
00371 }
00372
00373 GP<GStringRep>
00374 GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
00375 {
00376 GStringRep::Native dummy;
00377 return dummy.concat(s1,s2);
00378 }
00379
00380 GP<GStringRep>
00381 GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2)
00382 {
00383 GStringRep::Native dummy;
00384 return dummy.concat(s1,s2);
00385 }
00386
00387 GP<GStringRep>
00388 GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2)
00389 {
00390 GStringRep::Native dummy;
00391 return dummy.concat(s1,s2);
00392 }
00393
00394 GP<GStringRep>
00395 GStringRep::Native::create(const char *s1,const char *s2)
00396 {
00397 GStringRep::Native dummy;
00398 return dummy.concat(s1,s2);
00399 }
00400
00401 GP<GStringRep>
00402 GStringRep::Native::create(
00403 const char *s,const int start,const int length)
00404 {
00405 GStringRep::Native dummy;
00406 return dummy.substr(s,start,length);
00407 }
00408
00409 GP<GStringRep>
00410 GStringRep::Native::create(
00411 const unsigned short *s,const int start,const int length)
00412 {
00413 GStringRep::Native dummy;
00414 return dummy.substr(s,start,length);
00415 }
00416
00417 GP<GStringRep>
00418 GStringRep::Native::create(
00419 const unsigned long *s,const int start,const int length)
00420 {
00421 GStringRep::Native dummy;
00422 return dummy.substr(s,start,length);
00423 }
00424
00425 GP<GStringRep>
00426 GStringRep::Native::blank(const unsigned int sz) const
00427 {
00428 return GStringRep::create(sz,(GStringRep::Native *)0);
00429 }
00430
00431 bool
00432 GStringRep::Native::isNative(void) const
00433 {
00434 return true;
00435 }
00436
00437 GP<GStringRep>
00438 GStringRep::Native::toThis(
00439 const GP<GStringRep> &rep,const GP<GStringRep> &) const
00440 {
00441 return rep?(rep->toNative(NOT_ESCAPED)):rep;
00442 }
00443
00444 GP<GStringRep>
00445 GStringRep::Native::create(const char fmt[],va_list &args)
00446 {
00447 const GP<GStringRep> s(create(fmt));
00448 return (s?(s->vformat(args)):s);
00449 }
00450
00451 int
00452 GStringRep::Native::ncopy(
00453 wchar_t * const buf, const int buflen ) const
00454 {
00455 return toUTF8()->ncopy(buf,buflen);
00456 }
00457
00458 GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] )
00459 : category(xcategory)
00460 {
00461 #if DO_CHANGELOCALE
00462
00463
00464 if(xlocale)
00465 {
00466 locale=setlocale(xcategory,0);
00467 if(locale.length() &&(locale!=xlocale))
00468 {
00469 if(locale == setlocale(category,xlocale))
00470 {
00471 locale.empty();
00472 }
00473 }
00474 else
00475 {
00476 locale.empty();
00477 }
00478 }
00479 #endif
00480 }
00481
00482 GStringRep::ChangeLocale::~ChangeLocale()
00483 {
00484 #if DO_CHANGELOCALE
00485 if(locale.length())
00486 {
00487 setlocale(category,(const char *)locale);
00488 }
00489 #endif
00490 }
00491
00492 GNativeString &
00493 GNativeString::format(const char fmt[], ... )
00494 {
00495 va_list args;
00496 va_start(args, fmt);
00497 return init(GStringRep::Native::create(fmt,args));
00498 }
00499
00500
00501
00502 GStringRep::Native::Native(void) {}
00503 GStringRep::Native::~Native() {}
00504
00505 GP<GStringRep>
00506 GStringRep::Native::append(const GP<GStringRep> &s2) const
00507 {
00508 GP<GStringRep> retval;
00509 if(s2)
00510 {
00511 if(s2->isUTF8())
00512 {
00513 G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") );
00514 }
00515 retval=concat(data,s2->data);
00516 }else
00517 {
00518 retval=const_cast<GStringRep::Native *>(this);
00519 }
00520 return retval;
00521 }
00522
00523 GP<GStringRep>
00524 GStringRep::Native::create_format(const char fmt[],...)
00525 {
00526 va_list args;
00527 va_start(args, fmt);
00528 return create(fmt,args);
00529 }
00530
00531 unsigned char *
00532 GStringRep::Native::UCS4toString(
00533 const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const
00534 {
00535 return UCS4toNative(w0,ptr,ps);
00536 }
00537
00538
00539
00540
00541
00542 unsigned char *
00543 GStringRep::UCS4toNative(
00544 const unsigned long w0,unsigned char *ptr, mbstate_t *ps)
00545 {
00546 unsigned short w1;
00547 unsigned short w2=1;
00548 for(int count=(sizeof(wchar_t)==sizeof(w1)) ? UCS4toUTF16(w0,w1,w2) : 1;
00549 count;
00550 --count,w1=w2)
00551 {
00552
00553 const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
00554 int i=wcrtomb((char *)ptr,w,ps);
00555 if(i<0)
00556 {
00557 break;
00558 }
00559 ptr[i]=0;
00560 ptr += i;
00561 }
00562 ptr[0]=0;
00563 return ptr;
00564 }
00565
00566 GP<GStringRep>
00567 GStringRep::Native::toNative(const EscapeMode escape) const
00568 {
00569 if(escape == UNKNOWN_ESCAPED)
00570 G_THROW( ERR_MSG("GStringRep.NativeToNative") );
00571 return const_cast<GStringRep::Native *>(this);
00572 }
00573
00574 GP<GStringRep>
00575 GStringRep::Native::toUTF8(const bool) const
00576 {
00577 unsigned char *buf;
00578 GPBuffer<unsigned char> gbuf(buf,size*6+1);
00579 buf[0]=0;
00580 if(data && size)
00581 {
00582 size_t n=size;
00583 const char *source=data;
00584 mbstate_t ps;
00585 unsigned char *ptr=buf;
00586
00587 memset(&ps,0,sizeof(mbstate_t));
00588 int i=0;
00589 if(sizeof(wchar_t) == sizeof(unsigned long))
00590 {
00591 wchar_t w = 0;
00592 for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i)
00593 {
00594 ptr=UCS4toUTF8(w,ptr);
00595 }
00596 }
00597 else
00598 {
00599 wchar_t w = 0;
00600 for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i)
00601 {
00602 unsigned short s[2];
00603 s[0]=w;
00604 unsigned long w0;
00605 if(UTF16toUCS4(w0,s,s+1)<=0)
00606 {
00607 source+=i;
00608 n-=i;
00609 if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0))
00610 {
00611 s[1]=w;
00612 if(UTF16toUCS4(w0,s,s+2)<=0)
00613 {
00614 i=(-1);
00615 break;
00616 }
00617 }
00618 else
00619 {
00620 i=(-1);
00621 break;
00622 }
00623 }
00624 ptr=UCS4toUTF8(w0,ptr);
00625 }
00626 }
00627 if(i<0)
00628 {
00629 gbuf.resize(0);
00630 }
00631 else
00632 {
00633 ptr[0]=0;
00634 }
00635 }
00636 return GStringRep::UTF8::create((const char *)buf);
00637 }
00638
00639 GNativeString
00640 GBaseString::UTF8ToNative(
00641 const bool currentlocale,const EscapeMode escape) const
00642 {
00643 const char *source=(*this);
00644 GP<GStringRep> retval;
00645 if(source && source[0])
00646 {
00647 #if DO_CHANGELOCALE
00648 GUTF8String lc_ctype(setlocale(LC_CTYPE,0));
00649 #endif
00650 bool repeat;
00651 for(repeat=!currentlocale;;repeat=false)
00652 {
00653 retval=(*this)->toNative((GStringRep::EscapeMode)escape);
00654 #if DO_CHANGELOCALE
00655 if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
00656 #endif
00657 break;
00658 }
00659 #if DO_CHANGELOCALE
00660 if(!repeat)
00661 {
00662 setlocale(LC_CTYPE,(const char *)lc_ctype);
00663 }
00664 #endif
00665 }
00666 return GNativeString(retval);
00667 }
00668
00669
00670 GNativeString
00671 GBaseString::getUTF82Native( const EscapeMode escape ) const
00672 {
00673 GNativeString retval;
00674
00675
00676
00677
00678
00679 const size_t slen=length()+1;
00680 if(slen>1)
00681 {
00682 retval=UTF8ToNative(false,escape) ;
00683 if(!retval.length())
00684 {
00685 retval=(const char*)*this;
00686 }
00687 }
00688 return retval;
00689 }
00690
00691 GUTF8String
00692 GBaseString::NativeToUTF8(void) const
00693 {
00694 GP<GStringRep> retval;
00695 if(length())
00696 {
00697 const char *source=(*this);
00698 #if DO_CHANGELOCALE
00699 GUTF8String lc_ctype=setlocale(LC_CTYPE,0);
00700 #endif
00701 bool repeat;
00702 for(repeat=true;;repeat=false)
00703 {
00704 if( (retval=GStringRep::NativeToUTF8(source)) )
00705 {
00706 if(GStringRep::cmp(retval->toNative(),source))
00707 {
00708 retval=GStringRep::UTF8::create((unsigned int)0);
00709 }
00710 }
00711 #if DO_CHANGELOCALE
00712 if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
00713 #endif
00714 break;
00715 }
00716 #if DO_CHANGELOCALE
00717 if(!repeat)
00718 {
00719 setlocale(LC_CTYPE,(const char *)lc_ctype);
00720 }
00721 #endif
00722 }
00723 return GUTF8String(retval);
00724 }
00725
00726 GUTF8String
00727 GBaseString::getNative2UTF8(void) const
00728 {
00729
00730
00731
00732
00733
00734 const size_t slen=length()+1;
00735 GUTF8String retval;
00736 if(slen > 1)
00737 {
00738 retval=NativeToUTF8();
00739 if(!retval.length())
00740 {
00741 retval=(const char *)(*this);
00742 }
00743 }
00744 return retval;
00745 }
00746
00747 int
00748 GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const
00749 {
00750 int retval;
00751 if(s2)
00752 {
00753 if(s2->isUTF8())
00754 {
00755 const GP<GStringRep> r(toUTF8(true));
00756 if(r)
00757 {
00758 retval=GStringRep::cmp(r->data,s2->data,len);
00759 }else
00760 {
00761 retval=cmp(s2->toNative(NOT_ESCAPED),len);
00762 }
00763 }else
00764 {
00765 retval=GStringRep::cmp(data,s2->data,len);
00766 }
00767 }else
00768 {
00769 retval=GStringRep::cmp(data,0,len);
00770 }
00771 return retval;
00772 }
00773
00774 int
00775 GStringRep::Native::toInt() const
00776 {
00777 return atoi(data);
00778 }
00779
00780 long
00781 GStringRep::Native::toLong(
00782 const int pos, int &endpos, const int base) const
00783 {
00784 char *edata=0;
00785 const long retval=strtol(data+pos, &edata, base);
00786 if(edata)
00787 {
00788 endpos=(int)((size_t)edata-(size_t)data);
00789 }else
00790 {
00791 endpos=(-1);
00792 }
00793 return retval;
00794 }
00795
00796 unsigned long
00797 GStringRep::Native::toULong(
00798 const int pos, int &endpos, const int base) const
00799 {
00800 char *edata=0;
00801 const unsigned long retval=strtoul(data+pos, &edata, base);
00802 if(edata)
00803 {
00804 endpos=(int)((size_t)edata-(size_t)data);
00805 }else
00806 {
00807 endpos=(-1);
00808 }
00809 return retval;
00810 }
00811
00812 double
00813 GStringRep::Native::toDouble(
00814 const int pos, int &endpos) const
00815 {
00816 char *edata=0;
00817 const double retval=strtod(data+pos, &edata);
00818 if(edata)
00819 {
00820 endpos=(int)((size_t)edata-(size_t)data);
00821 }else
00822 {
00823 endpos=(-1);
00824 }
00825 return retval;
00826 }
00827
00828 unsigned long
00829 GStringRep::Native::getValidUCS4(const char *&source) const
00830 {
00831 unsigned long retval=0;
00832 int n=(int)((size_t)size+(size_t)data-(size_t)source);
00833 if(source && (n > 0))
00834 {
00835 mbstate_t ps;
00836
00837 memset(&ps,0,sizeof(mbstate_t));
00838 wchar_t wt;
00839 const int len=mbrtowc(&wt,source,n,&ps);
00840 if(len>=0)
00841 {
00842 if(sizeof(wchar_t) == sizeof(unsigned short))
00843 {
00844 source+=len;
00845 unsigned short s[2];
00846 s[0]=(unsigned short)wt;
00847 if(UTF16toUCS4(retval,s,s+1)<=0)
00848 {
00849 if((n-=len)>0)
00850 {
00851 const int len=mbrtowc(&wt,source,n,&ps);
00852 if(len>=0)
00853 {
00854 s[1]=(unsigned short)wt;
00855 unsigned long w;
00856 if(UTF16toUCS4(w,s,s+2)>0)
00857 {
00858 source+=len;
00859 retval=w;
00860 }
00861 }
00862 }
00863 }
00864 }else
00865 {
00866 retval=(unsigned long)wt;
00867 source++;
00868 }
00869 }else
00870 {
00871 source++;
00872 }
00873 }
00874 return retval;
00875 }
00876
00877
00878 bool
00879 GStringRep::Native::is_valid(void) const
00880 {
00881 bool retval=true;
00882 if(data && size)
00883 {
00884 size_t n=size;
00885 const char *s=data;
00886 mbstate_t ps;
00887
00888 memset(&ps,0,sizeof(mbstate_t));
00889 do
00890 {
00891 size_t m=mbrlen(s,n,&ps);
00892 if(m > n)
00893 {
00894 retval=false;
00895 break;
00896 }else if(m)
00897 {
00898 s+=m;
00899 n-=m;
00900 }else
00901 {
00902 break;
00903 }
00904 } while(n);
00905 }
00906 return retval;
00907 }
00908
00909
00910 void
00911 GStringRep::set_remainder(void const * const, const unsigned int,
00912 const EncodeType) {}
00913 void
00914 GStringRep::set_remainder(void const * const, const unsigned int,
00915 const GP<GStringRep> &encoding) {}
00916 void
00917 GStringRep::set_remainder( const GP<GStringRep::Unicode> &) {}
00918
00919 GP<GStringRep::Unicode>
00920 GStringRep::get_remainder( void ) const
00921 {
00922 return 0;
00923 }
00924
00925 GNativeString::GNativeString(const char dat)
00926 {
00927 init(GStringRep::Native::create(&dat,0,1));
00928 }
00929
00930 GNativeString::GNativeString(const char *str)
00931 {
00932 init(GStringRep::Native::create(str));
00933 }
00934
00935 GNativeString::GNativeString(const unsigned char *str)
00936 {
00937 init(GStringRep::Native::create((const char *)str));
00938 }
00939
00940 GNativeString::GNativeString(const unsigned short *str)
00941 {
00942 init(GStringRep::Native::create(str,0,-1));
00943 }
00944
00945 GNativeString::GNativeString(const unsigned long *str)
00946 {
00947 init(GStringRep::Native::create(str,0,-1));
00948 }
00949
00950 GNativeString::GNativeString(const char *dat, unsigned int len)
00951 {
00952 init(
00953 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00954 }
00955
00956 GNativeString::GNativeString(const unsigned short *dat, unsigned int len)
00957 {
00958 init(
00959 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00960 }
00961
00962 GNativeString::GNativeString(const unsigned long *dat, unsigned int len)
00963 {
00964 init(
00965 GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00966 }
00967
00968 GNativeString::GNativeString(const GNativeString &str)
00969 {
00970 init(str);
00971 }
00972
00973 GNativeString::GNativeString(const GBaseString &gs, int from, int len)
00974 {
00975 init(
00976 GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len));
00977 }
00978
00979 GNativeString::GNativeString(const int number)
00980 {
00981 init(GStringRep::Native::create_format("%d",number));
00982 }
00983
00984 GNativeString::GNativeString(const double number)
00985 {
00986 init(GStringRep::Native::create_format("%f",number));
00987 }
00988
00989 GNativeString&
00990 GNativeString::operator= (const char str)
00991 { return init(GStringRep::Native::create(&str,0,1)); }
00992
00993 GNativeString&
00994 GNativeString::operator= (const char *str)
00995 { return init(GStringRep::Native::create(str)); }
00996
00997 GNativeString
00998 GBaseString::operator+(const GNativeString &s2) const
00999 {
01000 return GStringRep::Native::create(*this,s2);
01001 }
01002
01003 GP<GStringRep>
01004 GStringRep::NativeToUTF8( const char *s )
01005 {
01006 return GStringRep::Native::create(s)->toUTF8();
01007 }
01008
01009 #endif // HAS_WCHAR
01010
01011 template <class TYPE>
01012 GP<GStringRep>
01013 GStringRep::create(const unsigned int sz, TYPE *)
01014 {
01015 GP<GStringRep> gaddr;
01016 if (sz > 0)
01017 {
01018 GStringRep *addr;
01019 gaddr=(addr=new TYPE);
01020 addr->data=(char *)(::operator new(sz+1));
01021 addr->size = sz;
01022 addr->data[sz] = 0;
01023 }
01024 return gaddr;
01025 }
01026
01027 GP<GStringRep>
01028 GStringRep::strdup(const char *s) const
01029 {
01030 GP<GStringRep> retval;
01031 const int length=s?strlen(s):0;
01032 if(length>0)
01033 {
01034 retval=blank(length);
01035 char const * const end=s+length;
01036 char *ptr=retval->data;
01037 for(;*s&&(s!=end);ptr++)
01038 {
01039 ptr[0]=s++[0];
01040 }
01041 ptr[0]=0;
01042 }
01043 return retval;
01044 }
01045
01046 GP<GStringRep>
01047 GStringRep::substr(const char *s,const int start,const int len) const
01048 {
01049 GP<GStringRep> retval;
01050 if(s && s[0])
01051 {
01052 const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1);
01053 const char *startptr, *endptr;
01054 if(start<0)
01055 {
01056 startptr=s+length+start;
01057 if(startptr<s)
01058 startptr=s;
01059 }else
01060 {
01061 startptr=s;
01062 for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr)
01063 EMPTY_LOOP;
01064 }
01065 if(len<0)
01066 {
01067 if(s+length+1 < startptr+len)
01068 {
01069 endptr=startptr;
01070 }else
01071 {
01072 endptr=s+length+1+len;
01073 }
01074 }else
01075 {
01076 endptr=startptr;
01077 for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr)
01078 EMPTY_LOOP;
01079 }
01080 if(endptr>startptr)
01081 {
01082 retval=blank((size_t)(endptr-startptr));
01083 char *data=retval->data;
01084 for(; (startptr<endptr) && *startptr; ++startptr,++data)
01085 {
01086 data[0]=startptr[0];
01087 }
01088 data[0]=0;
01089 }
01090 }
01091 return retval;
01092 }
01093
01094 GP<GStringRep>
01095 GStringRep::substr(const unsigned short *s,const int start,const int len) const
01096 {
01097 GP<GStringRep> retval;
01098 if(s && s[0])
01099 {
01100 unsigned short const *eptr;
01101 if(len<0)
01102 {
01103 for(eptr=s;eptr[0];++eptr)
01104 EMPTY_LOOP;
01105 }else
01106 {
01107 eptr=&(s[len]);
01108 }
01109 s=&s[start];
01110 if((size_t)s<(size_t)eptr)
01111 {
01112 mbstate_t ps;
01113 memset(&ps,0,sizeof(mbstate_t));
01114 unsigned char *buf,*ptr;
01115 GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7);
01116 for(ptr=buf;s[0];)
01117 {
01118 unsigned long w;
01119 int i=UTF16toUCS4(w,s,eptr);
01120 if(i<=0)
01121 break;
01122 s+=i;
01123 ptr=UCS4toString(w,ptr,&ps);
01124 }
01125 ptr[0]=0;
01126 retval = strdup( (const char *)buf );
01127 }
01128 }
01129 return retval;
01130 }
01131
01132 GP<GStringRep>
01133 GStringRep::substr(const unsigned long *s,const int start,const int len) const
01134 {
01135 GP<GStringRep> retval;
01136 if(s && s[0])
01137 {
01138 unsigned long const *eptr;
01139 if(len<0)
01140 {
01141 for(eptr=s;eptr[0];++eptr)
01142 EMPTY_LOOP;
01143 }else
01144 {
01145 eptr=&(s[len]);
01146 }
01147 s=&s[start];
01148 if((size_t)s<(size_t)eptr)
01149 {
01150 mbstate_t ps;
01151 memset(&ps,0,sizeof(mbstate_t));
01152 unsigned char *buf,*ptr;
01153 GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7);
01154 for(ptr=buf;s[0];++s)
01155 {
01156 ptr=UCS4toString(s[0],ptr,&ps);
01157 }
01158 ptr[0]=0;
01159 retval = strdup( (const char *)buf );
01160 }
01161 }
01162 return retval;
01163 }
01164
01165 GP<GStringRep>
01166 GStringRep::append(const char *s2) const
01167 {
01168 GP<GStringRep> retval;
01169 if(s2)
01170 {
01171 retval=concat(data,s2);
01172 }else
01173 {
01174 retval=const_cast<GStringRep *>(this);
01175 }
01176 return retval;
01177 }
01178
01179 GP<GStringRep>
01180 GStringRep::UTF8::append(const GP<GStringRep> &s2) const
01181 {
01182 GP<GStringRep> retval;
01183 if(s2)
01184 {
01185 if(s2->isNative())
01186 {
01187 G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") );
01188 }
01189 retval=concat(data,s2->data);
01190 }else
01191 {
01192 retval=const_cast<GStringRep::UTF8 *>(this);
01193 }
01194 return retval;
01195 }
01196
01197 GP<GStringRep>
01198 GStringRep::concat(const char *s1,const char *s2) const
01199 {
01200 const int length1=(s1?strlen(s1):0);
01201 const int length2=(s2?strlen(s2):0);
01202 const int length=length1+length2;
01203 GP<GStringRep> retval;
01204 if(length>0)
01205 {
01206 retval=blank(length);
01207 GStringRep &r=*retval;
01208 if(length1)
01209 {
01210 strcpy(r.data,s1);
01211 if(length2)
01212 strcat(r.data,s2);
01213 }else
01214 {
01215 strcpy(r.data,s2);
01216 }
01217 }
01218 return retval;
01219 }
01220
01221 const char *GBaseString::nullstr = "";
01222
01223 void
01224 GBaseString::empty( void )
01225 {
01226 init(0);
01227 }
01228
01229 GP<GStringRep>
01230 GStringRep::getbuf(int n) const
01231 {
01232 GP<GStringRep> retval;
01233 if(n< 0)
01234 n=strlen(data);
01235 if(n>0)
01236 {
01237 retval=blank(n);
01238 char *ndata=retval->data;
01239 strncpy(ndata,data,n);
01240 ndata[n]=0;
01241 }
01242 return retval;
01243 }
01244
01245 const char *
01246 GStringRep::isCharType(
01247 bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const
01248 {
01249 char const * xptr=ptr;
01250 const unsigned long w=getValidUCS4(xptr);
01251 if((ptr != xptr)
01252 &&(((sizeof(wchar_t) == 2)&&(w&~0xffff))
01253 ||(reverse?(!xiswtest(w)):xiswtest(w))))
01254 {
01255 ptr=xptr;
01256 }
01257 return ptr;
01258 }
01259
01260 int
01261 GStringRep::nextCharType(
01262 bool (*xiswtest)(const unsigned long wc), const int from, const int len,
01263 const bool reverse) const
01264 {
01265
01266
01267
01268
01269
01270 int retval;
01271 if(from<size)
01272 {
01273 retval=from;
01274 const char * ptr = data+from;
01275 for( const char * const eptr=ptr+((len<0)?(size-from):len);
01276 (ptr<eptr) && *ptr;)
01277 {
01278
01279 char const * const xptr=isCharType(xiswtest,ptr,!reverse);
01280 if(xptr == ptr)
01281 break;
01282 ptr=xptr;
01283 }
01284 retval=(int)((size_t)ptr-(size_t)data);
01285 }else
01286 {
01287 retval=size;
01288 }
01289 return retval;
01290 }
01291
01292 bool
01293 GStringRep::giswspace(const unsigned long w)
01294 {
01295 #if HAS_WCTYPE
01296 return
01297 ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01298 ||((unsigned long)iswspace((wchar_t)w))
01299 ||((w == '\r')||(w == '\n'));
01300 #else
01301 return
01302 (w&~0xff)?(true):(((unsigned long)isspace((char)w))||((w == '\r')||(w == '\n')));
01303 #endif
01304 }
01305
01306 bool
01307 GStringRep::giswupper(const unsigned long w)
01308 {
01309 #if HAS_WCTYPE
01310 return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01311 ?(true):((unsigned long)iswupper((wchar_t)w)?true:false);
01312 #else
01313 return (w&~0xff)?(true):((unsigned long)isupper((char)w)?true:false);
01314 #endif
01315 }
01316
01317 bool
01318 GStringRep::giswlower(const unsigned long w)
01319 {
01320 #if HAS_WCTYPE
01321 return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01322 ?(true):((unsigned long)iswlower((wchar_t)w)?true:false);
01323 #else
01324 return (w&~0xff)?(true):((unsigned long)islower((char)w)?true:false);
01325 #endif
01326 }
01327
01328 unsigned long
01329 GStringRep::gtowupper(const unsigned long w)
01330 {
01331 #if HAS_WCTYPE
01332 return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01333 ?w:((unsigned long)towupper((wchar_t)w));
01334 #else
01335 return (w&~0xff)?w:((unsigned long)toupper((char)w));
01336 #endif
01337 }
01338
01339 unsigned long
01340 GStringRep::gtowlower(const unsigned long w)
01341 {
01342 #if HAS_WCTYPE
01343 return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01344 ?w:((unsigned long)towlower((wchar_t)w));
01345 #else
01346 return (w&~0xff)?w:((unsigned long)tolower((char)w));
01347 #endif
01348 }
01349
01350 GP<GStringRep>
01351 GStringRep::tocase(
01352 bool (*xiswcase)(const unsigned long wc),
01353 unsigned long (*xtowcase)(const unsigned long wc)) const
01354 {
01355 GP<GStringRep> retval;
01356 char const * const eptr=data+size;
01357 char const *ptr=data;
01358 while(ptr<eptr)
01359 {
01360 char const * const xptr=isCharType(xiswcase,ptr,false);
01361 if(ptr == xptr)
01362 break;
01363 ptr=xptr;
01364 }
01365 if(ptr<eptr)
01366 {
01367 const int n=(int)((size_t)ptr-(size_t)data);
01368 unsigned char *buf;
01369 GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6);
01370 if(n>0)
01371 {
01372 strncpy((char *)buf,data,n);
01373 }
01374 unsigned char *buf_ptr=buf+n;
01375 for(char const *ptr=data+n;ptr<eptr;)
01376 {
01377 char const * const xptr=ptr;
01378 const unsigned long w=getValidUCS4(ptr);
01379 if(ptr == xptr)
01380 break;
01381 if(xiswcase(w))
01382 {
01383 const int len=(int)((size_t)ptr-(size_t)xptr);
01384 strncpy((char *)buf_ptr,xptr,len);
01385 buf_ptr+=len;
01386 }else
01387 {
01388 mbstate_t ps;
01389 memset(&ps,0,sizeof(mbstate_t));
01390 buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps);
01391 }
01392 }
01393 buf_ptr[0]=0;
01394 retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf));
01395 }else
01396 {
01397 retval=const_cast<GStringRep *>(this);
01398 }
01399 return retval;
01400 }
01401
01402
01403
01404
01405
01406
01407
01408
01409 GP<GStringRep>
01410 GStringRep::toEscaped( const bool tosevenbit ) const
01411 {
01412 bool modified=false;
01413 char *ret;
01414 GPBuffer<char> gret(ret,size*7);
01415 ret[0]=0;
01416 char *retptr=ret;
01417 char const *start=data;
01418 char const *s=start;
01419 char const *last=s;
01420 GP<GStringRep> special;
01421 for(unsigned long w;(w=getValidUCS4(s));last=s)
01422 {
01423 char const *ss=0;
01424 switch(w)
01425 {
01426 case '<':
01427 ss="<";
01428 break;
01429 case '>':
01430 ss=">";
01431 break;
01432 case '&':
01433 ss="&";
01434 break;
01435 case '\47':
01436 ss="'";
01437 break;
01438 case '\42':
01439 ss=""";
01440 break;
01441 default:
01442 if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80))))
01443 {
01444 special=toThis(UTF8::create_format("&#%lu;",w));
01445 ss=special->data;
01446 }
01447 break;
01448 }
01449 if(ss)
01450 {
01451 modified=true;
01452 if(s!=start)
01453 {
01454 size_t len=(size_t)last-(size_t)start;
01455 strncpy(retptr,start,len);
01456 retptr+=len;
01457 start=s;
01458 }
01459 if(ss[0])
01460 {
01461 size_t len=strlen(ss);
01462 strcpy(retptr,ss);
01463 retptr+=len;
01464 }
01465 }
01466 }
01467 GP<GStringRep> retval;
01468 if(modified)
01469 {
01470 strcpy(retptr,start);
01471 retval=strdup( ret );
01472 }else
01473 {
01474 retval=const_cast<GStringRep *>(this);
01475 }
01476
01477 return retval;
01478 }
01479
01480
01481 static const GMap<GUTF8String,GUTF8String> &
01482 BasicMap( void )
01483 {
01484 static GMap<GUTF8String,GUTF8String> Basic;
01485 if (! Basic.size())
01486 {
01487 Basic["lt"] = GUTF8String('<');
01488 Basic["gt"] = GUTF8String('>');
01489 Basic["amp"] = GUTF8String('&');
01490 Basic["apos"] = GUTF8String('\47');
01491 Basic["quot"] = GUTF8String('\42');
01492 }
01493 return Basic;
01494 }
01495
01496 GUTF8String
01497 GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const
01498 {
01499 GUTF8String ret;
01500 int start_locn = 0;
01501 int amp_locn;
01502
01503 while( (amp_locn = search( '&', start_locn )) > -1 )
01504 {
01505
01506
01507 const int semi_locn = search( ';', amp_locn );
01508
01509
01510 if( semi_locn < 0 )
01511 break;
01512 ret += substr( start_locn, amp_locn - start_locn );
01513 int const len = semi_locn - amp_locn - 1;
01514 if(len)
01515 {
01516 GUTF8String key = substr( amp_locn+1, len);
01517
01518 char const * s=key;
01519 if( s[0] == '#')
01520 {
01521 unsigned long value;
01522 char *ptr=0;
01523 if(s[1] == 'x' || s[1] == 'X')
01524 {
01525 value=strtoul((char const *)(s+2),&ptr,16);
01526 }else
01527 {
01528 value=strtoul((char const *)(s+1),&ptr,10);
01529 }
01530 if(ptr)
01531 {
01532 unsigned char utf8char[7];
01533 unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char);
01534 ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char);
01535 }else
01536 {
01537 ret += substr( amp_locn, semi_locn - amp_locn + 1 );
01538 }
01539 }else
01540 {
01541 GPosition map_entry = ConvMap.contains( key );
01542 if( map_entry )
01543 {
01544 ret += ConvMap[map_entry];
01545 } else
01546 {
01547 static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap();
01548 GPosition map_entry = Basic.contains( key );
01549 if ( map_entry )
01550 {
01551 ret += Basic[map_entry];
01552 }else
01553 {
01554 ret += substr( amp_locn, len+2 );
01555 }
01556 }
01557 }
01558 }else
01559 {
01560 ret += substr( amp_locn, len+2 );
01561 }
01562 start_locn = semi_locn + 1;
01563
01564 }
01565
01566
01567 ret += substr( start_locn, length()-start_locn );
01568
01569
01570 return (ret == *this)?(*this):ret;
01571 }
01572
01573 GUTF8String
01574 GUTF8String::fromEscaped(void) const
01575 {
01576 const GMap<GUTF8String,GUTF8String> nill;
01577 return fromEscaped(nill);
01578 }
01579
01580 GP<GStringRep>
01581 GStringRep::setat(int n, char ch) const
01582 {
01583 GP<GStringRep> retval;
01584 if(n<0)
01585 n+=size;
01586 if (n < 0 || n>size)
01587 GBaseString::throw_illegal_subscript();
01588 if(ch == data[n])
01589 {
01590 retval=const_cast<GStringRep *>(this);
01591 }else if(!ch)
01592 {
01593 retval=getbuf(n);
01594 }else
01595 {
01596 retval=getbuf((n<size)?size:n);
01597 retval->data[n]=ch;
01598 if(n == size)
01599 retval->data[n+1]=0;
01600 }
01601 return retval;
01602 }
01603
// Select the bounded formatting routine used by GStringRep::vformat().
// USE_VSNPRINTF stays undefined when no such routine is known, in which
// case vformat() uses its unbounded vsprintf() branch instead.
// On WIN32 the routine is _vsnprintf.
01604 #ifdef WIN32
01605 #define USE_VSNPRINTF _vsnprintf
01606 #endif
01607
// With AUTOCONF the configure result (HAVE_VSNPRINTF) decides; otherwise
// vsnprintf is used only when the `linux` macro is defined.
01608 #ifdef AUTOCONF
01609 # ifdef HAVE_VSNPRINTF
01610 # define USE_VSNPRINTF vsnprintf
01611 # endif
01612 #else
01613 # ifdef linux
01614 # define USE_VSNPRINTF vsnprintf
01615 # endif
01616 #endif
01617
01618 GUTF8String &
01619 GUTF8String::format(const char fmt[], ... )
01620 {
01621 va_list args;
01622 va_start(args, fmt);
01623 return init(GStringRep::UTF8::create(fmt,args));
01624 }
01625
01626 GP<GStringRep>
01627 GStringRep::UTF8::create_format(const char fmt[],...)
01628 {
01629 va_list args;
01630 va_start(args, fmt);
01631 return create(fmt,args);
01632 }
01633
01634 GP<GStringRep>
01635 GStringRep::vformat(va_list args) const
01636 {
01637 GP<GStringRep> retval;
01638 if(size)
01639 {
01640 #ifndef WIN32
01641 char *nfmt;
01642 GPBuffer<char> gnfmt(nfmt,size+1);
01643 nfmt[0]=0;
01644 int start=0;
01645 #endif
01646 int from=0;
01647 while((from=search('%',from)) >= 0)
01648 {
01649 if(data[++from] != '%')
01650 {
01651 int m,n=0;
01652 sscanf(data+from,"%d!%n",&m,&n);
01653 if(n)
01654 {
01655 #ifdef WIN32
01656 char *lpszFormat=data;
01657 LPTSTR lpszTemp;
01658 if((!::FormatMessage(
01659 FORMAT_MESSAGE_FROM_STRING|FORMAT_MESSAGE_ALLOCATE_BUFFER,
01660 lpszFormat, 0, 0, (LPTSTR)&lpszTemp,0,&args))
01661 || !lpszTemp)
01662 {
01663 G_THROW(GException::outofmemory);
01664 }
01665 va_end(args);
01666 retval=strdup((const char *)lpszTemp);
01667 LocalFree(lpszTemp);
01668 break;
01669 #else
01670 from+=n;
01671 const int end=search('!',from);
01672 if(end>=0)
01673 {
01674 strncat(nfmt,data+start,(int)(end-start));
01675 strncat(nfmt,"$",1);
01676 start=from=end+1;
01677 }else
01678 {
01679 gnfmt.resize(0);
01680 from=(-1);
01681 break;
01682 }
01683 #endif
01684 }else
01685 {
01686 #ifndef WIN32
01687 gnfmt.resize(0);
01688 #endif
01689 from=(-1);
01690 break;
01691 }
01692 }
01693 }
01694 if(from < 0)
01695 {
01696 #ifndef WIN32
01697 char const * const fmt=(nfmt&&nfmt[0])?nfmt:data;
01698 #else
01699 char const * const fmt=data;
01700 #endif
01701 int buflen=32768;
01702 char *buffer;
01703 GPBuffer<char> gbuffer(buffer,buflen);
01704
01705 ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C"));
01706
01707
01708 #ifdef USE_VSNPRINTF
01709 while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0)
01710 {
01711 gbuffer.resize(0);
01712 gbuffer.resize(buflen+32768);
01713 }
01714 va_end(args);
01715 #else
01716 buffer[buflen-1] = 0;
01717 vsprintf(buffer, fmt, args);
01718 va_end(args);
01719 if (buffer[buflen-1])
01720 {
01721
01722
01723 G_THROW( ERR_MSG("GString.overwrite") );
01724 }
01725 #endif
01726 retval=strdup((const char *)buffer);
01727 }
01728 }
01729
01730 return retval;
01731 }
01732
01733 int
01734 GStringRep::search(char c, int from) const
01735 {
01736 if (from<0)
01737 from += size;
01738 int retval=(-1);
01739 if (from>=0 && from<size)
01740 {
01741 char const *const s = strchr(data+from,c);
01742 if(s)
01743 retval=(int)((size_t)s-(size_t)data);
01744 }
01745 return retval;
01746 }
01747
01748 int
01749 GStringRep::search(char const *ptr, int from) const
01750 {
01751 if(from<0)
01752 {
01753 from+=size;
01754 if(from<0)
01755 G_THROW( ERR_MSG("GString.bad_subscript") );
01756 }
01757 int retval=(-1);
01758 if (from>=0 && from<size)
01759 {
01760 char const *const s = strstr(data+from,ptr);
01761 if(s)
01762 retval=(int)((size_t)s-(size_t)data);
01763 }
01764 return retval;
01765 }
01766
01767 int
01768 GStringRep::rsearch(char c, int from) const
01769 {
01770 if(from<0)
01771 {
01772 from+=size;
01773 if(from<0)
01774 G_THROW( ERR_MSG("GString.bad_subscript") );
01775 }
01776 int retval=(-1);
01777 if ((from>=0) && (from<size))
01778 {
01779 char const *const s = strrchr(data+from,c);
01780 if(s)
01781 retval=(int)((size_t)s-(size_t)data);
01782 }
01783 return retval;
01784 }
01785
01786 int
01787 GStringRep::rsearch(char const *ptr, int from) const
01788 {
01789 if(from<0)
01790 {
01791 from+=size;
01792 if(from<0)
01793 G_THROW( ERR_MSG("GString.bad_subscript") );
01794 }
01795 int retval=(-1);
01796 for(int loc=from;(loc=search(ptr,loc)) >= 0;++loc)
01797 retval=loc;
01798 return retval;
01799 }
01800
01801 int
01802 GStringRep::contains(const char accept[],int from) const
01803 {
01804 if(from<0)
01805 {
01806 from+=size;
01807 if(from<0)
01808 G_THROW( ERR_MSG("GString.bad_subscript") );
01809 }
01810 int retval=(-1);
01811 if (accept && accept[0] && from>=0 && from<size)
01812 {
01813 char const * const src = data+from;
01814 char const *ptr=strpbrk(src,accept);
01815 if(ptr)
01816 {
01817 retval=(int)(ptr-src)+from;
01818 }
01819 }
01820 return retval;
01821 }
01822
01823 int
01824 GStringRep::rcontains(const char accept[],int from) const
01825 {
01826 int retval=(-1);
01827 while((from=contains(accept,from)) >= 0)
01828 {
01829 retval=from++;
01830 }
01831 return retval;
01832 }
01833
01834 bool
01835 GBaseString::is_int(void) const
01836 {
01837 bool isLong=!!ptr;
01838 if(isLong)
01839 {
01840 int endpos;
01841 (*this)->toLong(0,endpos);
01842 if(endpos>=0)
01843 {
01844 isLong=((*this)->nextNonSpace(endpos) == (int)length());
01845 }
01846 }
01847 return isLong;
01848 }
01849
01850 bool
01851 GBaseString::is_float(void) const
01852 {
01853 bool isDouble=!!ptr;
01854 if(isDouble)
01855 {
01856 int endpos;
01857 (*this)->toDouble(0,endpos);
01858 if(endpos>=0)
01859 {
01860 isDouble=((*this)->nextNonSpace(endpos) == (int)length());
01861 }
01862 }
01863 return isDouble;
01864 }
01865
01866 unsigned int
01867 hash(const GBaseString &str)
01868 {
01869 unsigned int x = 0;
01870 const char *s = (const char*)str;
01871 while (*s)
01872 x = x ^ (x<<6) ^ (unsigned char)(*s++);
01873 return x;
01874 }
01875
01876 void
01877 GBaseString::throw_illegal_subscript()
01878 {
01879 G_THROW( ERR_MSG("GString.bad_subscript") );
01880 }
01881
01882 unsigned char *
01883 GStringRep::UTF8::UCS4toString(
01884 const unsigned long w0,unsigned char *ptr, mbstate_t *) const
01885 {
01886 return UCS4toUTF8(w0,ptr);
01887 }
01888
01889 int
01890 GStringRep::UTF8::ncopy(
01891 wchar_t * const buf, const int buflen ) const
01892 {
01893 int retval=(-1);
01894 if(buf && buflen)
01895 {
01896 buf[0]=0;
01897 if(data[0])
01898 {
01899 const size_t length=strlen(data);
01900 const unsigned char * const eptr=(const unsigned char *)(data+length);
01901 wchar_t *r=buf;
01902 wchar_t const * const rend=buf+buflen;
01903 for(const unsigned char *s=(const unsigned char *)data;(r<rend)&&(s<eptr)&&*s;)
01904 {
01905 const unsigned long w0=UTF8toUCS4(s,eptr);
01906 unsigned short w1;
01907 unsigned short w2=1;
01908 for(int count=(sizeof(wchar_t) == sizeof(w1))?UCS4toUTF16(w0,w1,w2):1;
01909 count&&(r<rend);
01910 --count,w1=w2,++r)
01911 {
01912 r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
01913 }
01914 }
01915 if(r<rend)
01916 {
01917 r[0]=0;
01918 retval=((size_t)r-(size_t)buf)/sizeof(wchar_t);
01919 }
01920 }else
01921 {
01922 retval=0;
01923 }
01924 }
01925 return retval;
01926 }
01927
01928 GP<GStringRep>
01929 GStringRep::UTF8::toNative(const EscapeMode escape) const
01930 {
01931 GP<GStringRep> retval;
01932 if(data[0])
01933 {
01934 const size_t length=strlen(data);
01935 const unsigned char * const eptr=(const unsigned char *)(data+length);
01936 unsigned char *buf;
01937 GPBuffer<unsigned char> gbuf(buf,12*length+12);
01938 unsigned char *r=buf;
01939 mbstate_t ps;
01940 memset(&ps,0,sizeof(mbstate_t));
01941 for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
01942 {
01943 const unsigned long w0=UTF8toUCS4(s,eptr);
01944 const unsigned char * const r0=r;
01945 r=UCS4toNative(w0,r,&ps);
01946 if(r == r0)
01947 {
01948 if(escape == IS_ESCAPED)
01949 {
01950 sprintf((char *)r,"&#%lu;",w0);
01951 r+=strlen((char *)r);
01952 }else
01953 {
01954 r=buf;
01955 break;
01956 }
01957 }
01958 }
01959 r[0]=0;
01960 retval = NATIVE_CREATE( (const char *)buf );
01961 } else
01962 {
01963 retval = NATIVE_CREATE( (unsigned int)0 );
01964 }
01965 return retval;
01966 }
01967
01968 GP<GStringRep>
01969 GStringRep::UTF8::toUTF8(const bool nothrow) const
01970 {
01971 if(!nothrow)
01972 G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") );
01973 return const_cast<GStringRep::UTF8 *>(this);
01974 }
01975
01976
01977 bool
01978 GStringRep::UTF8::is_valid(void) const
01979 {
01980 bool retval=true;
01981 if(data && size)
01982 {
01983 const unsigned char * const eptr=(const unsigned char *)(data+size);
01984 for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
01985 {
01986 const unsigned char * const r=s;
01987 (void)UTF8toUCS4(s,eptr);
01988 if(r == s)
01989 {
01990 retval=false;
01991 break;
01992 }
01993 }
01994 }
01995 return retval;
01996 }
01997
// Appends the six payload bits of the byte at r to the accumulator U.
// Returns 0 when that byte is not of the form 10xxxxxx.
static inline unsigned long
add_char(unsigned long const U, unsigned char const * const r)
{
  unsigned long const byte=r[0];
  if((byte&0xc0) != 0x80)
    return 0;
  return (U<<6)|(byte&0x3f);
}
02004
// Decodes one UTF-8 sequence of one to six bytes starting at s, never
// reading at or beyond eptr.
//
// Outcomes, as implemented below:
//  - Success: returns the decoded value and advances s past the sequence.
//  - Returns 0 and leaves s untouched when s is already at eptr, when the
//    first byte is NUL, when a multi-byte sequence would need a byte at or
//    beyond eptr, or when a two to five byte sequence decodes to zero after
//    masking.
//  - Malformed input (a first byte of the form 10xxxxxx, a following byte
//    rejected by add_char(), or a failing sixth byte): returns
//    (unsigned int)(-1) minus the first byte and advances s by one byte.
02005 unsigned long
02006 GStringRep::UTF8toUCS4(
02007 unsigned char const *&s,void const * const eptr)
02008 {
02009 unsigned long U=0;
// r is the look-ahead cursor; s is only moved once the outcome is known.
02010 unsigned char const *r=s;
02011 if(r < eptr)
02012 {
02013 unsigned long const C1=r++[0];
// High bit set: multi-byte sequence, or a stray continuation byte.
02014 if(C1&0x80)
02015 {
02016 if(r < eptr)
02017 {
02018 U=C1;
// Bit 0x40 must be set in a lead byte; fold in the second byte.
02019 if((U=((C1&0x40)?add_char(U,r++):0)))
02020 {
// Bit 0x20 set: at least three bytes.
02021 if(C1&0x20)
02022 {
02023 if(r < eptr)
02024 {
02025 if((U=add_char(U,r++)))
02026 {
// Bit 0x10 set: at least four bytes.
02027 if(C1&0x10)
02028 {
02029 if(r < eptr)
02030 {
02031 if((U=add_char(U,r++)))
02032 {
// Bit 0x08 set: at least five bytes.
02033 if(C1&0x8)
02034 {
02035 if(r < eptr)
02036 {
02037 if((U=add_char(U,r++)))
02038 {
// Bit 0x04 set: six bytes.
02039 if(C1&0x4)
02040 {
02041 if(r < eptr)
02042 {
// A lead byte with bit 0x02 set as well is rejected; the result keeps
// only the low 31 bits.
02043 if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0)))
02044 {
02045 s=r;
02046 }else
02047 {
02048 U=(unsigned int)(-1)-s++[0];
02049 }
02050 }else
02051 {
02052 U=0;
02053 }
// Five-byte form: strip the lead-byte marker bits, keep 26 payload bits.
02054 }else if((U=((U&0x4000000)?0:(U&0x3ffffff))))
02055 {
02056 s=r;
02057 }
02058 }else
02059 {
02060 U=(unsigned int)(-1)-s++[0];
02061 }
02062 }else
02063 {
02064 U=0;
02065 }
// Four-byte form: keep 21 payload bits.
02066 }else if((U=((U&0x200000)?0:(U&0x1fffff))))
02067 {
02068 s=r;
02069 }
02070 }else
02071 {
02072 U=(unsigned int)(-1)-s++[0];
02073 }
02074 }else
02075 {
02076 U=0;
02077 }
// Three-byte form: keep 16 payload bits.
02078 }else if((U=((U&0x10000)?0:(U&0xffff))))
02079 {
02080 s=r;
02081 }
02082 }else
02083 {
02084 U=(unsigned int)(-1)-s++[0];
02085 }
02086 }else
02087 {
02088 U=0;
02089 }
// Two-byte form: keep 11 payload bits.
02090 }else if((U=((U&0x800)?0:(U&0x7ff))))
02091 {
02092 s=r;
02093 }
02094 }else
02095 {
02096 U=(unsigned int)(-1)-s++[0];
02097 }
02098 }else
02099 {
02100 U=0;
02101 }
// Single byte below 0x80: the value is the byte itself; NUL is not consumed.
02102 }else if((U=C1))
02103 {
02104 s=r;
02105 }
02106 }
02107 return U;
02108 }
02109
02110 unsigned char *
02111 GStringRep::UCS4toUTF8(const unsigned long w,unsigned char *ptr)
02112 {
02113 if(w <= 0x7f)
02114 {
02115 *ptr++ = (unsigned char)w;
02116 }
02117 else if(w <= 0x7ff)
02118 {
02119 *ptr++ = (unsigned char)((w>>6)|0xC0);
02120 *ptr++ = (unsigned char)((w|0x80)&0xBF);
02121 }
02122 else if(w <= 0xFFFF)
02123 {
02124 *ptr++ = (unsigned char)((w>>12)|0xE0);
02125 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02126 *ptr++ = (unsigned char)((w|0x80)&0xBF);
02127 }
02128 else if(w <= 0x1FFFFF)
02129 {
02130 *ptr++ = (unsigned char)((w>>18)|0xF0);
02131 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02132 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02133 *ptr++ = (unsigned char)((w|0x80)&0xBF);
02134 }
02135 else if(w <= 0x3FFFFFF)
02136 {
02137 *ptr++ = (unsigned char)((w>>24)|0xF8);
02138 *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
02139 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02140 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02141 *ptr++ = (unsigned char)((w|0x80)&0xBF);
02142 }
02143 else if(w <= 0x7FFFFFFF)
02144 {
02145 *ptr++ = (unsigned char)((w>>30)|0xFC);
02146 *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF);
02147 *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
02148 *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02149 *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02150 *ptr++ = (unsigned char)((w|0x80)&0xBF);
02151 }
02152 else
02153 {
02154 *ptr++ = '?';
02155 }
02156 return ptr;
02157 }
02158
02159
02160 GP<GStringRep>
02161 GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const
02162 {
02163 GP<GStringRep> retval;
02164 if(s2)
02165 {
02166 retval=toThis(s2);
02167 if(s1 && s1[0])
02168 {
02169 if(retval)
02170 {
02171 retval=concat(s1,retval->data);
02172 }else
02173 {
02174 retval=strdup(s1);
02175 }
02176 }
02177 }else if(s1 && s1[0])
02178 {
02179 retval=strdup(s1);
02180 }
02181 return retval;
02182 }
02183
02184
02185
02186 GP<GStringRep>
02187 GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const
02188 {
02189 GP<GStringRep> retval;
02190 if(s1)
02191 {
02192 retval=toThis(s1);
02193 if(s2 && s2[0])
02194 {
02195 if(retval)
02196 {
02197 retval=retval->append(s2);
02198 }else
02199 {
02200 retval=strdup(s2);
02201 }
02202 }
02203 }else if(s2 && s2[0])
02204 {
02205 retval=strdup(s2);
02206 }
02207 return retval;
02208 }
02209
02210 GP<GStringRep>
02211 GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const
02212 {
02213 GP<GStringRep> retval;
02214 if(s1)
02215 {
02216 retval=toThis(s1,s2);
02217 if(retval && s2)
02218 {
02219 retval=retval->append(toThis(s2));
02220 }
02221 }else if(s2)
02222 {
02223 retval=toThis(s2);
02224 }
02225 return retval;
02226 }
02227
02228 #ifdef WIN32
// Returns the name of the current locale.  The name is queried once and
// cached; whenever the cached name is missing or is exactly "C", the locale
// is switched to the environment default and that name is cached instead.
static const char *setlocale_win32(void)
{
  static const char *current=setlocale(LC_ALL,0);
  const bool is_plain_c=(current && strcmp(current,"C") == 0);
  if(!current || is_plain_c)
    current=setlocale(LC_ALL,"");
  return current;
}
02238 #endif
02239
02240 GStringRep::GStringRep(void)
02241 {
02242 #ifdef WIN32
02243 static const char *locale=setlocale_win32();
02244 #endif
02245 size=0;
02246 data=0;
02247 }
02248
02249 GStringRep::~GStringRep()
02250 {
02251 if(data)
02252 {
02253 data[0]=0;
02254 ::operator delete(data);
02255 }
02256 data=0;
02257 }
02258
02259 GStringRep::UTF8::UTF8(void) {}
02260
02261 GStringRep::UTF8::~UTF8() {}
02262
02263 int
02264 GStringRep::cmp(const char *s1,const int len) const
02265 {
02266 return cmp(data,s1,len);
02267 }
02268
02269 int
02270 GStringRep::cmp(const char *s1, const char *s2,const int len)
02271 {
02272 return (len
02273 ?((s1&&s1[0])
02274 ?((s2&&s2[0])
02275 ?((len>0)
02276 ?strncmp(s1,s2,len)
02277 :strcmp(s1,s2))
02278 :1)
02279 :((s2&&s2[0])?(-1):0))
02280 :0);
02281 }
02282
02283 int
02284 GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2,
02285 const int len )
02286 {
02287 return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len));
02288 }
02289
02290 int
02291 GStringRep::cmp(const GP<GStringRep> &s1, const char *s2,
02292 const int len )
02293 {
02294 return cmp((s1?s1->data:0),s2,len);
02295 }
02296
02297 int
02298 GStringRep::cmp(const char *s1, const GP<GStringRep> &s2,
02299 const int len )
02300 {
02301 return cmp(s1,(s2?(s2->data):0),len);
02302 }
02303
02304 int
02305 GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const
02306 {
02307 int retval;
02308 if(s2)
02309 {
02310 if(s2->isNative())
02311 {
02312 GP<GStringRep> r(s2->toUTF8(true));
02313 if(r)
02314 {
02315 retval=GStringRep::cmp(data,r->data,len);
02316 }else
02317 {
02318 retval=-(s2->cmp(toNative(NOT_ESCAPED),len));
02319 }
02320 }else
02321 {
02322 retval=GStringRep::cmp(data,s2->data,len);
02323 }
02324 }else
02325 {
02326 retval=GStringRep::cmp(data,0,len);
02327 }
02328 return retval;
02329 }
02330
02331 int
02332 GStringRep::UTF8::toInt() const
02333 {
02334 int endpos;
02335 return (int)toLong(0,endpos);
02336 }
02337
02338 static inline long
02339 Cstrtol(char *data,char **edata, const int base)
02340 {
02341 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02342 while (data && *data==' ') data++;
02343 return strtol(data,edata,base);
02344 }
02345
02346 long
02347 GStringRep::UTF8::toLong(
02348 const int pos, int &endpos, const int base) const
02349 {
02350 char *edata=0;
02351 long retval=Cstrtol(data+pos,&edata, base);
02352 if(edata)
02353 {
02354 endpos=edata-data;
02355 }else
02356 {
02357 endpos=(-1);
02358 GP<GStringRep> ptr=ptr->strdup(data+pos);
02359 if(ptr)
02360 ptr=ptr->toNative(NOT_ESCAPED);
02361 if(ptr)
02362 {
02363 int xendpos;
02364 retval=ptr->toLong(0,xendpos,base);
02365 if(xendpos> 0)
02366 {
02367 endpos=(int)size;
02368 ptr=ptr->strdup(data+xendpos);
02369 if(ptr)
02370 {
02371 ptr=ptr->toUTF8(true);
02372 if(ptr)
02373 {
02374 endpos-=(int)(ptr->size);
02375 }
02376 }
02377 }
02378 }
02379 }
02380 return retval;
02381 }
02382
02383 static inline unsigned long
02384 Cstrtoul(char *data,char **edata, const int base)
02385 {
02386 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02387 while (data && *data==' ') data++;
02388 return strtoul(data,edata,base);
02389 }
02390
02391 unsigned long
02392 GStringRep::UTF8::toULong(
02393 const int pos, int &endpos, const int base) const
02394 {
02395 char *edata=0;
02396 unsigned long retval=Cstrtoul(data+pos,&edata, base);
02397 if(edata)
02398 {
02399 endpos=edata-data;
02400 }else
02401 {
02402 endpos=(-1);
02403 GP<GStringRep> ptr=ptr->strdup(data+pos);
02404 if(ptr)
02405 ptr=ptr->toNative(NOT_ESCAPED);
02406 if(ptr)
02407 {
02408 int xendpos;
02409 retval=ptr->toULong(0,xendpos,base);
02410 if(xendpos> 0)
02411 {
02412 endpos=(int)size;
02413 ptr=ptr->strdup(data+xendpos);
02414 if(ptr)
02415 {
02416 ptr=ptr->toUTF8(true);
02417 if(ptr)
02418 {
02419 endpos-=(int)(ptr->size);
02420 }
02421 }
02422 }
02423 }
02424 }
02425 return retval;
02426 }
02427
02428 static inline double
02429 Cstrtod(char *data,char **edata)
02430 {
02431 GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02432 while (data && *data==' ') data++;
02433 return strtod(data,edata);
02434 }
02435
02436 double
02437 GStringRep::UTF8::toDouble(const int pos, int &endpos) const
02438 {
02439 char *edata=0;
02440 double retval=Cstrtod(data+pos,&edata);
02441 if(edata)
02442 {
02443 endpos=edata-data;
02444 }else
02445 {
02446 endpos=(-1);
02447 GP<GStringRep> ptr=ptr->strdup(data+pos);
02448 if(ptr)
02449 ptr=ptr->toNative(NOT_ESCAPED);
02450 if(ptr)
02451 {
02452 int xendpos;
02453 retval=ptr->toDouble(0,xendpos);
02454 if(xendpos >= 0)
02455 {
02456 endpos=(int)size;
02457 ptr=ptr->strdup(data+xendpos);
02458 if(ptr)
02459 {
02460 ptr=ptr->toUTF8(true);
02461 if(ptr)
02462 {
02463 endpos-=(int)(ptr->size);
02464 }
02465 }
02466 }
02467 }
02468 }
02469 return retval;
02470 }
02471
02472 int
02473 GStringRep::getUCS4(unsigned long &w, const int from) const
02474 {
02475 int retval;
02476 if(from>=size)
02477 {
02478 w=0;
02479 retval=size;
02480 }else if(from<0)
02481 {
02482 w=(unsigned int)(-1);
02483 retval=(-1);
02484 }else
02485 {
02486 const char *source=data+from;
02487 w=getValidUCS4(source);
02488 retval=(int)((size_t)source-(size_t)data);
02489 }
02490 return retval;
02491 }
02492
02493
02494 unsigned long
02495 GStringRep::UTF8::getValidUCS4(const char *&source) const
02496 {
02497 return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size);
02498 }
02499
02500 int
02501 GStringRep::nextNonSpace(const int from,const int len) const
02502 {
02503 return nextCharType(giswspace,from,len,true);
02504 }
02505
02506 int
02507 GStringRep::nextSpace(const int from,const int len) const
02508 {
02509 return nextCharType(giswspace,from,len,false);
02510 }
02511
02512 int
02513 GStringRep::nextChar(const int from) const
02514 {
02515 char const * xptr=data+from;
02516 (void)getValidUCS4(xptr);
02517 return (int)((size_t)xptr-(size_t)data);
02518 }
02519
02520 int
02521 GStringRep::firstEndSpace(int from,const int len) const
02522 {
02523 const int xsize=(len<0)?size:(from+len);
02524 const int ysize=(size<xsize)?size:xsize;
02525 int retval=ysize;
02526 while(from<ysize)
02527 {
02528 from=nextNonSpace(from,ysize-from);
02529 if(from < size)
02530 {
02531 const int r=nextSpace(from,ysize-from);
02532
02533
02534 if(r == from)
02535 {
02536 from++;
02537 }else
02538 {
02539 from=retval=r;
02540 }
02541 }
02542 }
02543 return retval;
02544 }
02545
02546 int
02547 GStringRep::UCS4toUTF16(
02548 const unsigned long w,unsigned short &w1, unsigned short &w2)
02549 {
02550 int retval;
02551 if(w<0x10000)
02552 {
02553 w1=(unsigned short)w;
02554 w2=0;
02555 retval=1;
02556 }else
02557 {
02558 w1=(unsigned short)((((w-0x10000)>>10)&0x3ff)+0xD800);
02559 w2=(unsigned short)((w&0x3ff)+0xDC00);
02560 retval=2;
02561 }
02562 return retval;
02563 }
02564
02565 int
02566 GStringRep::UTF16toUCS4(
02567 unsigned long &U,unsigned short const * const s,void const * const eptr)
02568 {
02569 int retval=0;
02570 U=0;
02571 unsigned short const * const r=s+1;
02572 if(r <= eptr)
02573 {
02574 unsigned long const W1=s[0];
02575 if((W1<0xD800)||(W1>0xDFFF))
02576 {
02577 if((U=W1))
02578 {
02579 retval=1;
02580 }
02581 }else if(W1<=0xDBFF)
02582 {
02583 unsigned short const * const rr=r+1;
02584 if(rr <= eptr)
02585 {
02586 unsigned long const W2=s[1];
02587 if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff))))
02588 {
02589 retval=2;
02590 }else
02591 {
02592 retval=(-1);
02593 }
02594 }
02595 }
02596 }
02597 return retval;
02598 }
02599
02600
02601
02602
02603 GUTF8String&
02604 GUTF8String::operator+= (char ch)
02605 {
02606 return init(
02607 GStringRep::UTF8::create((const char*)*this,
02608 GStringRep::UTF8::create(&ch,0,1)));
02609 }
02610
02611 GUTF8String&
02612 GUTF8String::operator+= (const char *str)
02613 {
02614 return init(GStringRep::UTF8::create(*this,str));
02615 }
02616
02617 GUTF8String&
02618 GUTF8String::operator+= (const GBaseString &str)
02619 {
02620 return init(GStringRep::UTF8::create(*this,str));
02621 }
02622
02623 GUTF8String
02624 GUTF8String::substr(int from, int len) const
02625 { return GUTF8String(*this, from, len); }
02626
02627 GUTF8String
02628 GUTF8String::operator+(const GBaseString &s2) const
02629 { return GStringRep::UTF8::create(*this,s2); }
02630
02631 GUTF8String
02632 GUTF8String::operator+(const GUTF8String &s2) const
02633 { return GStringRep::UTF8::create(*this,s2); }
02634
02635 GUTF8String
02636 GUTF8String::operator+(const char *s2) const
02637 { return GStringRep::UTF8::create(*this,s2); }
02638
02639 char *
02640 GUTF8String::getbuf(int n)
02641 {
02642 if(ptr)
02643 init((*this)->getbuf(n));
02644 else if(n>0)
02645 init(GStringRep::UTF8::create(n));
02646 else
02647 init(0);
02648 return ptr?((*this)->data):0;
02649 }
02650
02651 void
02652 GUTF8String::setat(const int n, const char ch)
02653 {
02654 if((!n)&&(!ptr))
02655 {
02656 init(GStringRep::UTF8::create(&ch,0,1));
02657 }else
02658 {
02659 init((*this)->setat(CheckSubscript(n),ch));
02660 }
02661 }
02662
02663 GP<GStringRep>
02664 GStringRep::UTF8ToNative( const char *s, const EscapeMode escape )
02665 {
02666 return GStringRep::UTF8::create(s)->toNative(escape);
02667 }
02668
02669 GUTF8String::GUTF8String(const char dat)
02670 { init(GStringRep::UTF8::create(&dat,0,1)); }
02671
02672 GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args)
02673 {
02674 if (fmt.ptr)
02675 init(fmt->vformat(args));
02676 else
02677 init(fmt);
02678 }
02679
02680 GUTF8String::GUTF8String(const char *str)
02681 { init(GStringRep::UTF8::create(str)); }
02682
02683 GUTF8String::GUTF8String(const unsigned char *str)
02684 { init(GStringRep::UTF8::create((const char *)str)); }
02685
02686 GUTF8String::GUTF8String(const unsigned short *str)
02687 { init(GStringRep::UTF8::create(str,0,-1)); }
02688
02689 GUTF8String::GUTF8String(const unsigned long *str)
02690 { init(GStringRep::UTF8::create(str,0,-1)); }
02691
02692 GUTF8String::GUTF8String(const char *dat, unsigned int len)
02693 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02694
02695 GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len)
02696 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02697
02698 GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len)
02699 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02700
02701 GUTF8String::GUTF8String(const GBaseString &gs, int from, int len)
02702 { init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); }
02703
02704 GUTF8String::GUTF8String(const int number)
02705 { init(GStringRep::UTF8::create_format("%d",number)); }
02706
02707 GUTF8String::GUTF8String(const double number)
02708 { init(GStringRep::UTF8::create_format("%f",number)); }
02709
02710 GUTF8String& GUTF8String::operator= (const char str)
02711 { return init(GStringRep::UTF8::create(&str,0,1)); }
02712
02713 GUTF8String& GUTF8String::operator= (const char *str)
02714 { return init(GStringRep::UTF8::create(str)); }
02715
02716 GUTF8String GBaseString::operator+(const GUTF8String &s2) const
02717 { return GStringRep::UTF8::create(*this,s2); }
02718
02719 #if HAS_WCHAR
02720 GUTF8String
02721 GNativeString::operator+(const GUTF8String &s2) const
02722 {
02723 if (ptr)
02724 return GStringRep::UTF8::create((*this)->toUTF8(true),s2);
02725 else
02726 return GStringRep::UTF8::create((*this),s2);
02727 }
02728 #endif
02729
02730 GUTF8String
02731 GUTF8String::operator+(const GNativeString &s2) const
02732 {
02733 GP<GStringRep> g = s2;
02734 if (s2.ptr)
02735 g = s2->toUTF8(true);
02736 return GStringRep::UTF8::create(*this,g);
02737 }
02738
02739 GUTF8String
02740 operator+(const char *s1, const GUTF8String &s2)
02741 { return GStringRep::UTF8::create(s1,s2); }
02742
02743 #if HAS_WCHAR
02744 GNativeString
02745 operator+(const char *s1, const GNativeString &s2)
02746 { return GStringRep::Native::create(s1,s2); }
02747
02748 GNativeString&
02749 GNativeString::operator+= (char ch)
02750 {
02751 char s[2]; s[0]=ch; s[1]=0;
02752 return init(GStringRep::Native::create((const char*)*this, s));
02753 }
02754
02755 GNativeString&
02756 GNativeString::operator+= (const char *str)
02757 {
02758 return init(GStringRep::Native::create(*this,str));
02759 }
02760
02761 GNativeString&
02762 GNativeString::operator+= (const GBaseString &str)
02763 {
02764 return init(GStringRep::Native::create(*this,str));
02765 }
02766
02767 GNativeString
02768 GNativeString::operator+(const GBaseString &s2) const
02769 { return GStringRep::Native::create(*this,s2); }
02770
02771 GNativeString
02772 GNativeString::operator+(const GNativeString &s2) const
02773 { return GStringRep::Native::create(*this,s2); }
02774
02775 GNativeString
02776 GNativeString::operator+(const char *s2) const
02777 { return GStringRep::Native::create(*this,s2); }
02778
02779 char *
02780 GNativeString::getbuf(int n)
02781 {
02782 if(ptr)
02783 init((*this)->getbuf(n));
02784 else if(n>0)
02785 init(GStringRep::Native::create(n));
02786 else
02787 init(0);
02788 return ptr?((*this)->data):0;
02789 }
02790
02791 void
02792 GNativeString::setat(const int n, const char ch)
02793 {
02794 if((!n)&&(!ptr))
02795 {
02796 init(GStringRep::Native::create(&ch,0,1));
02797 }else
02798 {
02799 init((*this)->setat(CheckSubscript(n),ch));
02800 }
02801 }
02802
02803 #endif
02804
02805
02806 #ifdef HAVE_NAMESPACES
02807 }
02808 # ifndef NOT_USING_DJVU_NAMESPACE
02809 using namespace DJVU;
02810 # endif
02811 #endif