• Skip to content
  • Skip to link menu
KDE 3.5 API Reference
  • KDE API Reference
  • API Reference
  • Sitemap
  • Contact Us
 

kviewshell

GString.cpp

Go to the documentation of this file.
00001 //C-  -*- C++ -*-
00002 //C- -------------------------------------------------------------------
00003 //C- DjVuLibre-3.5
00004 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
00005 //C- Copyright (c) 2001  AT&T
00006 //C-
00007 //C- This software is subject to, and may be distributed under, the
00008 //C- GNU General Public License, Version 2. The license should have
00009 //C- accompanied the software or you may obtain a copy of the license
00010 //C- from the Free Software Foundation at http://www.fsf.org .
00011 //C-
00012 //C- This program is distributed in the hope that it will be useful,
00013 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 //C- GNU General Public License for more details.
00016 //C- 
00017 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
00018 //C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech 
00019 //C- Software authorized us to replace the original DjVu(r) Reference 
00020 //C- Library notice by the following text (see doc/lizard2002.djvu):
00021 //C-
00022 //C-  ------------------------------------------------------------------
00023 //C- | DjVu (r) Reference Library (v. 3.5)
00024 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
00025 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
00026 //C- | 6,058,214 and patents pending.
00027 //C- |
00028 //C- | This software is subject to, and may be distributed under, the
00029 //C- | GNU General Public License, Version 2. The license should have
00030 //C- | accompanied the software or you may obtain a copy of the license
00031 //C- | from the Free Software Foundation at http://www.fsf.org .
00032 //C- |
00033 //C- | The computer code originally released by LizardTech under this
00034 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
00035 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
00036 //C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
00037 //C- | non-exclusive license to make, use, sell, or otherwise dispose of 
00038 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
00039 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
00040 //C- | General Public License.   This grant only confers the right to 
00041 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
00042 //C- | the extent such infringement is reasonably necessary to enable 
00043 //C- | recipient to make, have made, practice, sell, or otherwise dispose 
00044 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
00045 //C- | any greater extent that may be necessary to utilize further 
00046 //C- | modifications or combinations.
00047 //C- |
00048 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
00049 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
00050 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
00051 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
00052 //C- +------------------------------------------------------------------
00053 // 
00054 // $Id: GString.cpp,v 1.22 2005/04/27 16:34:13 leonb Exp $
00055 // $Name: release_3_5_15 $
00056 
00057 // From: Leon Bottou, 1/31/2002
00058 // This file has very little to do with my initial implementation.
00059 // It has been practically rewritten by Lizardtech for i18n changes.
00060 // My original implementation was very small in comparison
00061 // <http://prdownloads.sourceforge.net/djvu/DjVu2_2b-src.tgz>.
00062 // In my opinion, the duplication of the string classes is a failed
00063 // attempt to use the type system to enforce coding policies.
00064 // This could be fixed.  But there are better things to do in djvulibre.
00065 
00066 #ifdef HAVE_CONFIG_H
00067 # include "config.h"
00068 #endif
00069 #if NEED_GNUG_PRAGMAS
00070 # pragma implementation
00071 #endif
00072 
00073 #include "GString.h"
00074 #include "GThreads.h"
00075 #include "debug.h"
00076 
00077 #include <stdlib.h>
00078 #include <stdio.h>
00079 #include <string.h>
00080 #if HAS_WCHAR
00081 # include <locale.h>
00082 # if !defined(AUTOCONF) || HAVE_WCHAR_H
00083 #  include <wchar.h>
00084 # endif
00085 # if HAS_WCTYPE
00086 #  include <wctype.h>
00087 # endif
00088 #endif
00089 #include <ctype.h>
00090 
00091 #ifndef DO_CHANGELOCALE
00092 #define DO_CHANGELOCALE 1
00093 #ifdef UNIX
00094 #if THREADMODEL != COTHREADS
00095 #if THREADMODEL != NOTHREADS
00096 #undef DO_CHANGELOCALE
00097 #define DO_CHANGELOCALE 0
00098 #endif
00099 #endif 
00100 #endif
00101 #endif
00102 
00103 
00104 #ifdef HAVE_NAMESPACES
00105 namespace DJVU {
00106 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
00107 }
00108 #endif
00109 #endif
00110 
00111 
00112 GBaseString::~GBaseString() {}
00113 GNativeString::~GNativeString() {}
00114 GUTF8String::~GUTF8String() {}
00115 
00116 #if !HAS_MBSTATE && HAS_WCHAR
00117 // Under some systems, wctomb() and mbtowc() are not thread
00118 // safe.  In those cases, wcrtomb and mbrtowc are preferred.
00119 // For Solaris, wctomb() and mbtowc() are thread safe, and 
00120 // wcrtomb() and mbrtowc() don't exist.
00121 
// Redirect the restartable names to the local shims defined below.
#define wcrtomb MYwcrtomb
#define mbrtowc MYmbrtowc
#define mbrlen  MYmbrlen

// Shim for wcrtomb(): forwards to wctomb(); the state argument is ignored.
static inline int
wcrtomb(char *dest,wchar_t wc,mbstate_t *)
{
  const int written=wctomb(dest,wc);
  return written;
}

// Shim for mbrtowc(): forwards to mbtowc(); the state argument is ignored.
static inline int
mbrtowc(wchar_t *dest,const char *src, size_t limit, mbstate_t *)
{
  const int consumed=mbtowc(dest,src,limit);
  return consumed;
}

// Shim for mbrlen(): forwards to mblen(); the state argument is ignored.
// The int result of mblen() is converted to size_t on return.
static inline size_t
mbrlen(const char *src, size_t limit, mbstate_t *)
{
  return mblen(src,limit);
}
00143 #endif // !HAS_MBSTATE && HAS_WCHAR
00144 
00145 
00146 GP<GStringRep>
00147 GStringRep::upcase(void) const
00148 { return tocase(giswupper,gtowupper); }
00149 
00150 GP<GStringRep>
00151 GStringRep::downcase(void) const
00152 { return tocase(giswlower,gtowlower); }
00153 
00154 GP<GStringRep>
00155 GStringRep::UTF8::create(const unsigned int sz)
00156 {
00157   return GStringRep::create(sz,(GStringRep::UTF8 *)0);
00158 }
00159 
00160 GP<GStringRep>
00161 GStringRep::UTF8::create(const char *s)
00162 {
00163   GStringRep::UTF8 dummy;
00164   return dummy.strdup(s);
00165 }
00166 
00167 GP<GStringRep> 
00168 GStringRep::UTF8::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
00169 {
00170   GStringRep::UTF8 dummy;
00171   return dummy.concat(s1,s2);
00172 }
00173 
00174 GP<GStringRep> 
00175 GStringRep::UTF8::create( const GP<GStringRep> &s1,const char *s2)
00176 {
00177   GStringRep::UTF8 dummy;
00178   return dummy.concat(s1,s2);
00179 }
00180 
00181 GP<GStringRep> 
00182 GStringRep::UTF8::create( const char *s1, const GP<GStringRep> &s2)
00183 {
00184   GStringRep::UTF8 dummy;
00185   return dummy.concat(s1,s2);
00186 }
00187 
00188 GP<GStringRep> 
00189 GStringRep::UTF8::create( const char *s1,const char *s2)
00190 { 
00191   GStringRep::UTF8 dummy;
00192   return dummy.concat(s1,s2);
00193 }
00194 
00195 GP<GStringRep> 
00196 GStringRep::UTF8::create(const char *s,const int start,const int length)
00197 {
00198   GStringRep::UTF8 dummy;
00199   return dummy.substr(s,start,length);
00200 }
00201 
00202 GP<GStringRep> 
00203 GStringRep::UTF8::create(
00204   const unsigned short *s,const int start,const int length)
00205 {
00206   GStringRep::UTF8 dummy;
00207   return dummy.substr(s,start,length);
00208 }
00209 
00210 GP<GStringRep>
00211 GStringRep::UTF8::create(
00212   const unsigned long *s,const int start,const int length)
00213 {
00214   GStringRep::UTF8 dummy;
00215   return dummy.substr(s,start,length);
00216 }
00217 
00218 GP<GStringRep> 
00219 GStringRep::UTF8::blank(const unsigned int sz) const
00220 {
00221    return GStringRep::create(sz,(GStringRep::UTF8 *)0);
00222 }
00223 
00224 bool
00225 GStringRep::UTF8::isUTF8(void) const
00226 {
00227   return true;
00228 }
00229 
00230 GP<GStringRep> 
00231 GStringRep::UTF8::toThis(
00232     const GP<GStringRep> &rep,const GP<GStringRep> &) const
00233 {
00234   return rep?(rep->toUTF8(true)):rep;
00235 }
00236 
00237 GP<GStringRep> 
00238 GStringRep::UTF8::create(const char fmt[],va_list& args)
00239 { 
00240   const GP<GStringRep> s(create(fmt));
00241   return (s?(s->vformat(args)):s);
00242 }
00243 
00244 #if !HAS_WCHAR
00245 
00246 #define NATIVE_CREATE(x) UTF8::create( x );
00247 
00248 #ifdef LC_ALL
00249 #undef LC_ALL
00250 #endif
00251 #define LC_ALL 0
00252 
00253 class GStringRep::ChangeLocale
00254 {
00255 public:
00256   ChangeLocale(const int,const char *) {}
00257   ~ChangeLocale() {};
00258 };
00259 
00260 GP<GStringRep>
00261 GStringRep::NativeToUTF8( const char *s )
00262 {
00263   return GStringRep::UTF8::create(s);
00264 }
00265 
00266 #else
00267 
00268 #define NATIVE_CREATE(x) Native::create( x );
00269 
00270 // The declaration and implementation of GStringRep::ChangeLocale
00271 // Not used in WinCE
00272 
00273 class GStringRep::ChangeLocale
00274 {
00275 public:
00276   ChangeLocale(const int category,const char locale[]);
00277   ~ChangeLocale();
00278 private:
00279   GUTF8String locale;
00280   int category;
00281 };
00282 
00283 class GStringRep::Native : public GStringRep
00284 {
00285 public:
00286   // default constructor
00287   Native(void);
00288   // virtual destructor
00289   virtual ~Native();
00290 
00291     // Other virtual methods.
00292       // Create an empty string.
00293   virtual GP<GStringRep> blank(const unsigned int sz = 0) const;
00294       // Append a string.
00295   virtual GP<GStringRep> append(const GP<GStringRep> &s2) const;
00296       // Test if Native.
00297   virtual bool isNative(void) const;
00298       // Convert to Native.
00299   virtual GP<GStringRep> toNative(
00300     const EscapeMode escape=UNKNOWN_ESCAPED) const;
00301       // Convert to UTF8.
00302   virtual GP<GStringRep> toUTF8(const bool nothrow=false) const;
00303       // Convert to UTF8.
00304   virtual GP<GStringRep> toThis(
00305      const GP<GStringRep> &rep,const GP<GStringRep> &) const;
00306       // Compare with #s2#.
00307   virtual int cmp(const GP<GStringRep> &s2, const int len=(-1)) const;
00308 
00309   // Convert strings to numbers.
00310   virtual int toInt(void) const;
00311   virtual long toLong(
00312     const int pos, int &endpos, const int base=10) const;
00313   virtual unsigned long toULong(
00314     const int pos, int &endpos, const int base=10) const;
00315   virtual double toDouble(
00316     const int pos, int &endpos) const;
00317 
00318     // Create an empty string
00319   static GP<GStringRep> create(const unsigned int sz = 0);
00320 
00321     // Create a strdup string.
00322   static GP<GStringRep> create(const char *s);
00323 
00324   // Creates by appending to the current string
00325 
00326    // Creates with a concat operation.
00327   static GP<GStringRep> create(
00328     const GP<GStringRep> &s1,const GP<GStringRep> &s2);
00329   static GP<GStringRep> create( const GP<GStringRep> &s1,const char *s2);
00330   static GP<GStringRep> create( const char *s1, const GP<GStringRep> &s2);
00331   static GP<GStringRep> create(const char *s1,const char *s2);
00332 
00333     // Create with a strdup and substr operation.
00334   static GP<GStringRep> create(
00335     const char *s,const int start,const int length=(-1));
00336   static GP<GStringRep> create(
00337     const unsigned short *s,const int start,const int length=(-1));
00338   static GP<GStringRep> create(
00339     const unsigned long *s,const int start,const int length=(-1));
00340 
00341     // Create with an sprintf()
00342   static GP<GStringRep> create_format(const char fmt[],...);
00343   static GP<GStringRep> create(const char fmt[],va_list &args);
00344 
00345   virtual unsigned char *UCS4toString(
00346     const unsigned long w,unsigned char *ptr, mbstate_t *ps=0) const;
00347 
00348   // Tests if a string is legally encoded in the current character set.
00349   virtual bool is_valid(void) const;
00350 
00351   virtual int ncopy(wchar_t * const buf, const int buflen) const;
00352 
00353   friend class GBaseString;
00354 protected:
00355   // Return the next character and increment the source pointer.
00356   virtual unsigned long getValidUCS4(const char *&source) const;
00357 };
00358 
00359 GP<GStringRep>
00360 GStringRep::Native::create(const unsigned int sz)
00361 {
00362   return GStringRep::create(sz,(GStringRep::Native *)0);
00363 }
00364 
00365     // Create a strdup string.
00366 GP<GStringRep>
00367 GStringRep::Native::create(const char *s)
00368 {
00369   GStringRep::Native dummy;
00370   return dummy.strdup(s);
00371 }
00372 
00373 GP<GStringRep>
00374 GStringRep::Native::create(const GP<GStringRep> &s1,const GP<GStringRep> &s2)
00375 {
00376   GStringRep::Native dummy;
00377   return dummy.concat(s1,s2);
00378 }
00379 
00380 GP<GStringRep> 
00381 GStringRep::Native::create( const GP<GStringRep> &s1,const char *s2)
00382 {
00383   GStringRep::Native dummy;
00384   return dummy.concat(s1,s2);
00385 }
00386 
00387 GP<GStringRep>
00388 GStringRep::Native::create( const char *s1, const GP<GStringRep> &s2)
00389 {
00390   GStringRep::Native dummy;
00391   return dummy.concat(s1,s2);
00392 }
00393 
00394 GP<GStringRep>
00395 GStringRep::Native::create(const char *s1,const char *s2)
00396 {
00397   GStringRep::Native dummy;
00398   return dummy.concat(s1,s2);
00399 }
00400 
00401 GP<GStringRep>
00402 GStringRep::Native::create(
00403   const char *s,const int start,const int length)
00404 {
00405   GStringRep::Native dummy;
00406   return dummy.substr(s,start,length);
00407 }
00408 
00409 GP<GStringRep>
00410 GStringRep::Native::create(
00411     const unsigned short *s,const int start,const int length)
00412 {
00413   GStringRep::Native dummy;
00414   return dummy.substr(s,start,length);
00415 }
00416 
00417 GP<GStringRep>
00418 GStringRep::Native::create(
00419   const unsigned long *s,const int start,const int length)
00420 {
00421   GStringRep::Native dummy;
00422   return dummy.substr(s,start,length);
00423 }
00424 
00425 GP<GStringRep> 
00426 GStringRep::Native::blank(const unsigned int sz) const
00427 {
00428   return GStringRep::create(sz,(GStringRep::Native *)0);
00429 }
00430 
00431 bool
00432 GStringRep::Native::isNative(void) const
00433 {
00434   return true;
00435 }
00436 
00437 GP<GStringRep>
00438 GStringRep::Native::toThis(
00439      const GP<GStringRep> &rep,const GP<GStringRep> &) const
00440 {
00441   return rep?(rep->toNative(NOT_ESCAPED)):rep;
00442 }
00443 
00444 GP<GStringRep> 
00445 GStringRep::Native::create(const char fmt[],va_list &args)
00446 { 
00447   const GP<GStringRep> s(create(fmt));
00448   return (s?(s->vformat(args)):s);
00449 }
00450 
00451 int
00452 GStringRep::Native::ncopy(
00453   wchar_t * const buf, const int buflen ) const
00454 {
00455   return toUTF8()->ncopy(buf,buflen);
00456 }
00457 
00458 GStringRep::ChangeLocale::ChangeLocale(const int xcategory, const char xlocale[] )
00459   : category(xcategory)
00460 {
00461 #if DO_CHANGELOCALE
00462   // This is disabled under UNIX because 
00463   // it does not play nice with MT.
00464   if(xlocale)
00465     {
00466       locale=setlocale(xcategory,0);
00467       if(locale.length() &&(locale!=xlocale))
00468         {
00469           if(locale == setlocale(category,xlocale))
00470             {
00471               locale.empty();
00472             }
00473         }
00474       else
00475         {
00476           locale.empty();
00477         }
00478     }
00479 #endif
00480 }
00481 
00482 GStringRep::ChangeLocale::~ChangeLocale()
00483 {
00484 #if DO_CHANGELOCALE
00485   if(locale.length())
00486     {
00487       setlocale(category,(const char *)locale);
00488     }
00489 #endif
00490 }
00491 
00492 GNativeString &
00493 GNativeString::format(const char fmt[], ... )
00494 {
00495   va_list args;
00496   va_start(args, fmt);
00497   return init(GStringRep::Native::create(fmt,args));
00498 }
00499 
00500 // Gather the native implementations here. Not used in WinCE.
00501 
00502 GStringRep::Native::Native(void) {}
00503 GStringRep::Native::~Native() {}
00504 
00505 GP<GStringRep>
00506 GStringRep::Native::append(const GP<GStringRep> &s2) const
00507 {
00508   GP<GStringRep> retval;
00509   if(s2)
00510   {
00511     if(s2->isUTF8())
00512     {
00513       G_THROW( ERR_MSG("GStringRep.appendUTF8toNative") );
00514     }
00515     retval=concat(data,s2->data);
00516   }else
00517   {
00518     retval=const_cast<GStringRep::Native *>(this); 
00519   }
00520   return retval;
00521 }
00522 
00523 GP<GStringRep>
00524 GStringRep::Native::create_format(const char fmt[],...)
00525 {
00526   va_list args;
00527   va_start(args, fmt);
00528   return create(fmt,args);
00529 }
00530 
00531 unsigned char *
00532 GStringRep::Native::UCS4toString(
00533   const unsigned long w0,unsigned char *ptr, mbstate_t *ps) const
00534 {
00535   return UCS4toNative(w0,ptr,ps);
00536 }
00537 
00538 // Convert a UCS4 to a multibyte string in the value bytes.  
00539 // The data pointed to by ptr should be long enough to contain
00540 // the results with a nill termination.  (Normally 7 characters
00541 // is enough.)
00542 unsigned char *
00543 GStringRep::UCS4toNative(
00544   const unsigned long w0,unsigned char *ptr, mbstate_t *ps)
00545 {
00546   unsigned short w1;
00547   unsigned short w2=1;
00548   for(int count=(sizeof(wchar_t)==sizeof(w1)) ? UCS4toUTF16(w0,w1,w2) : 1;
00549       count;
00550       --count,w1=w2)
00551   {
00552     // wchar_t can be either UCS4 or UCS2
00553     const wchar_t w=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
00554     int i=wcrtomb((char *)ptr,w,ps);
00555     if(i<0)
00556     {
00557       break;
00558     }
00559     ptr[i]=0;
00560     ptr += i;
00561   }
00562   ptr[0]=0;
00563   return ptr;
00564 }
00565 
00566 GP<GStringRep>
00567 GStringRep::Native::toNative(const EscapeMode escape) const
00568 {
00569   if(escape == UNKNOWN_ESCAPED)
00570     G_THROW( ERR_MSG("GStringRep.NativeToNative") );
00571   return const_cast<GStringRep::Native *>(this);
00572 }
00573 
00574 GP<GStringRep>
00575 GStringRep::Native::toUTF8(const bool) const
00576 {
00577   unsigned char *buf;
00578   GPBuffer<unsigned char> gbuf(buf,size*6+1);
00579   buf[0]=0;
00580   if(data && size)
00581   {
00582     size_t n=size;
00583     const char *source=data;
00584     mbstate_t ps;
00585     unsigned char *ptr=buf;
00586     //(void)mbrlen(source, n, &ps);
00587     memset(&ps,0,sizeof(mbstate_t));
00588     int i=0;
00589     if(sizeof(wchar_t) == sizeof(unsigned long))
00590       {
00591         wchar_t w = 0;
00592         for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0); n-=i,source+=i)
00593           {
00594             ptr=UCS4toUTF8(w,ptr);
00595           }
00596       }
00597     else
00598       { 
00599         wchar_t w = 0;
00600         for(;(n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0);n-=i,source+=i)
00601           {
00602             unsigned short s[2];
00603             s[0]=w;
00604             unsigned long w0;
00605             if(UTF16toUCS4(w0,s,s+1)<=0)
00606               {
00607                 source+=i;
00608                 n-=i;
00609                 if((n>0)&&((i=mbrtowc(&w,source,n,&ps))>=0))
00610                   {
00611                     s[1]=w;
00612                     if(UTF16toUCS4(w0,s,s+2)<=0)
00613                       {
00614                         i=(-1);
00615                         break;
00616                       }
00617                   }
00618                 else
00619                   {
00620                     i=(-1);
00621                     break;
00622                   }
00623               }
00624             ptr=UCS4toUTF8(w0,ptr);
00625           }
00626       }
00627     if(i<0)
00628       {
00629         gbuf.resize(0);
00630       }
00631     else
00632       {
00633         ptr[0]=0;
00634       }
00635   }
00636   return GStringRep::UTF8::create((const char *)buf);
00637 }
00638 
// Convert this string to a GNativeString by calling toNative() on the
// underlying representation.  For an empty string no conversion is
// attempted and the result is built from a null representation.
//   currentlocale - when true only one attempt is made; when false and
//                   DO_CHANGELOCALE is enabled, a failed attempt is
//                   repeated once after setlocale(LC_CTYPE,"").
//   escape        - escape mode handed to toNative().
00639 GNativeString
00640 GBaseString::UTF8ToNative(
00641   const bool currentlocale,const EscapeMode escape) const
00642 {
00643   const char *source=(*this);
00644   GP<GStringRep> retval;
00645   if(source && source[0]) 
00646   {
00647 #if DO_CHANGELOCALE
    // Remember the current LC_CTYPE so that it can be put back below.
00648     GUTF8String lc_ctype(setlocale(LC_CTYPE,0));
00649 #endif
00650     bool repeat;
    // At most two passes: repeat is cleared before the second one.
00651     for(repeat=!currentlocale;;repeat=false)
00652     {
00653       retval=(*this)->toNative((GStringRep::EscapeMode)escape);
00654 #if DO_CHANGELOCALE
      // Stop when no retry is allowed, when the conversion produced a
      // result, or when setlocale(LC_CTYPE,"") reports the same
      // locale name as before.
00655       if (!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
00656 #endif
00657         break;
00658     }
00659 #if DO_CHANGELOCALE
    // repeat is false here either because currentlocale was true or
    // because a second pass ran; LC_CTYPE is then set back to the
    // remembered value.
00660     if(!repeat)
00661       {
00662         setlocale(LC_CTYPE,(const char *)lc_ctype);
00663       }
00664 #endif
00665   }
00666   return GNativeString(retval);
00667 }
00668 
00669 /*MBCS*/
00670 GNativeString
00671 GBaseString::getUTF82Native( const EscapeMode escape ) const
00672 { //MBCS cvt
00673   GNativeString retval;
00674 
00675   // We don't want to convert this if it 
00676   // already is known to be native...
00677 //  if (isNative()) return *this;
00678 
00679   const size_t slen=length()+1;
00680   if(slen>1)
00681   {
00682     retval=UTF8ToNative(false,escape) ;
00683     if(!retval.length())
00684     {
00685       retval=(const char*)*this;
00686     }
00687   }
00688   return retval;
00689 }
00690 
// Convert this string from native encoding to a GUTF8String using
// GStringRep::NativeToUTF8().  The converted text is converted back
// with toNative() and compared with the source; on a mismatch the
// result is replaced by a zero-size UTF8 representation.  When
// DO_CHANGELOCALE is enabled, a failed attempt is repeated once after
// setlocale(LC_CTYPE,"").  An empty string is not converted at all.
00691 GUTF8String
00692 GBaseString::NativeToUTF8(void) const
00693 {
00694   GP<GStringRep> retval;
00695   if(length())
00696   {
00697     const char *source=(*this);
00698 #if DO_CHANGELOCALE
    // Remember the current LC_CTYPE so that it can be put back below.
00699     GUTF8String lc_ctype=setlocale(LC_CTYPE,0);
00700 #endif
00701     bool repeat;
    // At most two passes: repeat is cleared before the second one.
00702     for(repeat=true;;repeat=false)
00703     {
00704       if( (retval=GStringRep::NativeToUTF8(source)) )
00705       {
        // Round-trip check: a nonzero cmp() means the text changed.
00706         if(GStringRep::cmp(retval->toNative(),source))
00707         {
00708           retval=GStringRep::UTF8::create((unsigned int)0);
00709         }
00710       }
00711 #if DO_CHANGELOCALE
      // Stop on the second pass, when a result was obtained, or when
      // setlocale(LC_CTYPE,"") reports the same locale name as before.
00712       if(!repeat || retval || (lc_ctype == setlocale(LC_CTYPE,"")))
00713 #endif
00714         break;
00715     }
00716 #if DO_CHANGELOCALE
    // A second pass ran: set LC_CTYPE back to the remembered value.
00717     if(!repeat)
00718     {
00719       setlocale(LC_CTYPE,(const char *)lc_ctype);
00720     }
00721 #endif
00722   }
00723   return GUTF8String(retval);
00724 }
00725 
00726 GUTF8String
00727 GBaseString::getNative2UTF8(void) const
00728 { //MBCS cvt
00729 
00730    // We don't want to do a transform this
00731    // if we already are in the given type.
00732 //   if (isUTF8()) return *this;
00733    
00734   const size_t slen=length()+1;
00735   GUTF8String retval;
00736   if(slen > 1)
00737   {
00738     retval=NativeToUTF8();
00739     if(!retval.length())
00740     {
00741       retval=(const char *)(*this);
00742     }
00743   }
00744   return retval;
00745 } /*MBCS*/
00746 
00747 int
00748 GStringRep::Native::cmp(const GP<GStringRep> &s2,const int len) const
00749 {
00750   int retval;
00751   if(s2)
00752   {
00753     if(s2->isUTF8())
00754     {
00755       const GP<GStringRep> r(toUTF8(true));
00756       if(r)
00757       {
00758         retval=GStringRep::cmp(r->data,s2->data,len);
00759       }else
00760       {
00761         retval=cmp(s2->toNative(NOT_ESCAPED),len);
00762       }
00763     }else
00764     {
00765       retval=GStringRep::cmp(data,s2->data,len);
00766     }
00767   }else
00768   {
00769     retval=GStringRep::cmp(data,0,len);
00770   }
00771   return retval;
00772 }
00773 
00774 int
00775 GStringRep::Native::toInt() const
00776 {
00777   return atoi(data);
00778 }
00779 
00780 long
00781 GStringRep::Native::toLong(
00782   const int pos, int &endpos, const int base) const
00783 {
00784    char *edata=0;
00785    const long retval=strtol(data+pos, &edata, base);
00786    if(edata)
00787    {
00788      endpos=(int)((size_t)edata-(size_t)data);
00789    }else
00790    {
00791      endpos=(-1);
00792    }
00793    return retval;
00794 }
00795 
00796 unsigned long
00797 GStringRep::Native::toULong(
00798   const int pos, int &endpos, const int base) const
00799 {
00800    char *edata=0;
00801    const unsigned long retval=strtoul(data+pos, &edata, base);
00802    if(edata)
00803    {
00804      endpos=(int)((size_t)edata-(size_t)data);
00805    }else
00806    {
00807      endpos=(-1);
00808    }
00809    return retval;
00810 }
00811 
00812 double
00813 GStringRep::Native::toDouble(
00814   const int pos, int &endpos) const
00815 {
00816    char *edata=0;
00817    const double retval=strtod(data+pos, &edata);
00818    if(edata)
00819    {
00820      endpos=(int)((size_t)edata-(size_t)data);
00821    }else
00822    {
00823      endpos=(-1);
00824    }
00825    return retval;
00826 }
00827 
00828 unsigned long
00829 GStringRep::Native::getValidUCS4(const char *&source) const
00830 {
00831   unsigned long retval=0;
00832   int n=(int)((size_t)size+(size_t)data-(size_t)source);
00833   if(source && (n > 0))
00834   {
00835     mbstate_t ps;
00836     //(void)mbrlen(source, n, &ps);
00837     memset(&ps,0,sizeof(mbstate_t));
00838     wchar_t wt;
00839     const int len=mbrtowc(&wt,source,n,&ps); 
00840     if(len>=0)
00841     {
00842       if(sizeof(wchar_t) == sizeof(unsigned short))
00843       {
00844         source+=len;
00845         unsigned short s[2];
00846         s[0]=(unsigned short)wt;
00847         if(UTF16toUCS4(retval,s,s+1)<=0)
00848         {
00849           if((n-=len)>0)
00850           {
00851             const int len=mbrtowc(&wt,source,n,&ps);
00852             if(len>=0)
00853             {
00854               s[1]=(unsigned short)wt;
00855               unsigned long w;
00856               if(UTF16toUCS4(w,s,s+2)>0)
00857               {
00858                 source+=len;
00859                 retval=w;
00860               }
00861             }
00862           }
00863         }
00864       }else
00865       {
00866         retval=(unsigned long)wt;
00867         source++;
00868       } 
00869     }else
00870     {
00871       source++;
00872     }
00873   }
00874   return retval;
00875 }
00876 
00877 // Tests if a string is legally encoded in the current character set.
00878 bool 
00879 GStringRep::Native::is_valid(void) const
00880 {
00881   bool retval=true;
00882   if(data && size)
00883   {
00884     size_t n=size;
00885     const char *s=data;
00886     mbstate_t ps;
00887     //(void)mbrlen(s, n, &ps);
00888     memset(&ps,0,sizeof(mbstate_t));
00889     do
00890     {
00891       size_t m=mbrlen(s,n,&ps);
00892       if(m > n)
00893       {
00894         retval=false;
00895         break;
00896       }else if(m)
00897       {
00898         s+=m;
00899         n-=m;
00900       }else
00901       {
00902         break;
00903       }
00904     } while(n);
00905   }
00906   return retval;
00907 }
00908 
00909 // These are dummy functions.
00910 void 
00911 GStringRep::set_remainder(void const * const, const unsigned int,
00912   const EncodeType) {}
00913 void 
00914 GStringRep::set_remainder(void const * const, const unsigned int,
00915   const GP<GStringRep> &encoding) {}
00916 void
00917 GStringRep::set_remainder( const GP<GStringRep::Unicode> &) {}
00918 
00919 GP<GStringRep::Unicode>
00920 GStringRep::get_remainder( void ) const
00921 {
00922   return 0;
00923 }
00924 
00925 GNativeString::GNativeString(const char dat)
00926 {
00927   init(GStringRep::Native::create(&dat,0,1));
00928 }
00929 
00930 GNativeString::GNativeString(const char *str)
00931 {
00932   init(GStringRep::Native::create(str));
00933 }
00934 
00935 GNativeString::GNativeString(const unsigned char *str)
00936 {
00937   init(GStringRep::Native::create((const char *)str));
00938 }
00939 
00940 GNativeString::GNativeString(const unsigned short *str)
00941 {
00942   init(GStringRep::Native::create(str,0,-1));
00943 }
00944 
00945 GNativeString::GNativeString(const unsigned long *str)
00946 {
00947   init(GStringRep::Native::create(str,0,-1));
00948 }
00949 
00950 GNativeString::GNativeString(const char *dat, unsigned int len)
00951 {
00952   init(
00953     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00954 }
00955 
00956 GNativeString::GNativeString(const unsigned short *dat, unsigned int len)
00957 {
00958   init(
00959     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00960 }
00961 
00962 GNativeString::GNativeString(const unsigned long *dat, unsigned int len)
00963 {
00964   init(
00965     GStringRep::Native::create(dat,0,((int)len<0)?(-1):(int)len));
00966 }
00967 
00968 GNativeString::GNativeString(const GNativeString &str)
00969 {
00970   init(str);
00971 }
00972 
00973 GNativeString::GNativeString(const GBaseString &gs, int from, int len)
00974 {
00975   init(
00976     GStringRep::Native::create(gs,from,((int)len<0)?(-1):(int)len));
00977 }
00978 
00979 GNativeString::GNativeString(const int number)
00980 {
00981   init(GStringRep::Native::create_format("%d",number));
00982 }
00983 
00984 GNativeString::GNativeString(const double number)
00985 {
00986   init(GStringRep::Native::create_format("%f",number));
00987 }
00988 
00989 GNativeString&
00990 GNativeString::operator= (const char str)
00991 { return init(GStringRep::Native::create(&str,0,1)); }
00992 
00993 GNativeString&
00994 GNativeString::operator= (const char *str)
00995 { return init(GStringRep::Native::create(str)); }
00996 
00997 GNativeString
00998 GBaseString::operator+(const GNativeString &s2) const
00999 {
01000   return GStringRep::Native::create(*this,s2);
01001 }
01002 
01003 GP<GStringRep>
01004 GStringRep::NativeToUTF8( const char *s )
01005 {
01006   return GStringRep::Native::create(s)->toUTF8();
01007 }
01008 
01009 #endif // HAS_WCHAR
01010 
01011 template <class TYPE>
01012 GP<GStringRep>
01013 GStringRep::create(const unsigned int sz, TYPE *)
01014 {
01015   GP<GStringRep> gaddr;
01016   if (sz > 0)
01017   {
01018     GStringRep *addr;
01019     gaddr=(addr=new TYPE);
01020     addr->data=(char *)(::operator new(sz+1));
01021     addr->size = sz;
01022     addr->data[sz] = 0;
01023   }
01024   return gaddr;
01025 }
01026 
01027 GP<GStringRep>
01028 GStringRep::strdup(const char *s) const
01029 {
01030   GP<GStringRep> retval;
01031   const int length=s?strlen(s):0;
01032   if(length>0)
01033   {
01034     retval=blank(length);
01035     char const * const end=s+length;
01036     char *ptr=retval->data;
01037     for(;*s&&(s!=end);ptr++)
01038     {
01039       ptr[0]=s++[0];
01040     }
01041     ptr[0]=0;
01042   }
01043   return retval;
01044 }
01045 
// Copy a substring of the C string s into a representation obtained
// from the virtual blank().
//   start - offset of the first character; a negative value counts
//           back from the end of s (clamped to the beginning).
//   len   - number of characters to copy; a negative value places the
//           end relative to the end of s (-1 reaches the end).
// A null or empty s, or an empty range, returns a null pointer.
01046 GP<GStringRep>
01047 GStringRep::substr(const char *s,const int start,const int len) const
01048 {
01049   GP<GStringRep> retval;
01050   if(s && s[0])
01051   {
    // strlen() is only needed when start or len is negative; otherwise
    // length holds a placeholder that is not used below.
01052     const unsigned int length=(start<0 || len<0)?(unsigned int)strlen(s):(unsigned int)(-1);
01053     const char *startptr, *endptr;
01054     if(start<0)
01055     {
01056       startptr=s+length+start;
01057       if(startptr<s)
01058         startptr=s;
01059     }else
01060     { 
      // Walk forward so that startptr never passes the terminating NUL.
01061       startptr=s;
01062       for(const char * const ptr=s+start;(startptr<ptr)&&*startptr;++startptr)
01063         EMPTY_LOOP;
01064     }
01065     if(len<0)
01066     {
      // NOTE(review): with len<0 and startptr no further than s+length
      // this comparison looks like it can never be true; an end that
      // lies before startptr is still rejected by the endptr>startptr
      // test below -- confirm.
01067       if(s+length+1 < startptr+len)
01068       {
01069         endptr=startptr;
01070       }else
01071       {
01072         endptr=s+length+1+len;
01073       } 
01074     }else
01075     {
      // Walk forward so that endptr never passes the terminating NUL.
01076       endptr=startptr;
01077       for(const char * const ptr=startptr+len;(endptr<ptr)&&*endptr;++endptr)
01078         EMPTY_LOOP;
01079     }
01080     if(endptr>startptr)
01081     {
01082       retval=blank((size_t)(endptr-startptr));
01083       char *data=retval->data;
01084       for(; (startptr<endptr) && *startptr; ++startptr,++data)
01085       {
01086         data[0]=startptr[0];
01087       }
01088       data[0]=0;
01089     }
01090   }
01091   return retval;
01092 }
01093 
01094 GP<GStringRep>
01095 GStringRep::substr(const unsigned short *s,const int start,const int len) const
01096 {
01097   GP<GStringRep> retval;
01098   if(s && s[0])
01099   {
01100     unsigned short const *eptr;
01101     if(len<0)
01102     {
01103       for(eptr=s;eptr[0];++eptr)
01104         EMPTY_LOOP;
01105     }else
01106     {
01107       eptr=&(s[len]);
01108     }
01109     s=&s[start];
01110     if((size_t)s<(size_t)eptr)
01111     {
01112       mbstate_t ps;
01113       memset(&ps,0,sizeof(mbstate_t));
01114       unsigned char *buf,*ptr;
01115       GPBuffer<unsigned char> gbuf(buf,(((size_t)eptr-(size_t)s)/2)*3+7);
01116       for(ptr=buf;s[0];)
01117       {
01118         unsigned long w;
01119         int i=UTF16toUCS4(w,s,eptr);
01120         if(i<=0)
01121           break;
01122         s+=i;
01123         ptr=UCS4toString(w,ptr,&ps);
01124       }
01125       ptr[0]=0;
01126       retval = strdup( (const char *)buf );
01127     }
01128   }
01129   return retval;
01130 }
01131 
01132 GP<GStringRep>
01133 GStringRep::substr(const unsigned long *s,const int start,const int len) const
01134 {
01135   GP<GStringRep> retval;
01136   if(s && s[0])
01137   {
01138     unsigned long const *eptr;
01139     if(len<0)
01140     {
01141       for(eptr=s;eptr[0];++eptr)
01142         EMPTY_LOOP;
01143     }else
01144     {
01145       eptr=&(s[len]);
01146     }
01147     s=&s[start];
01148     if((size_t)s<(size_t)eptr)
01149     {
01150       mbstate_t ps;
01151       memset(&ps,0,sizeof(mbstate_t));
01152       unsigned char *buf,*ptr;
01153       GPBuffer<unsigned char> gbuf(buf,((((size_t)eptr-(size_t)s))/4)*6+7);
01154       for(ptr=buf;s[0];++s)
01155       {
01156         ptr=UCS4toString(s[0],ptr,&ps);
01157       }
01158       ptr[0]=0;
01159       retval = strdup( (const char *)buf );
01160     }
01161   }
01162   return retval;
01163 }
01164 
01165 GP<GStringRep>
01166 GStringRep::append(const char *s2) const
01167 {
01168   GP<GStringRep> retval;
01169   if(s2)
01170   {
01171     retval=concat(data,s2);
01172   }else
01173   {
01174     retval=const_cast<GStringRep *>(this);
01175   }
01176   return retval;
01177 }
01178 
01179 GP<GStringRep>
01180 GStringRep::UTF8::append(const GP<GStringRep> &s2) const
01181 {
01182   GP<GStringRep> retval;
01183   if(s2)
01184   {
01185     if(s2->isNative())
01186     {
01187       G_THROW( ERR_MSG("GStringRep.appendNativeToUTF8") );
01188     }
01189     retval=concat(data,s2->data);
01190   }else
01191   {
01192     retval=const_cast<GStringRep::UTF8 *>(this); 
01193   }
01194   return retval;
01195 }
01196 
01197 GP<GStringRep>
01198 GStringRep::concat(const char *s1,const char *s2) const
01199 {
01200   const int length1=(s1?strlen(s1):0);
01201   const int length2=(s2?strlen(s2):0);
01202   const int length=length1+length2;
01203   GP<GStringRep> retval;
01204   if(length>0)
01205   {
01206     retval=blank(length);
01207     GStringRep &r=*retval;
01208     if(length1)
01209     {
01210       strcpy(r.data,s1);
01211       if(length2)
01212         strcat(r.data,s2);
01213     }else
01214     {
01215       strcpy(r.data,s2);
01216     }
01217   }
01218   return retval;
01219 }
01220 
// Static member pointing at an empty string literal.
01221 const char *GBaseString::nullstr = "";
01222 
01223 void
01224 GBaseString::empty( void )
01225 {
01226   init(0);
01227 }
01228 
01229 GP<GStringRep>
01230 GStringRep::getbuf(int n) const
01231 {
01232   GP<GStringRep> retval;
01233   if(n< 0)
01234     n=strlen(data);
01235   if(n>0)
01236   {
01237     retval=blank(n);
01238     char *ndata=retval->data;
01239     strncpy(ndata,data,n);
01240     ndata[n]=0;
01241   }
01242   return retval;
01243 }
01244 
01245 const char *
01246 GStringRep::isCharType(
01247   bool (*xiswtest)(const unsigned long wc), const char *ptr, const bool reverse) const
01248 {
01249   char const * xptr=ptr;
01250   const unsigned long w=getValidUCS4(xptr);
01251   if((ptr != xptr)
01252     &&(((sizeof(wchar_t) == 2)&&(w&~0xffff))
01253       ||(reverse?(!xiswtest(w)):xiswtest(w))))
01254   {
01255     ptr=xptr;
01256   }
01257   return ptr;
01258 }
01259 
01260 int
01261 GStringRep::nextCharType(
01262   bool (*xiswtest)(const unsigned long wc), const int from, const int len,
01263   const bool reverse) const
01264 {
01265   // We want to return the position of the next
01266   // non white space starting from the #from#
01267   // location.  isspace should work in any locale
01268   // so we should only need to do this for the non-
01269   // native locales (UTF8)
01270   int retval;
01271   if(from<size)
01272   {
01273     retval=from;
01274     const char * ptr = data+from;
01275     for( const char * const eptr=ptr+((len<0)?(size-from):len);
01276       (ptr<eptr) && *ptr;)
01277     {
01278        // Skip characters that fail the isCharType test
01279       char const * const xptr=isCharType(xiswtest,ptr,!reverse);
01280       if(xptr == ptr)
01281         break;
01282       ptr=xptr;
01283     }
01284     retval=(int)((size_t)ptr-(size_t)data);
01285   }else
01286   {
01287     retval=size;
01288   }
01289   return retval;
01290 }
01291 
01292 bool
01293 GStringRep::giswspace(const unsigned long w)
01294 {
01295 #if HAS_WCTYPE
01296   return 
01297     ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01298     ||((unsigned long)iswspace((wchar_t)w))
01299     ||((w == '\r')||(w == '\n'));
01300 #else
01301   return 
01302     (w&~0xff)?(true):(((unsigned long)isspace((char)w))||((w == '\r')||(w == '\n')));
01303 #endif
01304 }
01305 
01306 bool
01307 GStringRep::giswupper(const unsigned long w)
01308 {
01309 #if HAS_WCTYPE
01310   return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01311     ?(true):((unsigned long)iswupper((wchar_t)w)?true:false);
01312 #else
01313   return (w&~0xff)?(true):((unsigned long)isupper((char)w)?true:false);
01314 #endif
01315 }
01316 
01317 bool
01318 GStringRep::giswlower(const unsigned long w)
01319 {
01320 #if HAS_WCTYPE
01321   return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01322     ?(true):((unsigned long)iswlower((wchar_t)w)?true:false);
01323 #else
01324   return (w&~0xff)?(true):((unsigned long)islower((char)w)?true:false);
01325 #endif
01326 }
01327 
01328 unsigned long
01329 GStringRep::gtowupper(const unsigned long w)
01330 {
01331 #if HAS_WCTYPE
01332   return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01333     ?w:((unsigned long)towupper((wchar_t)w));
01334 #else
01335   return (w&~0xff)?w:((unsigned long)toupper((char)w));
01336 #endif
01337 }
01338 
01339 unsigned long
01340 GStringRep::gtowlower(const unsigned long w)
01341 {
01342 #if HAS_WCTYPE
01343   return ((sizeof(wchar_t) == 2)&&(w&~0xffff))
01344     ?w:((unsigned long)towlower((wchar_t)w));
01345 #else
01346   return (w&~0xff)?w:((unsigned long)tolower((char)w));
01347 #endif
01348 }
01349 
01350 GP<GStringRep>
01351 GStringRep::tocase(
01352   bool (*xiswcase)(const unsigned long wc),
01353   unsigned long (*xtowcase)(const unsigned long wc)) const
01354 {
01355   GP<GStringRep> retval;
01356   char const * const eptr=data+size;
01357   char const *ptr=data;
01358   while(ptr<eptr)
01359   {
01360     char const * const xptr=isCharType(xiswcase,ptr,false);
01361     if(ptr == xptr)
01362       break;
01363     ptr=xptr;
01364   }
01365   if(ptr<eptr)
01366   {
01367     const int n=(int)((size_t)ptr-(size_t)data);
01368     unsigned char *buf;
01369     GPBuffer<unsigned char> gbuf(buf,n+(1+size-n)*6);
01370     if(n>0)
01371     {
01372       strncpy((char *)buf,data,n);
01373     }
01374     unsigned char *buf_ptr=buf+n;
01375     for(char const *ptr=data+n;ptr<eptr;)
01376     {
01377       char const * const xptr=ptr;
01378       const unsigned long w=getValidUCS4(ptr);
01379       if(ptr == xptr)
01380         break;
01381       if(xiswcase(w))
01382       {
01383         const int len=(int)((size_t)ptr-(size_t)xptr);
01384         strncpy((char *)buf_ptr,xptr,len);
01385         buf_ptr+=len;
01386       }else
01387       {
01388         mbstate_t ps;
01389         memset(&ps,0,sizeof(mbstate_t));
01390         buf_ptr=UCS4toString(xtowcase(w),buf_ptr,&ps);
01391       }
01392     }
01393     buf_ptr[0]=0;
01394     retval=substr((const char *)buf,0,(int)((size_t)buf_ptr-(size_t)buf));
01395   }else
01396   {
01397     retval=const_cast<GStringRep *>(this);
01398   }
01399   return retval;
01400 }
01401 
01402 // Returns a copy of this string with characters used in XML escaped as follows:
01403 //      '<'  -->  "&lt;"
01404 //      '>'  -->  "&gt;"
01405 //      '&'  -->  "&amp;"
01406 //      '\'' -->  "&apos;"
01407 //      '\"' -->  "&quot;"
01408 //  Also escapes characters 0x00 through 0x1f and 0x7e through 0x7f.
01409 GP<GStringRep>
01410 GStringRep::toEscaped( const bool tosevenbit ) const
01411 {
01412   bool modified=false;
01413   char *ret;
01414   GPBuffer<char> gret(ret,size*7);
01415   ret[0]=0;
01416   char *retptr=ret;
01417   char const *start=data;
01418   char const *s=start;
01419   char const *last=s;
01420   GP<GStringRep> special;
01421   for(unsigned long w;(w=getValidUCS4(s));last=s)
01422   {
01423     char const *ss=0;
01424     switch(w)
01425     {
01426     case '<':
01427       ss="&lt;";
01428       break;
01429     case '>':
01430       ss="&gt;";
01431       break;
01432     case '&':
01433       ss="&amp;";
01434       break;
01435     case '\47':
01436       ss="&apos;";
01437       break;
01438     case '\42':
01439       ss="&quot;";
01440       break;
01441     default:
01442       if((w<' ')||(w>=0x7e && (tosevenbit || (w < 0x80))))
01443       {
01444         special=toThis(UTF8::create_format("&#%lu;",w));
01445         ss=special->data;
01446       }
01447       break;
01448     }
01449     if(ss)
01450     {
01451       modified=true;
01452       if(s!=start)
01453       {
01454         size_t len=(size_t)last-(size_t)start;
01455         strncpy(retptr,start,len);
01456         retptr+=len;
01457         start=s;
01458       }
01459       if(ss[0])
01460       {
01461         size_t len=strlen(ss);
01462         strcpy(retptr,ss);
01463         retptr+=len;
01464       }
01465     }
01466   }
01467   GP<GStringRep> retval;
01468   if(modified)
01469   {
01470     strcpy(retptr,start);
01471     retval=strdup( ret );
01472   }else
01473   {
01474     retval=const_cast<GStringRep *>(this);
01475   }
01476 //  DEBUG_MSG( "Escaped string is '" << ret << "'\n" );
01477   return retval;
01478 }
01479 
01480 
01481 static const GMap<GUTF8String,GUTF8String> &
01482 BasicMap( void )
01483 {
01484   static GMap<GUTF8String,GUTF8String> Basic;
01485   if (! Basic.size())
01486     {
01487       Basic["lt"]   = GUTF8String('<');
01488       Basic["gt"]   = GUTF8String('>');
01489       Basic["amp"]  = GUTF8String('&');
01490       Basic["apos"] = GUTF8String('\47');
01491       Basic["quot"] = GUTF8String('\42');
01492     }
01493   return Basic;
01494 }
01495 
01496 GUTF8String
01497 GUTF8String::fromEscaped( const GMap<GUTF8String,GUTF8String> ConvMap ) const
01498 {
01499   GUTF8String ret;                  // Build output string here
01500   int start_locn = 0;           // Beginning of substring to skip
01501   int amp_locn;                 // Location of a found ampersand
01502 
01503   while( (amp_locn = search( '&', start_locn )) > -1 )
01504   {
01505       // Found the next apostrophe
01506       // Locate the closing semicolon
01507     const int semi_locn = search( ';', amp_locn );
01508       // No closing semicolon, exit and copy
01509       //  the rest of the string.
01510     if( semi_locn < 0 )
01511       break;
01512     ret += substr( start_locn, amp_locn - start_locn );
01513     int const len = semi_locn - amp_locn - 1;
01514     if(len)
01515     {
01516       GUTF8String key = substr( amp_locn+1, len);
01517       //DEBUG_MSG( "key = '" << key << "'\n" );
01518       char const * s=key;
01519       if( s[0] == '#')
01520       {
01521         unsigned long value;
01522         char *ptr=0;
01523         if(s[1] == 'x' || s[1] == 'X')
01524         {
01525           value=strtoul((char const *)(s+2),&ptr,16);
01526         }else
01527         {
01528           value=strtoul((char const *)(s+1),&ptr,10);
01529         }
01530         if(ptr)
01531         {
01532           unsigned char utf8char[7];
01533           unsigned char const * const end=GStringRep::UCS4toUTF8(value,utf8char);
01534           ret+=GUTF8String((char const *)utf8char,(size_t)end-(size_t)utf8char);
01535         }else
01536         {
01537           ret += substr( amp_locn, semi_locn - amp_locn + 1 );
01538         }
01539       }else
01540       {  
01541         GPosition map_entry = ConvMap.contains( key );
01542         if( map_entry )
01543         {                           // Found in the conversion map, substitute
01544           ret += ConvMap[map_entry];
01545         } else
01546         {
01547           static const GMap<GUTF8String,GUTF8String> &Basic = BasicMap();
01548           GPosition map_entry = Basic.contains( key );
01549           if ( map_entry )
01550           {
01551             ret += Basic[map_entry];
01552           }else
01553           {
01554             ret += substr( amp_locn, len+2 );
01555           }
01556         }
01557       }
01558     }else
01559     {
01560       ret += substr( amp_locn, len+2 );
01561     }
01562     start_locn = semi_locn + 1;
01563 //    DEBUG_MSG( "ret = '" << ret << "'\n" );
01564   }
01565 
01566                                 // Copy the end of the string to the output
01567   ret += substr( start_locn, length()-start_locn );
01568 
01569 //  DEBUG_MSG( "Unescaped string is '" << ret << "'\n" );
01570   return (ret == *this)?(*this):ret;
01571 }
01572 
01573 GUTF8String
01574 GUTF8String::fromEscaped(void) const
01575 {
01576   const GMap<GUTF8String,GUTF8String> nill;
01577   return fromEscaped(nill);
01578 }
01579 
01580 GP<GStringRep>
01581 GStringRep::setat(int n, char ch) const
01582 {
01583   GP<GStringRep> retval;
01584   if(n<0)
01585     n+=size;
01586   if (n < 0 || n>size) 
01587     GBaseString::throw_illegal_subscript();
01588   if(ch == data[n])
01589   {
01590     retval=const_cast<GStringRep *>(this);
01591   }else if(!ch)
01592   {
01593     retval=getbuf(n);
01594   }else
01595   {
01596     retval=getbuf((n<size)?size:n);
01597     retval->data[n]=ch;
01598     if(n == size)
01599       retval->data[n+1]=0;
01600   }
01601   return retval;
01602 }
01603 
// Select the bounded formatting function used by GStringRep::vformat().
// USE_VSNPRINTF is _vsnprintf on WIN32, and vsnprintf when autoconf reports
// HAVE_VSNPRINTF or, without autoconf, when the "linux" macro is defined.
// When USE_VSNPRINTF stays undefined, vformat() falls back to vsprintf.
01604 #ifdef WIN32
01605 #define USE_VSNPRINTF _vsnprintf
01606 #endif
01607 
01608 #ifdef AUTOCONF
01609 # ifdef HAVE_VSNPRINTF
01610 #  define USE_VSNPRINTF vsnprintf
01611 # endif
01612 #else
01613 # ifdef linux
01614 #  define USE_VSNPRINTF vsnprintf
01615 # endif
01616 #endif
01617 
01618 GUTF8String &
01619 GUTF8String::format(const char fmt[], ... )
01620 {
01621   va_list args;
01622   va_start(args, fmt);
01623   return init(GStringRep::UTF8::create(fmt,args));
01624 }
01625 
01626 GP<GStringRep>
01627 GStringRep::UTF8::create_format(const char fmt[],...)
01628 {
01629   va_list args;
01630   va_start(args, fmt);
01631   return create(fmt,args);
01632 }
01633 
01634 GP<GStringRep>
01635 GStringRep::vformat(va_list args) const
01636 {
01637   GP<GStringRep> retval;
01638   if(size)
01639   {
01640 #ifndef WIN32
01641     char *nfmt;
01642     GPBuffer<char> gnfmt(nfmt,size+1);
01643     nfmt[0]=0;
01644     int start=0;
01645 #endif
01646     int from=0;
01647     while((from=search('%',from)) >= 0)
01648     {
01649       if(data[++from] != '%')
01650       {
01651         int m,n=0;
01652         sscanf(data+from,"%d!%n",&m,&n);
01653         if(n)
01654         {
01655 #ifdef WIN32
01656           char *lpszFormat=data;
01657           LPTSTR lpszTemp;
01658           if((!::FormatMessage(
01659             FORMAT_MESSAGE_FROM_STRING|FORMAT_MESSAGE_ALLOCATE_BUFFER,
01660               lpszFormat, 0, 0, (LPTSTR)&lpszTemp,0,&args))
01661             || !lpszTemp)
01662           {
01663             G_THROW(GException::outofmemory);
01664           }
01665           va_end(args); 
01666           retval=strdup((const char *)lpszTemp);
01667           LocalFree(lpszTemp);
01668           break;
01669 #else
01670           from+=n;
01671           const int end=search('!',from);
01672           if(end>=0)
01673           {
01674             strncat(nfmt,data+start,(int)(end-start));
01675             strncat(nfmt,"$",1);
01676             start=from=end+1;
01677           }else
01678           {
01679             gnfmt.resize(0);
01680             from=(-1);
01681             break;
01682           }
01683 #endif
01684         }else
01685         {
01686 #ifndef WIN32
01687           gnfmt.resize(0);
01688 #endif
01689           from=(-1);
01690           break;
01691         }
01692       }
01693     }
01694     if(from < 0)
01695     {
01696 #ifndef WIN32
01697       char const * const fmt=(nfmt&&nfmt[0])?nfmt:data;
01698 #else
01699       char const * const fmt=data;
01700 #endif
01701       int buflen=32768;
01702       char *buffer;
01703       GPBuffer<char> gbuffer(buffer,buflen);
01704 
01705       ChangeLocale locale(LC_NUMERIC,(isNative()?0:"C"));
01706 
01707       // Format string
01708 #ifdef USE_VSNPRINTF
01709       while(USE_VSNPRINTF(buffer, buflen, fmt, args)<0)
01710       {
01711         gbuffer.resize(0);
01712         gbuffer.resize(buflen+32768);
01713       }
01714       va_end(args);
01715 #else
01716       buffer[buflen-1] = 0;
01717       vsprintf(buffer, fmt, args);
01718       va_end(args);
01719       if (buffer[buflen-1])
01720       {
01721         // This isn't as fatal since it is on the stack, but we
01722         // definitely should stop the current operation.
01723         G_THROW( ERR_MSG("GString.overwrite") );
01724       }
01725 #endif
01726       retval=strdup((const char *)buffer);
01727     }
01728   }
01729   // Go altering the string
01730   return retval;
01731 }
01732 
01733 int 
01734 GStringRep::search(char c, int from) const
01735 {
01736   if (from<0)
01737     from += size;
01738   int retval=(-1);
01739   if (from>=0 && from<size)
01740   {
01741     char const *const s = strchr(data+from,c);
01742     if(s)
01743       retval=(int)((size_t)s-(size_t)data);
01744   }
01745   return retval;
01746 }
01747 
01748 int 
01749 GStringRep::search(char const *ptr, int from) const
01750 {
01751   if(from<0)
01752   {
01753     from+=size;
01754     if(from<0)
01755       G_THROW( ERR_MSG("GString.bad_subscript") );
01756   }
01757   int retval=(-1);
01758   if (from>=0 && from<size)
01759   {
01760     char const *const s = strstr(data+from,ptr);
01761     if(s)
01762       retval=(int)((size_t)s-(size_t)data);
01763   }
01764   return retval;
01765 }
01766 
01767 int 
01768 GStringRep::rsearch(char c, int from) const
01769 {
01770   if(from<0)
01771   {
01772     from+=size;
01773     if(from<0)
01774       G_THROW( ERR_MSG("GString.bad_subscript") );
01775   }
01776   int retval=(-1);
01777   if ((from>=0) && (from<size))
01778   {
01779     char const *const s = strrchr(data+from,c);
01780     if(s)
01781       retval=(int)((size_t)s-(size_t)data);
01782   }
01783   return retval;
01784 }
01785 
01786 int 
01787 GStringRep::rsearch(char const *ptr, int from) const
01788 {
01789   if(from<0)
01790   {
01791     from+=size;
01792     if(from<0)
01793       G_THROW( ERR_MSG("GString.bad_subscript") );
01794   }
01795   int retval=(-1);
01796   for(int loc=from;(loc=search(ptr,loc)) >= 0;++loc)
01797     retval=loc;
01798   return retval;
01799 }
01800 
01801 int
01802 GStringRep::contains(const char accept[],int from) const
01803 {
01804   if(from<0)
01805   {
01806     from+=size;
01807     if(from<0)
01808       G_THROW( ERR_MSG("GString.bad_subscript") );
01809   }
01810   int retval=(-1);
01811   if (accept && accept[0] && from>=0 && from<size)
01812   {
01813     char const * const src = data+from;
01814     char const *ptr=strpbrk(src,accept);
01815     if(ptr)
01816     {
01817       retval=(int)(ptr-src)+from;
01818     }
01819   }
01820   return retval;
01821 }
01822 
01823 int
01824 GStringRep::rcontains(const char accept[],int from) const
01825 {
01826   int retval=(-1);
01827   while((from=contains(accept,from)) >= 0)
01828   {
01829     retval=from++;
01830   }
01831   return retval;
01832 }
01833 
01834 bool
01835 GBaseString::is_int(void) const
01836 {
01837   bool isLong=!!ptr;
01838   if(isLong)
01839   {
01840     int endpos;
01841     (*this)->toLong(0,endpos);
01842     if(endpos>=0)
01843     {
01844       isLong=((*this)->nextNonSpace(endpos) == (int)length());
01845     }
01846   }
01847   return isLong;
01848 }
01849 
01850 bool
01851 GBaseString::is_float(void) const
01852 {
01853   bool isDouble=!!ptr;
01854   if(isDouble)
01855   {
01856     int endpos;
01857     (*this)->toDouble(0,endpos);
01858     if(endpos>=0)
01859     {
01860       isDouble=((*this)->nextNonSpace(endpos) == (int)length());
01861     }
01862   }
01863   return isDouble;
01864 }
01865 
01866 unsigned int 
01867 hash(const GBaseString &str)
01868 {
01869   unsigned int x = 0;
01870   const char *s = (const char*)str;
01871   while (*s) 
01872     x = x ^ (x<<6) ^ (unsigned char)(*s++);
01873   return x;
01874 }
01875 
01876 void 
01877 GBaseString::throw_illegal_subscript()
01878 {
01879   G_THROW( ERR_MSG("GString.bad_subscript") );
01880 }
01881 
01882 unsigned char *
01883 GStringRep::UTF8::UCS4toString(
01884   const unsigned long w0,unsigned char *ptr, mbstate_t *) const
01885 {
01886   return UCS4toUTF8(w0,ptr);
01887 }
01888 
01889 int
01890 GStringRep::UTF8::ncopy(
01891   wchar_t * const buf, const int buflen ) const
01892 {
01893   int retval=(-1);
01894   if(buf && buflen)
01895   {
01896     buf[0]=0;
01897     if(data[0])
01898     {
01899       const size_t length=strlen(data);
01900       const unsigned char * const eptr=(const unsigned char *)(data+length);
01901       wchar_t *r=buf;
01902       wchar_t const * const rend=buf+buflen;
01903       for(const unsigned char *s=(const unsigned char *)data;(r<rend)&&(s<eptr)&&*s;)
01904       {
01905         const unsigned long w0=UTF8toUCS4(s,eptr);
01906         unsigned short w1;
01907         unsigned short w2=1;
01908         for(int count=(sizeof(wchar_t) == sizeof(w1))?UCS4toUTF16(w0,w1,w2):1;
01909             count&&(r<rend);
01910             --count,w1=w2,++r)
01911         {
01912           r[0]=(sizeof(wchar_t) == sizeof(w1))?(wchar_t)w1:(wchar_t)w0;
01913         }
01914       }
01915       if(r<rend)
01916       {
01917         r[0]=0;
01918         retval=((size_t)r-(size_t)buf)/sizeof(wchar_t);
01919       }
01920     }else
01921     {
01922       retval=0;
01923     }
01924   }
01925   return retval;
01926 }
01927 
01928 GP<GStringRep> 
01929 GStringRep::UTF8::toNative(const EscapeMode escape) const
01930 {
01931   GP<GStringRep> retval;
01932   if(data[0])
01933   {
01934     const size_t length=strlen(data);
01935     const unsigned char * const eptr=(const unsigned char *)(data+length);
01936     unsigned char *buf;
01937     GPBuffer<unsigned char> gbuf(buf,12*length+12); 
01938     unsigned char *r=buf;
01939     mbstate_t ps;
01940     memset(&ps,0,sizeof(mbstate_t));
01941     for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
01942     {
01943       const unsigned long w0=UTF8toUCS4(s,eptr);
01944       const unsigned char * const r0=r;
01945       r=UCS4toNative(w0,r,&ps);
01946       if(r == r0)
01947       {
01948         if(escape == IS_ESCAPED)
01949         {
01950           sprintf((char *)r,"&#%lu;",w0);
01951           r+=strlen((char *)r);
01952         }else
01953         {
01954           r=buf;
01955           break;
01956         }
01957       }
01958     }
01959     r[0]=0;
01960     retval = NATIVE_CREATE( (const char *)buf );
01961   } else
01962   {
01963     retval = NATIVE_CREATE( (unsigned int)0 );
01964   }
01965   return retval;
01966 }
01967 
01968 GP<GStringRep>
01969 GStringRep::UTF8::toUTF8(const bool nothrow) const
01970 {
01971   if(!nothrow)
01972     G_THROW( ERR_MSG("GStringRep.UTF8ToUTF8") );
01973   return const_cast<GStringRep::UTF8 *>(this);
01974 }
01975 
01976 // Tests if a string is legally encoded in the current character set.
01977 bool 
01978 GStringRep::UTF8::is_valid(void) const
01979 {
01980   bool retval=true;
01981   if(data && size)
01982   {
01983     const unsigned char * const eptr=(const unsigned char *)(data+size);
01984     for(const unsigned char *s=(const unsigned char *)data;(s<eptr)&& *s;)
01985     {
01986       const unsigned char * const r=s;
01987       (void)UTF8toUCS4(s,eptr);
01988       if(r == s)
01989       {
01990         retval=false;
01991         break;
01992       }
01993     }
01994   }
01995   return retval;
01996 }
01997 
// Appends the six payload bits of the UTF8 continuation byte at #r# to the
// partially decoded value #U#.  Returns zero when the byte is not of the
// form 10xxxxxx.
static inline unsigned long
add_char(unsigned long const U, unsigned char const * const r)
{
  unsigned long const C=r[0];
  if((C&0xc0) != 0x80)
    return 0;
  return (U<<6)|(C&0x3f);
}
02004 
// Decodes one UTF8 sequence of up to six bytes starting at #s#, never
// reading at or beyond #eptr#.
//  - On success the decoded value is returned and #s# is moved past the
//    sequence.
//  - Zero is returned and #s# is left unchanged when nothing can be read,
//    when the first byte is zero, when the sequence is cut short by #eptr#,
//    or when the decoded value comes out as zero.
//  - When the first byte has its high bit set but cannot start a sequence,
//    or a following byte is not a continuation byte, #s# is advanced by one
//    byte and (unsigned int)(-1) minus that byte is returned.
// The nesting below handles one more byte per level; the test on the next
// lower bit of the first byte (0x20, 0x10, 0x8, 0x4) decides whether the
// sequence goes on.
02005 unsigned long
02006 GStringRep::UTF8toUCS4(
02007   unsigned char const *&s,void const * const eptr)
02008 {
02009   unsigned long U=0;
02010   unsigned char const *r=s;
02011   if(r < eptr)
02012   {
02013     unsigned long const C1=r++[0];
02014     if(C1&0x80)
02015     {
02016       if(r < eptr)
02017       {
02018         U=C1;
      // A first byte without bit 0x40 is a stray continuation byte.
02019         if((U=((C1&0x40)?add_char(U,r++):0)))
02020         {
02021           if(C1&0x20)
02022           {
02023             if(r < eptr)
02024             {
02025               if((U=add_char(U,r++)))
02026               {
02027                 if(C1&0x10)
02028                 {
02029                   if(r < eptr)
02030                   {
02031                     if((U=add_char(U,r++)))
02032                     {
02033                       if(C1&0x8)
02034                       {
02035                         if(r < eptr)
02036                         {
02037                           if((U=add_char(U,r++)))
02038                           {
02039                             if(C1&0x4)
02040                             {
02041                               if(r < eptr)
02042                               {
      // Six byte form: a first byte with bit 0x2 set is rejected.
02043                                 if((U=((!(C1&0x2))?(add_char(U,r++)&0x7fffffff):0)))
02044                                 {
02045                                   s=r;
02046                                 }else
02047                                 {
02048                                   U=(unsigned int)(-1)-s++[0];
02049                                 }
02050                               }else
02051                               {
02052                                 U=0;
02053                               }
      // Five byte form: the mask removes the marker bits of the first byte.
02054                             }else if((U=((U&0x4000000)?0:(U&0x3ffffff))))
02055                             {
02056                               s=r;
02057                             }
02058                           }else
02059                           {
02060                             U=(unsigned int)(-1)-s++[0];
02061                           }
02062                         }else
02063                         {
02064                           U=0;
02065                         }
      // Four byte form.
02066                       }else if((U=((U&0x200000)?0:(U&0x1fffff))))
02067                       {
02068                         s=r;
02069                       }
02070                     }else
02071                     {
02072                       U=(unsigned int)(-1)-s++[0];
02073                     }
02074                   }else
02075                   {
02076                     U=0;
02077                   }
      // Three byte form.
02078                 }else if((U=((U&0x10000)?0:(U&0xffff))))
02079                 {
02080                   s=r;
02081                 }
02082               }else
02083               {
02084                 U=(unsigned int)(-1)-s++[0];
02085               }
02086             }else
02087             {
02088               U=0;
02089             }
      // Two byte form.
02090           }else if((U=((U&0x800)?0:(U&0x7ff))))
02091           {
02092             s=r;
02093           }
02094         }else
02095         {
02096           U=(unsigned int)(-1)-s++[0];
02097         }
02098       }else
02099       {
02100         U=0;
02101       }
      // Plain seven bit character; a zero byte does not advance #s#.
02102     }else if((U=C1))
02103     {
02104       s=r;
02105     }
02106   }
02107   return U;
02108 }
02109 
02110 unsigned char *
02111 GStringRep::UCS4toUTF8(const unsigned long w,unsigned char *ptr)
02112 {
02113   if(w <= 0x7f)
02114   {
02115     *ptr++ = (unsigned char)w;
02116   }
02117   else if(w <= 0x7ff)
02118   {
02119     *ptr++ = (unsigned char)((w>>6)|0xC0);
02120     *ptr++ = (unsigned char)((w|0x80)&0xBF);
02121   }
02122   else if(w <= 0xFFFF)
02123   {
02124     *ptr++ = (unsigned char)((w>>12)|0xE0);
02125     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02126     *ptr++ = (unsigned char)((w|0x80)&0xBF);
02127   }
02128   else if(w <= 0x1FFFFF)
02129   {
02130     *ptr++ = (unsigned char)((w>>18)|0xF0);
02131     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02132     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02133     *ptr++ = (unsigned char)((w|0x80)&0xBF);
02134   }
02135   else if(w <= 0x3FFFFFF)
02136   {
02137     *ptr++ = (unsigned char)((w>>24)|0xF8);
02138     *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
02139     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02140     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02141     *ptr++ = (unsigned char)((w|0x80)&0xBF);
02142   }
02143   else if(w <= 0x7FFFFFFF)
02144   {
02145     *ptr++ = (unsigned char)((w>>30)|0xFC);
02146     *ptr++ = (unsigned char)(((w>>24)|0x80)&0xBF);
02147     *ptr++ = (unsigned char)(((w>>18)|0x80)&0xBF);
02148     *ptr++ = (unsigned char)(((w>>12)|0x80)&0xBF);
02149     *ptr++ = (unsigned char)(((w>>6)|0x80)&0xBF);
02150     *ptr++ = (unsigned char)((w|0x80)&0xBF);
02151   }
02152   else
02153   { 
02154     *ptr++ = '?';
02155   }
02156   return ptr;
02157 }
02158 
02159    // Creates with a concat operation.
02160 GP<GStringRep> 
02161 GStringRep::concat( const char *s1, const GP<GStringRep> &s2) const
02162 {
02163   GP<GStringRep> retval;
02164   if(s2)
02165   {
02166     retval=toThis(s2);
02167     if(s1 && s1[0])
02168     {
02169       if(retval)
02170       {
02171         retval=concat(s1,retval->data);
02172       }else
02173       {
02174         retval=strdup(s1);
02175       }
02176     }
02177   }else if(s1 && s1[0])
02178   {
02179     retval=strdup(s1);
02180   }
02181   return retval;
02182 }
02183 
02184    // Creates with a concat operation.
02185 
02186 GP<GStringRep> 
02187 GStringRep::concat( const GP<GStringRep> &s1,const char *s2) const
02188 {
02189   GP<GStringRep> retval;
02190   if(s1)
02191   {
02192     retval=toThis(s1);
02193     if(s2 && s2[0])
02194     {
02195       if(retval)
02196       {
02197         retval=retval->append(s2);
02198       }else
02199       {
02200         retval=strdup(s2);
02201       }
02202     }
02203   }else if(s2 && s2[0])
02204   {
02205     retval=strdup(s2);
02206   }
02207   return retval;
02208 }
02209 
02210 GP<GStringRep> 
02211 GStringRep::concat(const GP<GStringRep> &s1,const GP<GStringRep> &s2) const
02212 { 
02213   GP<GStringRep> retval; 
02214   if(s1)
02215   {
02216     retval=toThis(s1,s2);
02217     if(retval && s2)
02218     {
02219       retval=retval->append(toThis(s2));
02220     }
02221   }else if(s2)
02222   {
02223     retval=toThis(s2);
02224   }
02225   return retval;
02226 }
02227 
#ifdef WIN32
// Returns the name of the current locale.  When no locale is set, or when
// it is the "C" locale, the locale of the environment is selected first.
static const char *setlocale_win32(void)
{
  static const char *locale=setlocale(LC_ALL,0);
  const bool unset=!locale;
  if(unset || (locale[0] == 'C' && !locale[1]))
  {
    locale=setlocale(LC_ALL,"");
  }
  return locale;
}
#endif
02239 
02240 GStringRep::GStringRep(void)
02241 {
02242 #ifdef WIN32
02243   static const char *locale=setlocale_win32();
02244 #endif
02245   size=0;
02246   data=0;
02247 }
02248 
02249 GStringRep::~GStringRep()
02250 {
02251   if(data)
02252   {
02253     data[0]=0;
02254     ::operator delete(data);
02255   }
02256   data=0;
02257 }
02258 
02259 GStringRep::UTF8::UTF8(void) {}
02260 
02261 GStringRep::UTF8::~UTF8() {}
02262 
02263 int
02264 GStringRep::cmp(const char *s1,const int len) const
02265 {
02266   return cmp(data,s1,len);
02267 }
02268 
02269 int
02270 GStringRep::cmp(const char *s1, const char *s2,const int len)
02271 {
02272   return (len
02273    ?((s1&&s1[0])
02274       ?((s2&&s2[0])
02275         ?((len>0)
02276           ?strncmp(s1,s2,len)
02277           :strcmp(s1,s2))
02278         :1)
02279       :((s2&&s2[0])?(-1):0))
02280    :0);
02281 }
02282 
02283 int 
02284 GStringRep::cmp(const GP<GStringRep> &s1, const GP<GStringRep> &s2,
02285   const int len )
02286 {
02287   return (s1?(s1->cmp(s2,len)):cmp(0,(s2?(s2->data):0),len));
02288 }
02289 
02290 int 
02291 GStringRep::cmp(const GP<GStringRep> &s1, const char *s2, 
02292   const int len )
02293 {
02294   return cmp((s1?s1->data:0),s2,len);
02295 }
02296 
02297 int 
02298 GStringRep::cmp(const char *s1, const GP<GStringRep> &s2,
02299   const int len )
02300 {
02301   return cmp(s1,(s2?(s2->data):0),len);
02302 }
02303 
02304 int
02305 GStringRep::UTF8::cmp(const GP<GStringRep> &s2,const int len) const
02306 {
02307   int retval;
02308   if(s2)
02309   {
02310     if(s2->isNative())
02311     {
02312       GP<GStringRep> r(s2->toUTF8(true));
02313       if(r)
02314       {
02315         retval=GStringRep::cmp(data,r->data,len);
02316       }else
02317       {
02318         retval=-(s2->cmp(toNative(NOT_ESCAPED),len));
02319       }
02320     }else
02321     {
02322       retval=GStringRep::cmp(data,s2->data,len);
02323     }
02324   }else
02325   { 
02326     retval=GStringRep::cmp(data,0,len);
02327   }
02328   return retval;
02329 } 
02330 
02331 int
02332 GStringRep::UTF8::toInt() const
02333 {
02334   int endpos;
02335   return (int)toLong(0,endpos);
02336 }
02337 
02338 static inline long
02339 Cstrtol(char *data,char **edata, const int base)
02340 {
02341   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02342   while (data && *data==' ') data++;
02343   return strtol(data,edata,base);
02344 }
02345 
02346 long 
02347 GStringRep::UTF8::toLong(
02348   const int pos, int &endpos, const int base) const
02349 {
02350   char *edata=0;
02351   long retval=Cstrtol(data+pos,&edata, base);
02352   if(edata)
02353   {
02354     endpos=edata-data;
02355   }else
02356   {
02357     endpos=(-1);
02358     GP<GStringRep> ptr=ptr->strdup(data+pos);
02359     if(ptr)
02360       ptr=ptr->toNative(NOT_ESCAPED);
02361     if(ptr)
02362     {
02363       int xendpos;
02364       retval=ptr->toLong(0,xendpos,base);
02365       if(xendpos> 0)
02366       {
02367         endpos=(int)size;
02368         ptr=ptr->strdup(data+xendpos);
02369         if(ptr)
02370         {
02371           ptr=ptr->toUTF8(true);
02372           if(ptr)
02373           {
02374             endpos-=(int)(ptr->size);
02375           }
02376         }
02377       }
02378     }
02379   }
02380   return retval;
02381 }
02382 
02383 static inline unsigned long
02384 Cstrtoul(char *data,char **edata, const int base)
02385 {
02386   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02387   while (data && *data==' ') data++;
02388   return strtoul(data,edata,base);
02389 }
02390 
02391 unsigned long 
02392 GStringRep::UTF8::toULong(
02393   const int pos, int &endpos, const int base) const
02394 {
02395   char *edata=0;
02396   unsigned long retval=Cstrtoul(data+pos,&edata, base);
02397   if(edata)
02398   {
02399     endpos=edata-data;
02400   }else
02401   {
02402     endpos=(-1);
02403     GP<GStringRep> ptr=ptr->strdup(data+pos);
02404     if(ptr)
02405       ptr=ptr->toNative(NOT_ESCAPED);
02406     if(ptr)
02407     {
02408       int xendpos;
02409       retval=ptr->toULong(0,xendpos,base);
02410       if(xendpos> 0)
02411       {
02412         endpos=(int)size;
02413         ptr=ptr->strdup(data+xendpos);
02414         if(ptr)
02415         {
02416           ptr=ptr->toUTF8(true);
02417           if(ptr)
02418           {
02419             endpos-=(int)(ptr->size);
02420           }
02421         }
02422       }
02423     }
02424   }
02425   return retval;
02426 }
02427 
02428 static inline double
02429 Cstrtod(char *data,char **edata)
02430 {
02431   GStringRep::ChangeLocale locale(LC_NUMERIC,"C");
02432   while (data && *data==' ') data++;
02433   return strtod(data,edata);
02434 }
02435 
02436 double
02437 GStringRep::UTF8::toDouble(const int pos, int &endpos) const
02438 {
02439   char *edata=0;
02440   double retval=Cstrtod(data+pos,&edata);
02441   if(edata)
02442   {
02443     endpos=edata-data;
02444   }else
02445   {
02446     endpos=(-1);
02447     GP<GStringRep> ptr=ptr->strdup(data+pos);
02448     if(ptr)
02449       ptr=ptr->toNative(NOT_ESCAPED);
02450     if(ptr)
02451     {
02452       int xendpos;
02453       retval=ptr->toDouble(0,xendpos);
02454       if(xendpos >= 0)
02455       {
02456         endpos=(int)size;
02457         ptr=ptr->strdup(data+xendpos);
02458         if(ptr)
02459         {
02460           ptr=ptr->toUTF8(true);
02461           if(ptr)
02462           {
02463             endpos-=(int)(ptr->size);
02464           }
02465         }
02466       }
02467     }
02468   }
02469   return retval;
02470 }
02471 
02472 int 
02473 GStringRep::getUCS4(unsigned long &w, const int from) const
02474 {
02475   int retval;
02476   if(from>=size)
02477   {
02478     w=0;
02479     retval=size;
02480   }else if(from<0)
02481   {
02482     w=(unsigned int)(-1);
02483     retval=(-1);
02484   }else
02485   {
02486     const char *source=data+from;
02487     w=getValidUCS4(source);
02488     retval=(int)((size_t)source-(size_t)data);
02489   } 
02490   return retval;
02491 }
02492 
02493 
02494 unsigned long
02495 GStringRep::UTF8::getValidUCS4(const char *&source) const
02496 {
02497   return GStringRep::UTF8toUCS4((const unsigned char *&)source,data+size);
02498 }
02499 
02500 int
02501 GStringRep::nextNonSpace(const int from,const int len) const
02502 {
02503   return nextCharType(giswspace,from,len,true);
02504 }
02505 
02506 int
02507 GStringRep::nextSpace(const int from,const int len) const
02508 {
02509   return nextCharType(giswspace,from,len,false);
02510 }
02511 
02512 int
02513 GStringRep::nextChar(const int from) const
02514 {
02515   char const * xptr=data+from;
02516   (void)getValidUCS4(xptr);
02517   return (int)((size_t)xptr-(size_t)data);
02518 }
02519 
02520 int 
02521 GStringRep::firstEndSpace(int from,const int len) const
02522 {
02523   const int xsize=(len<0)?size:(from+len);
02524   const int ysize=(size<xsize)?size:xsize;
02525   int retval=ysize;
02526   while(from<ysize)
02527   {
02528     from=nextNonSpace(from,ysize-from);
02529     if(from < size)
02530     {
02531       const int r=nextSpace(from,ysize-from);
02532       // If a character isn't legal, then it will return
02533       // tru for both nextSpace and nextNonSpace.
02534       if(r == from)
02535       {
02536         from++;
02537       }else
02538       {
02539         from=retval=r;
02540       }
02541     }
02542   }
02543   return retval;
02544 }
02545 
02546 int
02547 GStringRep::UCS4toUTF16(
02548   const unsigned long w,unsigned short &w1, unsigned short &w2)
02549 {
02550   int retval;
02551   if(w<0x10000)
02552   {
02553     w1=(unsigned short)w;
02554     w2=0;
02555     retval=1;
02556   }else
02557   {
02558     w1=(unsigned short)((((w-0x10000)>>10)&0x3ff)+0xD800);
02559     w2=(unsigned short)((w&0x3ff)+0xDC00);
02560     retval=2;
02561   }
02562   return retval;
02563 }
02564 
02565 int
02566 GStringRep::UTF16toUCS4(
02567   unsigned long &U,unsigned short const * const s,void const * const eptr)
02568 {
02569   int retval=0;
02570   U=0;
02571   unsigned short const * const r=s+1;
02572   if(r <= eptr)
02573   {
02574     unsigned long const W1=s[0];
02575     if((W1<0xD800)||(W1>0xDFFF))
02576     {
02577       if((U=W1))
02578       {
02579         retval=1;
02580       }
02581     }else if(W1<=0xDBFF)
02582     {
02583       unsigned short const * const rr=r+1;
02584       if(rr <= eptr)
02585       {
02586         unsigned long const W2=s[1];
02587         if(((W2>=0xDC00)||(W2<=0xDFFF))&&((U=(0x10000+((W1&0x3ff)<<10))|(W2&0x3ff))))
02588         {
02589           retval=2;
02590         }else
02591         {
02592           retval=(-1);
02593         }
02594       }
02595     }
02596   }
02597   return retval;
02598 }
02599 
02600 
02601 //bcr
02602 
02603 GUTF8String&
02604 GUTF8String::operator+= (char ch)
02605 {
02606   return init(
02607     GStringRep::UTF8::create((const char*)*this,
02608     GStringRep::UTF8::create(&ch,0,1)));
02609 }
02610 
02611 GUTF8String&
02612 GUTF8String::operator+= (const char *str)
02613 {
02614   return init(GStringRep::UTF8::create(*this,str));
02615 }
02616 
02617 GUTF8String&
02618 GUTF8String::operator+= (const GBaseString &str)
02619 {
02620   return init(GStringRep::UTF8::create(*this,str));
02621 }
02622 
02623 GUTF8String
02624 GUTF8String::substr(int from, int len) const
02625 { return GUTF8String(*this, from, len); }
02626 
02627 GUTF8String
02628 GUTF8String::operator+(const GBaseString &s2) const
02629 { return GStringRep::UTF8::create(*this,s2); }
02630 
02631 GUTF8String
02632 GUTF8String::operator+(const GUTF8String &s2) const
02633 { return GStringRep::UTF8::create(*this,s2); }
02634 
02635 GUTF8String
02636 GUTF8String::operator+(const char    *s2) const
02637 { return GStringRep::UTF8::create(*this,s2); }
02638 
02639 char *
02640 GUTF8String::getbuf(int n)
02641 {
02642   if(ptr)
02643     init((*this)->getbuf(n));
02644   else if(n>0)
02645     init(GStringRep::UTF8::create(n));
02646   else
02647     init(0);
02648   return ptr?((*this)->data):0;
02649 }
02650 
02651 void 
02652 GUTF8String::setat(const int n, const char ch)
02653 {
02654   if((!n)&&(!ptr))
02655   {
02656     init(GStringRep::UTF8::create(&ch,0,1));
02657   }else
02658   {
02659     init((*this)->setat(CheckSubscript(n),ch));
02660   }
02661 }
02662 
02663 GP<GStringRep>
02664 GStringRep::UTF8ToNative( const char *s, const EscapeMode escape )
02665 {
02666   return GStringRep::UTF8::create(s)->toNative(escape);
02667 }
02668 
02669 GUTF8String::GUTF8String(const char dat)
02670 { init(GStringRep::UTF8::create(&dat,0,1)); }
02671 
02672 GUTF8String::GUTF8String(const GUTF8String &fmt, va_list &args)
02673 { 
02674   if (fmt.ptr)
02675     init(fmt->vformat(args));
02676   else 
02677     init(fmt); 
02678 }
02679 
02680 GUTF8String::GUTF8String(const char *str)
02681 { init(GStringRep::UTF8::create(str)); }
02682 
02683 GUTF8String::GUTF8String(const unsigned char *str)
02684 { init(GStringRep::UTF8::create((const char *)str)); }
02685 
02686 GUTF8String::GUTF8String(const unsigned short *str)
02687 { init(GStringRep::UTF8::create(str,0,-1)); }
02688 
02689 GUTF8String::GUTF8String(const unsigned long *str)
02690 { init(GStringRep::UTF8::create(str,0,-1)); }
02691 
02692 GUTF8String::GUTF8String(const char *dat, unsigned int len)
02693 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02694 
02695 GUTF8String::GUTF8String(const unsigned short *dat, unsigned int len)
02696 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02697 
02698 GUTF8String::GUTF8String(const unsigned long *dat, unsigned int len)
02699 { init(GStringRep::UTF8::create(dat,0,((int)len<0)?(-1):(int)len)); }
02700 
02701 GUTF8String::GUTF8String(const GBaseString &gs, int from, int len)
02702 { init(GStringRep::UTF8::create(gs,from,((int)len<0)?(-1):(int)len)); }
02703 
02704 GUTF8String::GUTF8String(const int number)
02705 { init(GStringRep::UTF8::create_format("%d",number)); }
02706 
02707 GUTF8String::GUTF8String(const double number)
02708 { init(GStringRep::UTF8::create_format("%f",number)); }
02709 
02710 GUTF8String& GUTF8String::operator= (const char str)
02711 { return init(GStringRep::UTF8::create(&str,0,1)); }
02712 
02713 GUTF8String& GUTF8String::operator= (const char *str)
02714 { return init(GStringRep::UTF8::create(str)); }
02715 
02716 GUTF8String GBaseString::operator+(const GUTF8String &s2) const
02717 { return GStringRep::UTF8::create(*this,s2); }
02718 
#if HAS_WCHAR
// Returns the UTF8 concatenation of this native string and s2.  When this
// string has a representation it is converted with toUTF8(true) first.
GUTF8String
GNativeString::operator+(const GUTF8String &s2) const
{
  if (!ptr)
  {
    return GStringRep::UTF8::create((*this),s2);
  }
  return GStringRep::UTF8::create((*this)->toUTF8(true),s2);
}
#endif
02729 
02730 GUTF8String
02731 GUTF8String::operator+(const GNativeString &s2) const
02732 {
02733   GP<GStringRep> g = s2;
02734   if (s2.ptr)
02735     g = s2->toUTF8(true);
02736   return GStringRep::UTF8::create(*this,g);
02737 }
02738 
02739 GUTF8String
02740 operator+(const char    *s1, const GUTF8String &s2)
02741 { return GStringRep::UTF8::create(s1,s2); }
02742 
02743 #if HAS_WCHAR
02744 GNativeString
02745 operator+(const char    *s1, const GNativeString &s2)
02746 { return GStringRep::Native::create(s1,s2); }
02747 
02748 GNativeString&
02749 GNativeString::operator+= (char ch)
02750 {
02751   char s[2]; s[0]=ch; s[1]=0;
02752   return init(GStringRep::Native::create((const char*)*this, s));
02753 }
02754 
02755 GNativeString&
02756 GNativeString::operator+= (const char *str)
02757 {
02758   return init(GStringRep::Native::create(*this,str));
02759 }
02760 
02761 GNativeString&
02762 GNativeString::operator+= (const GBaseString &str)
02763 {
02764   return init(GStringRep::Native::create(*this,str));
02765 }
02766 
02767 GNativeString
02768 GNativeString::operator+(const GBaseString &s2) const
02769 { return GStringRep::Native::create(*this,s2); }
02770 
02771 GNativeString
02772 GNativeString::operator+(const GNativeString &s2) const
02773 { return GStringRep::Native::create(*this,s2); }
02774 
02775 GNativeString
02776 GNativeString::operator+(const char    *s2) const
02777 { return GStringRep::Native::create(*this,s2); }
02778 
02779 char *
02780 GNativeString::getbuf(int n)
02781 {
02782   if(ptr)
02783     init((*this)->getbuf(n));
02784   else if(n>0)
02785     init(GStringRep::Native::create(n));
02786   else
02787     init(0);
02788   return ptr?((*this)->data):0;
02789 }
02790 
02791 void
02792 GNativeString::setat(const int n, const char ch)
02793 {
02794   if((!n)&&(!ptr))
02795   {
02796     init(GStringRep::Native::create(&ch,0,1));
02797   }else
02798   {
02799     init((*this)->setat(CheckSubscript(n),ch));
02800   }
02801 }
02802 
02803 #endif
02804 
02805 
02806 #ifdef HAVE_NAMESPACES
02807 }
02808 # ifndef NOT_USING_DJVU_NAMESPACE
02809 using namespace DJVU;
02810 # endif
02811 #endif

kviewshell

Skip menu "kviewshell"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

API Reference

Skip menu "API Reference"
  • kviewshell
Generated for API Reference by doxygen 1.5.9
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal