• Skip to content
  • Skip to link menu
KDE 3.5 API Reference
  • KDE API Reference
  • API Reference
  • Sitemap
  • Contact Us
 

kviewshell

BSEncodeByteStream.cpp

Go to the documentation of this file.
00001 //C-  -*- C++ -*-
00002 //C- -------------------------------------------------------------------
00003 //C- DjVuLibre-3.5
00004 //C- Copyright (c) 2002  Leon Bottou and Yann Le Cun.
00005 //C- Copyright (c) 2001  AT&T
00006 //C-
00007 //C- This software is subject to, and may be distributed under, the
00008 //C- GNU General Public License, Version 2. The license should have
00009 //C- accompanied the software or you may obtain a copy of the license
00010 //C- from the Free Software Foundation at http://www.fsf.org .
00011 //C-
00012 //C- This program is distributed in the hope that it will be useful,
00013 //C- but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 //C- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 //C- GNU General Public License for more details.
00016 //C- 
00017 //C- DjVuLibre-3.5 is derived from the DjVu(r) Reference Library
00018 //C- distributed by Lizardtech Software.  On July 19th 2002, Lizardtech 
00019 //C- Software authorized us to replace the original DjVu(r) Reference 
00020 //C- Library notice by the following text (see doc/lizard2002.djvu):
00021 //C-
00022 //C-  ------------------------------------------------------------------
00023 //C- | DjVu (r) Reference Library (v. 3.5)
00024 //C- | Copyright (c) 1999-2001 LizardTech, Inc. All Rights Reserved.
00025 //C- | The DjVu Reference Library is protected by U.S. Pat. No.
00026 //C- | 6,058,214 and patents pending.
00027 //C- |
00028 //C- | This software is subject to, and may be distributed under, the
00029 //C- | GNU General Public License, Version 2. The license should have
00030 //C- | accompanied the software or you may obtain a copy of the license
00031 //C- | from the Free Software Foundation at http://www.fsf.org .
00032 //C- |
00033 //C- | The computer code originally released by LizardTech under this
00034 //C- | license and unmodified by other parties is deemed "the LIZARDTECH
00035 //C- | ORIGINAL CODE."  Subject to any third party intellectual property
00036 //C- | claims, LizardTech grants recipient a worldwide, royalty-free, 
00037 //C- | non-exclusive license to make, use, sell, or otherwise dispose of 
00038 //C- | the LIZARDTECH ORIGINAL CODE or of programs derived from the 
00039 //C- | LIZARDTECH ORIGINAL CODE in compliance with the terms of the GNU 
00040 //C- | General Public License.   This grant only confers the right to 
00041 //C- | infringe patent claims underlying the LIZARDTECH ORIGINAL CODE to 
00042 //C- | the extent such infringement is reasonably necessary to enable 
00043 //C- | recipient to make, have made, practice, sell, or otherwise dispose 
00044 //C- | of the LIZARDTECH ORIGINAL CODE (or portions thereof) and not to 
00045 //C- | any greater extent that may be necessary to utilize further 
00046 //C- | modifications or combinations.
00047 //C- |
00048 //C- | The LIZARDTECH ORIGINAL CODE is provided "AS IS" WITHOUT WARRANTY
00049 //C- | OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
00050 //C- | TO ANY WARRANTY OF NON-INFRINGEMENT, OR ANY IMPLIED WARRANTY OF
00051 //C- | MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
00052 //C- +------------------------------------------------------------------
00053 // 
00054 // $Id: BSEncodeByteStream.cpp,v 1.8 2003/11/07 22:08:20 leonb Exp $
00055 // $Name: release_3_5_15 $
00056 
00057 #ifdef HAVE_CONFIG_H
00058 # include "config.h"
00059 #endif
00060 #if NEED_GNUG_PRAGMAS
00061 # pragma implementation
00062 #endif
00063 
00064 // - Author: Leon Bottou, 07/1998
00065 
00066 
00067 
00068 #include "BSByteStream.h"
00069 #include "GString.h"
00070 #undef BSORT_TIMER
00071 #ifdef BSORT_TIMER
00072 #include "GOS.h"
00073 #endif
00074 
00075 #include <stdlib.h>
00076 #include <stdio.h>
00077 #include <string.h>
00078 
00079 
00080 #ifdef HAVE_NAMESPACES
00081 namespace DJVU {
00082 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
00083 }
00084 #endif
00085 #endif
00086 
00087 
00088 // ========================================
00089 // --- Assertion
00090 
00091 #define ASSERT(expr) do{if(!(expr))G_THROW("assertion ("#expr") failed");}while(0)
00092 
00093 
00094 
00095 // ========================================
00096 // --- Global Definitions
00097             
00098 
00099 #ifdef OVERFLOW
00100 #undef OVERFLOW
00101 #endif
00102 // Overflow required when encoding
00103 static const int OVERFLOW=32;
00104 
00105 // Sorting tresholds
00106 static const int RANKSORT_THRESH=10;
00107 static const int QUICKSORT_STACK=512;
00108 static const int PRESORT_THRESH=10;
00109 static const int PRESORT_DEPTH=8;
00110 static const int RADIX_THRESH=32768;
00111 
00112 static const int FREQS0=100000;
00113 static const int FREQS1=1000000;
00114 
00115 // ========================================
00116 // -- Sorting Routines
00117 
00118   
00119 class _BSort  // DJVU_CLASS
00120 {
00121 public:
00122   ~_BSort();
00123   _BSort(unsigned char *data, int size);
00124   void run(int &markerpos);
00125 private:
00126   // Members
00127   int            size;
00128   unsigned char *data;
00129   unsigned int  *posn;
00130   GPBuffer<unsigned int> gposn;
00131   int           *rank;
00132   GPBuffer<int> grank;
00133   // Helpers
00134   inline int GT(int p1, int p2, int depth);
00135   inline int GTD(int p1, int p2, int depth);
00136   // -- final in-depth sort
00137   void ranksort(int lo, int hi, int d);
00138   // -- doubling sort
00139   int  pivot3r(int *rr, int lo, int hi);
00140   void quicksort3r(int lo, int hi, int d);
00141   // -- presort to depth PRESORT_DEPTH
00142   unsigned char pivot3d(unsigned char *dd, int lo, int hi);
00143   void quicksort3d(int lo, int hi, int d);
00144   // -- radixsort
00145   void radixsort16(void);
00146   void radixsort8(void);
00147 };
00148 
00149 
00150 // blocksort -- the main entry point
00151 
00152 static void 
00153 blocksort(unsigned char *data, int size, int &markerpos)
00154 {
00155   _BSort bsort(data, size);
00156   bsort.run(markerpos);
00157 }
00158 
00159 
00160 // _BSort construction
00161 
00162 _BSort::_BSort(unsigned char *xdata, int xsize)
00163   : size(xsize), data(xdata), gposn(posn,xsize), grank(rank,xsize+1)
00164 {
00165   ASSERT(size>0 && size<0x1000000);
00166   rank[size] = -1;
00167 }
00168 
00169 _BSort::~_BSort()
00170 {
00171 }
00172 
00173 
00174 
00175 // GT -- compare suffixes using rank information
00176 
00177 inline int 
00178 _BSort::GT(int p1, int p2, int depth)
00179 {
00180   int r1, r2;
00181   int twod = depth + depth;
00182   while (1)
00183     {
00184       r1=rank[p1+depth]; r2=rank[p2+depth];
00185       p1+=twod;  p2+=twod;
00186       if (r1!=r2) 
00187         return (r1>r2);
00188       r1=rank[p1]; r2=rank[p2];
00189       if (r1!=r2) 
00190         return (r1>r2);
00191       r1=rank[p1+depth]; r2=rank[p2+depth];
00192       p1+=twod;  p2+=twod;
00193       if (r1!=r2) 
00194         return (r1>r2);
00195       r1=rank[p1]; r2=rank[p2];
00196       if (r1!=r2) 
00197         return (r1>r2);
00198       r1=rank[p1+depth]; r2=rank[p2+depth];
00199       p1+=twod;  p2+=twod;
00200       if (r1!=r2) 
00201         return (r1>r2);
00202       r1=rank[p1]; r2=rank[p2];
00203       if (r1!=r2) 
00204         return (r1>r2);
00205       r1=rank[p1+depth]; r2=rank[p2+depth];
00206       p1+=twod;  p2+=twod;
00207       if (r1!=r2) 
00208         return (r1>r2);
00209       r1=rank[p1]; r2=rank[p2];
00210       if (r1!=r2) 
00211         return (r1>r2);
00212     };
00213 }
00214 
00215 
00216 // _BSort::ranksort -- 
00217 // -- a simple insertion sort based on GT
00218 
00219 void 
00220 _BSort::ranksort(int lo, int hi, int depth)
00221 {
00222   int i,j;
00223   for (i=lo+1; i<=hi; i++)
00224     {
00225       int tmp = posn[i];
00226       for(j=i-1; j>=lo && GT(posn[j], tmp, depth); j--)
00227         posn[j+1] = posn[j];
00228       posn[j+1] = tmp;
00229     }
00230   for(i=lo;i<=hi;i++) 
00231     rank[posn[i]]=i;
00232 }
00233 
00234 // pivot -- return suitable pivot
00235 
00236 inline int
00237 _BSort::pivot3r(int *rr, int lo, int hi)
00238 {
00239   int c1, c2, c3;
00240   if (hi-lo > 256)
00241     {
00242       c1 = pivot3r(rr, lo, (6*lo+2*hi)/8);
00243       c2 = pivot3r(rr, (5*lo+3*hi)/8, (3*lo+5*hi)/8);
00244       c3 = pivot3r(rr, (2*lo+6*hi)/8, hi);
00245     }
00246   else
00247     {
00248       c1 = rr[posn[lo]];
00249       c2 = rr[posn[(lo+hi)/2]];
00250       c3 = rr[posn[hi]];
00251     }
00252   // Extract median
00253   if (c1>c3)
00254     { int tmp=c1; c1=c3; c3=tmp; }
00255   if (c2<=c1)
00256     return c1;
00257   else if (c2>=c3)
00258     return c3;
00259   else
00260     return c2;
00261 }
00262 
00263 
00264 // _BSort::quicksort3r -- Three way quicksort algorithm 
00265 //    Sort suffixes based on rank at pos+depth
00266 //    The algorithm breaks into ranksort when size is 
00267 //    smaller than RANKSORT_THRESH
00268 
00269 static inline int
00270 mini(int a, int b) 
00271 {
00272   return (a<=b) ? a : b;
00273 }
00274 
00275 static inline void
00276 vswap(int i, int j, int n, unsigned int *x)
00277 {
00278   while (n-- > 0) 
00279     { int tmp = x[i]; x[i++]=x[j]; x[j++]=tmp; }
00280 }
00281 
00282 void 
00283 _BSort::quicksort3r(int lo, int hi, int depth)
00284 {
00285   /* Initialize stack */
00286   int slo[QUICKSORT_STACK];
00287   int shi[QUICKSORT_STACK];
00288   int sp = 1;
00289   slo[0] = lo;
00290   shi[0] = hi;
00291   // Recursion elimination loop
00292   while (--sp>=0)
00293     {
00294       lo = slo[sp];
00295       hi = shi[sp];
00296       // Test for insertion sort
00297       if (hi-lo<RANKSORT_THRESH)
00298         {
00299           ranksort(lo, hi, depth);
00300         }
00301       else
00302         {
00303           int tmp;
00304           int *rr=rank+depth;
00305           int med = pivot3r(rr,lo,hi);
00306           // -- positions are organized as follows:
00307           //   [lo..l1[ [l1..l[ ]h..h1] ]h1..hi]
00308           //      =        <       >        =
00309           int l1 = lo;
00310           int h1 = hi;
00311           while (rr[posn[l1]]==med && l1<h1) { l1++; }
00312           while (rr[posn[h1]]==med && l1<h1) { h1--; }
00313           int l = l1;
00314           int h = h1;
00315           // -- partition set
00316           for (;;)
00317             {
00318               while (l<=h)
00319                 {
00320                   int c = rr[posn[l]] - med;
00321                   if (c > 0) break;
00322                   if (c == 0) { tmp=posn[l]; posn[l]=posn[l1]; posn[l1++]=tmp; }
00323                   l++;
00324                 }
00325               while (l<=h)
00326                 {
00327                   int c = rr[posn[h]] - med;
00328                   if (c < 0) break;
00329                   if (c == 0) { tmp=posn[h]; posn[h]=posn[h1]; posn[h1--]=tmp; }
00330                   h--;
00331                 }
00332               if (l>h) break;
00333               tmp=posn[l]; posn[l]=posn[h]; posn[h]=tmp;
00334             }
00335           // -- reorganize as follows
00336           //   [lo..l1[ [l1..h1] ]h1..hi]
00337           //      <        =        > 
00338           tmp = mini(l1-lo, l-l1);
00339           vswap(lo, l-tmp, tmp, posn);
00340           l1 = lo + (l-l1);
00341           tmp = mini(hi-h1, h1-h);
00342           vswap(hi-tmp+1, h+1, tmp, posn);
00343           h1 = hi - (h1-h);
00344           // -- process segments
00345           ASSERT(sp+2<QUICKSORT_STACK);
00346           // ----- middle segment (=?) [l1, h1]
00347           for(int i=l1;i<=h1;i++) 
00348             rank[posn[i]] = h1;
00349           // ----- lower segment (<) [lo, l1[
00350           if (l1 > lo)
00351             {
00352               for(int i=lo;i<l1;i++) 
00353                 rank[posn[i]]=l1-1;
00354               slo[sp]=lo;
00355               shi[sp]=l1-1;
00356               if (slo[sp] < shi[sp])  
00357                 sp++;
00358             }
00359           // ----- upper segment (>) ]h1, hi]
00360           if (h1 < hi)
00361             {
00362               slo[sp]=h1+1;
00363               shi[sp]=hi;
00364               if (slo[sp] < shi[sp])  
00365                 sp++;
00366             }
00367         }
00368     }
00369 }
00370 
00371 
00372 
00373 
00374 
00375 
00376 // GTD -- compare suffixes using data information 
00377 //  (up to depth PRESORT_DEPTH)
00378 
00379 inline int 
00380 _BSort::GTD(int p1, int p2, int depth)
00381 {
00382   unsigned char c1, c2;
00383   p1+=depth; p2+=depth;
00384   while (depth < PRESORT_DEPTH)
00385     {
00386       // Perform two
00387       c1=data[p1]; c2=data[p2];
00388       if (c1!=c2) 
00389         return (c1>c2);
00390       c1=data[p1+1]; c2=data[p2+1];
00391       p1+=2;  p2+=2; depth+=2;
00392       if (c1!=c2) 
00393         return (c1>c2);
00394     }
00395   if (p1<size && p2<size)
00396     return 0;
00397   return (p1<p2);
00398 }
00399 
00400 // pivot3d -- return suitable pivot
00401 
00402 inline unsigned char
00403 _BSort::pivot3d(unsigned char *rr, int lo, int hi)
00404 {
00405   unsigned char c1, c2, c3;
00406   if (hi-lo > 256)
00407     {
00408       c1 = pivot3d(rr, lo, (6*lo+2*hi)/8);
00409       c2 = pivot3d(rr, (5*lo+3*hi)/8, (3*lo+5*hi)/8);
00410       c3 = pivot3d(rr, (2*lo+6*hi)/8, hi);
00411     }
00412   else
00413     {
00414       c1 = rr[posn[lo]];
00415       c2 = rr[posn[(lo+hi)/2]];
00416       c3 = rr[posn[hi]];
00417     }
00418   // Extract median
00419   if (c1>c3)
00420     { int tmp=c1; c1=c3; c3=tmp; }
00421   if (c2<=c1)
00422     return c1;
00423   else if (c2>=c3)
00424     return c3;
00425   else
00426     return c2;
00427 }
00428 
00429 
00430 // _BSort::quicksort3d -- Three way quicksort algorithm 
00431 //    Sort suffixes based on strings until reaching
00432 //    depth rank at pos+depth
00433 //    The algorithm breaks into ranksort when size is 
00434 //    smaller than PRESORT_THRESH
00435 
00436 void 
00437 _BSort::quicksort3d(int lo, int hi, int depth)
00438 {
00439   /* Initialize stack */
00440   int slo[QUICKSORT_STACK];
00441   int shi[QUICKSORT_STACK];
00442   int sd[QUICKSORT_STACK];
00443   int sp = 1;
00444   slo[0] = lo;
00445   shi[0] = hi;
00446   sd[0] = depth;
00447   // Recursion elimination loop
00448   while (--sp>=0)
00449     {
00450       lo = slo[sp];
00451       hi = shi[sp];
00452       depth = sd[sp];
00453       // Test for insertion sort
00454       if (depth >= PRESORT_DEPTH)
00455         {
00456           for (int i=lo; i<=hi; i++)
00457             rank[posn[i]] = hi;
00458         }
00459       else if (hi-lo<PRESORT_THRESH)
00460         {
00461           int i,j;
00462           for (i=lo+1; i<=hi; i++)
00463             {
00464               int tmp = posn[i];
00465               for(j=i-1; j>=lo && GTD(posn[j], tmp, depth); j--)
00466                 posn[j+1] = posn[j];
00467               posn[j+1] = tmp;
00468             }
00469           for(i=hi;i>=lo;i=j)
00470             {
00471               int tmp = posn[i];
00472               rank[tmp] = i;
00473               for (j=i-1; j>=lo && !GTD(tmp,posn[j],depth); j--)
00474                 rank[posn[j]] = i;
00475             }
00476         }
00477         else
00478         {
00479           int tmp;
00480           unsigned char *dd=data+depth;
00481           unsigned char med = pivot3d(dd,lo,hi);
00482           // -- positions are organized as follows:
00483           //   [lo..l1[ [l1..l[ ]h..h1] ]h1..hi]
00484           //      =        <       >        =
00485           int l1 = lo;
00486           int h1 = hi;
00487           while (dd[posn[l1]]==med && l1<h1) { l1++; }
00488           while (dd[posn[h1]]==med && l1<h1) { h1--; }
00489           int l = l1;
00490           int h = h1;
00491           // -- partition set
00492           for (;;)
00493             {
00494               while (l<=h)
00495                 {
00496                   int c = (int)dd[posn[l]] - (int)med;
00497                   if (c > 0) break;
00498                   if (c == 0) { tmp=posn[l]; posn[l]=posn[l1]; posn[l1++]=tmp; }
00499                   l++;
00500                 }
00501               while (l<=h)
00502                 {
00503                   int c = (int)dd[posn[h]] - (int)med;
00504                   if (c < 0) break;
00505                   if (c == 0) { tmp=posn[h]; posn[h]=posn[h1]; posn[h1--]=tmp; }
00506                   h--;
00507                 }
00508               if (l>h) break;
00509               tmp=posn[l]; posn[l]=posn[h]; posn[h]=tmp;
00510             }
00511           // -- reorganize as follows
00512           //   [lo..l1[ [l1..h1] ]h1..hi]
00513           //      <        =        > 
00514           tmp = mini(l1-lo, l-l1);
00515           vswap(lo, l-tmp, tmp, posn);
00516           l1 = lo + (l-l1);
00517           tmp = mini(hi-h1, h1-h);
00518           vswap(hi-tmp+1, h+1, tmp, posn);
00519           h1 = hi - (h1-h);
00520           // -- process segments
00521           ASSERT(sp+3<QUICKSORT_STACK);
00522           // ----- middle segment (=?) [l1, h1]
00523           l = l1; h = h1;
00524           if (med==0) // special case for marker [slow]
00525             for (int i=l; i<=h; i++)
00526               if ((int)posn[i]+depth == size-1)
00527                 { 
00528                   tmp=posn[i]; posn[i]=posn[l]; posn[l]=tmp; 
00529                   rank[tmp]=l++; break; 
00530                 }
00531           if (l<h)
00532             { slo[sp] = l; shi[sp] = h; sd[sp++] = depth+1; }
00533           else if (l==h)
00534             { rank[posn[h]] = h; }
00535           // ----- lower segment (<) [lo, l1[
00536           l = lo;
00537           h = l1-1;
00538           if (l<h)
00539             { slo[sp] = l; shi[sp] = h; sd[sp++] = depth; }
00540           else if (l==h)
00541             { rank[posn[h]] = h; }
00542           // ----- upper segment (>) ]h1, hi]
00543           l = h1+1;
00544           h = hi;
00545           if (l<h)
00546             { slo[sp] = l; shi[sp] = h; sd[sp++] = depth; }
00547           else if (l==h)
00548             { rank[posn[h]] = h; }
00549         }
00550     }
00551 }
00552 
00553 
00554 
00555 
00556 // _BSort::radixsort8 -- 8 bit radix sort
00557 
00558 void 
00559 _BSort::radixsort8(void)
00560 {
00561   int i;
00562   // Initialize frequency array
00563   int lo[256], hi[256];
00564   for (i=0; i<256; i++)
00565     hi[i] = lo[i] = 0;
00566   // Count occurences
00567   for (i=0; i<size-1; i++)
00568     hi[data[i]] ++;
00569   // Compute positions (lo)
00570   int last = 1;
00571   for (i=0; i<256; i++)
00572     {
00573       lo[i] = last;
00574       hi[i] = last + hi[i] - 1;
00575       last = hi[i] + 1;
00576     }
00577   for (i=0; i<size-1; i++)
00578     {
00579       posn[ lo[data[i]]++ ] = i;
00580       rank[ i ] = hi[data[i]];
00581     }
00582   // Process marker "$"
00583   posn[0] = size-1;
00584   rank[size-1] = 0;
00585   // Extra element
00586   rank[size] = -1;
00587 }
00588 
00589 
00590 // _BSort::radixsort16 -- 16 bit radix sort
00591 
00592 void 
00593 _BSort::radixsort16(void)
00594 {
00595   int i;
00596   // Initialize frequency array
00597   int *ftab;
00598   GPBuffer<int> gftab(ftab,65536);
00599   for (i=0; i<65536; i++)
00600     ftab[i] = 0;
00601   // Count occurences
00602   unsigned char c1 = data[0];
00603   for (i=0; i<size-1; i++)
00604     {
00605       unsigned char c2 = data[i+1];
00606       ftab[(c1<<8)|c2] ++;
00607       c1 = c2;
00608     }
00609   // Generate upper position
00610   for (i=1;i<65536;i++)
00611     ftab[i] += ftab[i-1];
00612   // Fill rank array with upper bound
00613   c1 = data[0];
00614   for (i=0; i<size-2; i++)
00615     {
00616       unsigned char c2 = data[i+1];
00617       rank[i] = ftab[(c1<<8)|c2];
00618       c1 = c2;
00619     }
00620   // Fill posn array (backwards)
00621   c1 = data[size-2];
00622   for (i=size-3; i>=0; i--)
00623     {
00624       unsigned char c2 = data[i];
00625       posn[ ftab[(c2<<8)|c1]-- ] = i;
00626       c1 = c2;
00627     }
00628   // Fixup marker stuff
00629   ASSERT(data[size-1]==0);
00630   c1 = data[size-2];
00631   posn[0] = size-1;
00632   posn[ ftab[(c1<<8)] ] = size-2;
00633   rank[size-1] = 0;
00634   rank[size-2] = ftab[(c1<<8)];
00635   // Extra element
00636   rank[size] = -1;
00637 }
00638 
00639 
00640 
00641 // _BSort::run -- main sort loop
00642 
00643 void
00644 _BSort::run(int &markerpos)
00645 {
00646   int lo, hi;
00647   ASSERT(size>0);
00648   ASSERT(data[size-1]==0);
00649 #ifdef BSORT_TIMER
00650   long start = GOS::ticks();
00651 #endif  
00652   // Step 1: Radix sort 
00653   int depth = 0;
00654   if (size > RADIX_THRESH)
00655     { 
00656       radixsort16();
00657       depth=2;
00658     }
00659   else
00660     { 
00661       radixsort8(); 
00662       depth=1;
00663     }
00664   // Step 2: Perform presort to depth PRESORT_DEPTH
00665   for (lo=0; lo<size; lo++)
00666     {
00667       hi = rank[posn[lo]];
00668       if (lo < hi)
00669         quicksort3d(lo, hi, depth);
00670       lo = hi;
00671     }
00672   depth = PRESORT_DEPTH;
00673 #ifdef BSORT_TIMER
00674   long middle = GOS::ticks();
00675 #endif  
00676   // Step 3: Perform rank doubling
00677   int again = 1;
00678   while (again)
00679     {
00680       again = 0;
00681       int sorted_lo = 0;
00682       for (lo=0; lo<size; lo++)
00683         {
00684           hi = rank[posn[lo]&0xffffff];
00685           if (lo == hi)
00686             {
00687               lo += (posn[lo]>>24) & 0xff;
00688             }
00689           else
00690             {
00691               if (hi-lo < RANKSORT_THRESH)
00692                 {
00693                   ranksort(lo, hi, depth);
00694                 }
00695               else
00696                 {
00697                   again += 1;
00698                   while (sorted_lo < lo-1)
00699                     {
00700                       int step = mini(255, lo-1-sorted_lo);
00701                       posn[sorted_lo] = (posn[sorted_lo]&0xffffff) | (step<<24);
00702                       sorted_lo += step+1;
00703                     }
00704                   quicksort3r(lo, hi, depth);
00705                   sorted_lo = hi + 1;
00706                 }
00707               lo = hi;
00708             }
00709         }
00710       // Finish threading
00711       while (sorted_lo < lo-1)
00712         {
00713           int step = mini(255, lo-1-sorted_lo);
00714           posn[sorted_lo] = (posn[sorted_lo]&0xffffff) | (step<<24);
00715           sorted_lo += step+1;
00716         }
00717       // Double depth
00718       depth += depth;
00719     }
00720   // Step 4: Permute data
00721   int i;
00722   markerpos = -1;
00723   for (i=0; i<size; i++)
00724     rank[i] = data[i];
00725   for (i=0; i<size; i++)
00726     {
00727       int j = posn[i] & 0xffffff;
00728       if (j>0) 
00729         { 
00730           data[i] = rank[j-1];
00731         } 
00732       else 
00733         {
00734           data[i] = 0;
00735           markerpos = i;
00736         }
00737     }
00738   ASSERT(markerpos>=0 && markerpos<size);
00739 #ifdef BSORT_TIMER
00740   long end = GOS::ticks();
00741   DjVuPrintErrorUTF8("Sorting time: %d bytes in %ld + %ld = %ld ms\n", 
00742           size-1, middle-start, end-middle, end-start);
00743 #endif  
00744 }
00745 
00746 
00747 // ========================================
00748 // -- Encoding
00749 
00750 static void
00751 encode_raw(ZPCodec &zp, int bits, int x)
00752 {
00753   int n = 1;
00754   int m = (1<<bits);
00755   while (n < m)
00756     {
00757       x = (x & (m-1)) << 1;
00758       int b = (x >> bits);
00759       zp.encoder(b);
00760       n = (n<<1) | b;
00761     }
00762 }
00763 
00764 static inline void
00765 encode_binary(ZPCodec &zp, BitContext *ctx, int bits, int x)
00766 {
00767   // Require 2^bits-1  contexts
00768   int n = 1;
00769   int m = (1<<bits);
00770   ctx = ctx - 1;
00771   while (n < m)
00772     {
00773       x = (x & (m-1)) << 1;
00774       int b = (x >> bits);
00775       zp.encoder(b, ctx[n]);
00776       n = (n<<1) | b;
00777     }
00778 }
00779 
00780 class BSByteStream::Encode : public BSByteStream
00781 {
00782 public:
00785   Encode(GP<ByteStream> bs);
00786   ~Encode();
00787   void init(const int encoding);
00788   // Virtual functions
00789   virtual size_t write(const void *buffer, size_t sz);
00790   virtual void flush(void);
00791 protected:
00792   unsigned int encode(void);
00793 };
00794 
00795 unsigned int
00796 BSByteStream::Encode::encode()
00797 { 
00800 
00801   int markerpos = size-1;
00802   blocksort(data,size,markerpos);
00803 
00806 
00807   // Header
00808   ZPCodec &zp=*gzp;
00809   encode_raw(zp, 24, size);
00810   // Determine and Encode Estimation Speed
00811   int fshift = 0;
00812   if (size < FREQS0)
00813     { fshift=0; zp.encoder(0); }
00814   else if (size < FREQS1)
00815     { fshift = 1; zp.encoder(1); zp.encoder(0); }
00816   else
00817     { fshift = 2; zp.encoder(1); zp.encoder(1); }
00818   // MTF
00819   unsigned char mtf[256];
00820   unsigned char rmtf[256];
00821   unsigned int  freq[FREQMAX];
00822   int m = 0;
00823   for (m=0; m<256; m++)
00824     mtf[m] = m;
00825   for (m=0; m<256; m++)
00826     rmtf[mtf[m]] = m;
00827   int fadd = 4;
00828   for (m=0; m<FREQMAX; m++)
00829     freq[m] = 0;
00830   // Encode
00831   int i;
00832   int mtfno = 3;
00833   for (i=0; i<size; i++)
00834     {
00835       // Get MTF data
00836       int c = data[i];
00837       int ctxid = CTXIDS-1;
00838       if (ctxid>mtfno) ctxid=mtfno;
00839       mtfno = rmtf[c];
00840       if (i==markerpos)
00841         mtfno = 256;
00842       // Encode using ZPCoder
00843       int b;
00844       BitContext *cx = ctx;
00845       b = (mtfno==0);
00846       zp.encoder(b, cx[ctxid]);
00847       if (b) goto rotate; cx+=CTXIDS;
00848       b = (mtfno==1);
00849       zp.encoder(b, cx[ctxid]);
00850       if (b) goto rotate; cx+=CTXIDS;
00851       b = (mtfno<4);
00852       zp.encoder(b, cx[0]);
00853       if (b) { encode_binary(zp,cx+1,1,mtfno-2); goto rotate; } 
00854       cx+=1+1;
00855       b = (mtfno<8);
00856       zp.encoder(b, cx[0]);
00857       if (b) { encode_binary(zp,cx+1,2,mtfno-4); goto rotate; } 
00858       cx+=1+3;
00859       b = (mtfno<16);
00860       zp.encoder(b, cx[0]);
00861       if (b) { encode_binary(zp,cx+1,3,mtfno-8); goto rotate; } 
00862       cx+=1+7;
00863       b = (mtfno<32);
00864       zp.encoder(b, cx[0]);
00865       if (b) { encode_binary(zp,cx+1,4,mtfno-16); goto rotate; } 
00866       cx+=1+15;
00867       b = (mtfno<64);
00868       zp.encoder(b, cx[0]);
00869       if (b) { encode_binary(zp,cx+1,5,mtfno-32); goto rotate; } 
00870       cx+=1+31;
00871       b = (mtfno<128);
00872       zp.encoder(b, cx[0]);
00873       if (b) { encode_binary(zp,cx+1,6,mtfno-64); goto rotate; } 
00874       cx+=1+63;
00875       b = (mtfno<256);
00876       zp.encoder(b, cx[0]);
00877       if (b) { encode_binary(zp,cx+1,7,mtfno-128); goto rotate; } 
00878       continue;
00879       // Rotate MTF according to empirical frequencies (new!)
00880     rotate:
00881       // Adjust frequencies for overflow
00882       fadd = fadd + (fadd>>fshift);
00883       if (fadd > 0x10000000) 
00884         {
00885           fadd = fadd>>24;
00886           freq[0] >>= 24;
00887           freq[1] >>= 24;
00888           freq[2] >>= 24;
00889           freq[3] >>= 24;
00890           for (int k=4; k<FREQMAX; k++)
00891             freq[k] = freq[k]>>24;
00892         }
00893       // Relocate new char according to new freq
00894       unsigned int fc = fadd;
00895       if (mtfno < FREQMAX)
00896         fc += freq[mtfno];
00897       int k;
00898       for (k=mtfno; k>=FREQMAX; k--)
00899         {
00900           mtf[k] = mtf[k-1];
00901           rmtf[mtf[k]] = k;
00902         }
00903       for (; k>0 && fc>=freq[k-1]; k--)
00904         {
00905           mtf[k] = mtf[k-1];
00906           freq[k] = freq[k-1];
00907           rmtf[mtf[k]] = k;
00908         }
00909       mtf[k] = c;
00910       freq[k] = fc;
00911       rmtf[mtf[k]] = k;
00912     }
00913   // Terminate
00914   return 0;
00915 }
00916 
00917 // ========================================
00918 // --- Construction
00919 
00920 BSByteStream::Encode::Encode(GP<ByteStream> xbs)
00921 : BSByteStream(xbs) {}
00922 
00923 void
00924 BSByteStream::Encode::init(const int xencoding)
00925 {
00926   gzp=ZPCodec::create(gbs,true,true);
00927   const int encoding=(xencoding<MINBLOCK)?MINBLOCK:xencoding;
00928   if (encoding > MAXBLOCK)
00929     G_THROW( ERR_MSG("ByteStream.blocksize") "\t" + GUTF8String(MAXBLOCK) );
00930   // Record block size
00931   blocksize = encoding * 1024;
00932   // Initialize context array
00933 }
00934 
00935 BSByteStream::Encode::~Encode()
00936 {
00937   // Flush
00938   flush();
00939   // Encode EOF marker
00940   encode_raw(*gzp, 24, 0);
00941   // Free allocated memory
00942 }
00943 
00944 GP<ByteStream>
00945 BSByteStream::create(GP<ByteStream> xbs,const int blocksize)
00946 {
00947   BSByteStream::Encode *rbs=new BSByteStream::Encode(xbs);
00948   GP<ByteStream> retval=rbs;
00949   rbs->init(blocksize);
00950   return retval;
00951 }
00952 
00953 // ========================================
00954 // -- ByteStream interface
00955 
00956 void 
00957 BSByteStream::Encode::flush()
00958 {
00959   if (bptr>0)
00960   {
00961     ASSERT(bptr<(int)blocksize);
00962     memset(data+bptr, 0, OVERFLOW);
00963     size = bptr+1;
00964     encode();
00965   }
00966   size = bptr = 0;
00967 }
00968 
00969 size_t 
00970 BSByteStream::Encode::write(const void *buffer, size_t sz)
00971 {
00972   // Trivial checks
00973   if (sz == 0)
00974     return 0;
00975   // Loop
00976   int copied = 0;
00977   while (sz > 0)
00978     {
00979       // Initialize
00980       if (!data) 
00981         {
00982           bptr = 0;
00983           gdata.resize(blocksize+OVERFLOW);
00984         }
00985       // Compute remaining
00986       int bytes = blocksize - 1 - bptr;
00987       if (bytes > (int)sz)
00988         bytes = sz;
00989       // Store date (todo: rle)
00990       memcpy(data+bptr, buffer, bytes);
00991       buffer = (void*)((char*)buffer + bytes);
00992       bptr += bytes;
00993       sz -= bytes;
00994       copied += bytes;
00995       offset += bytes;
00996       // Flush when needed
00997       if (bptr + 1 >= (int)blocksize)
00998         flush();
00999     }
01000   // return
01001   return copied;
01002 }
01003 
01004 
01005 #ifdef HAVE_NAMESPACES
01006 }
01007 # ifndef NOT_USING_DJVU_NAMESPACE
01008 using namespace DJVU;
01009 # endif
01010 #endif

kviewshell

Skip menu "kviewshell"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members

API Reference

Skip menu "API Reference"
  • kviewshell
Generated for API Reference by doxygen 1.5.9
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal