00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 #ifdef HAVE_CONFIG_H
00058 # include "config.h"
00059 #endif
00060 #if NEED_GNUG_PRAGMAS
00061 # pragma implementation
00062 #endif
00063
00064 #include "DjVuText.h"
00065 #include "IFFByteStream.h"
00066 #include "BSByteStream.h"
00067 #include "debug.h"
00068 #include <ctype.h>
00069
00070
00071
00072 #ifdef HAVE_NAMESPACES
00073 namespace DJVU {
00074 # ifdef NOT_DEFINED // Just to fool emacs c++ mode
00075 }
00076 #endif
00077 #endif
00078
00079
00080
00081 #ifdef min
00082 #undef min
00083 #endif
// Return the smaller of two values.  When `a` does not compare less than
// `b` (including when they are equal) the second argument is returned.
template<class TYPE>
static inline TYPE min(TYPE a,TYPE b)
{
  if (a < b)
    return a;
  return b;
}
00086
00087
00088
00089
00090
00091 const char DjVuTXT::end_of_column = 013;
00092 const char DjVuTXT::end_of_region = 035;
00093 const char DjVuTXT::end_of_paragraph = 037;
00094 const char DjVuTXT::end_of_line = 012;
00095
00096 const int DjVuTXT::Zone::version = 1;
00097
00098 DjVuTXT::Zone::Zone()
00099 : ztype(DjVuTXT::PAGE), text_start(0), text_length(0), zone_parent(0)
00100 {
00101 }
00102
00103 DjVuTXT::Zone *
00104 DjVuTXT::Zone::append_child()
00105 {
00106 Zone empty;
00107 empty.ztype = ztype;
00108 empty.text_start = 0;
00109 empty.text_length = 0;
00110 empty.zone_parent=this;
00111 children.append(empty);
00112 return & children[children.lastpos()];
00113 }
00114
00115 void
00116 DjVuTXT::Zone::cleartext()
00117 {
00118 text_start = 0;
00119 text_length = 0;
00120 for (GPosition i=children; i; ++i)
00121 children[i].cleartext();
00122 }
00123
00124 void
00125 DjVuTXT::Zone::normtext(const char *instr, GUTF8String &outstr)
00126 {
00127 if (text_length == 0)
00128 {
00129
00130 text_start = outstr.length();
00131 for (GPosition i=children; i; ++i)
00132 children[i].normtext(instr, outstr);
00133 text_length = outstr.length() - text_start;
00134
00135 if (text_length == 0)
00136 return;
00137 }
00138 else
00139 {
00140
00141 int new_start = outstr.length();
00142 outstr = outstr + GUTF8String(instr+text_start, text_length);
00143 text_start = new_start;
00144
00145 for (GPosition i=children; i; ++i)
00146 children[i].cleartext();
00147 }
00148
00149 char sep;
00150 switch (ztype)
00151 {
00152 case COLUMN:
00153 sep = end_of_column; break;
00154 case REGION:
00155 sep = end_of_region; break;
00156 case PARAGRAPH:
00157 sep = end_of_paragraph; break;
00158 case LINE:
00159 sep = end_of_line; break;
00160 case WORD:
00161 sep = ' '; break;
00162 default:
00163 return;
00164 }
00165
00166 if (outstr[text_start+text_length-1] != sep)
00167 {
00168 outstr = outstr + GUTF8String(&sep, 1);
00169 text_length += 1;
00170 }
00171 }
00172
00173 unsigned int
00174 DjVuTXT::Zone::memuse() const
00175 {
00176 int memuse = sizeof(*this);
00177 for (GPosition i=children; i; ++i)
00178 memuse += children[i].memuse();
00179 return memuse;
00180 }
00181
00182
00183 #ifndef NEED_DECODER_ONLY
00184 void
00185 DjVuTXT::Zone::encode(
00186 const GP<ByteStream> &gbs, const Zone * parent, const Zone * prev) const
00187 {
00188 ByteStream &bs=*gbs;
00189
00190 bs.write8(ztype);
00191
00192
00193
00194 int start=text_start;
00195 int x=rect.xmin, y=rect.ymin;
00196 int width=rect.width(), height=rect.height();
00197 if (prev)
00198 {
00199 if (ztype==PAGE || ztype==PARAGRAPH || ztype==LINE)
00200 {
00201
00202
00203
00204 x=x-prev->rect.xmin;
00205 y=prev->rect.ymin-(y+height);
00206 } else
00207 {
00208
00209
00210
00211 x=x-prev->rect.xmax;
00212 y=y-prev->rect.ymin;
00213 }
00214 start-=prev->text_start+prev->text_length;
00215 } else if (parent)
00216 {
00217
00218
00219 x=x-parent->rect.xmin;
00220 y=parent->rect.ymax-(y+height);
00221 start-=parent->text_start;
00222 }
00223
00224 bs.write16(0x8000+x);
00225 bs.write16(0x8000+y);
00226 bs.write16(0x8000+width);
00227 bs.write16(0x8000+height);
00228
00229 bs.write16(0x8000+start);
00230 bs.write24(text_length);
00231
00232 bs.write24(children.size());
00233
00234 const Zone * prev_child=0;
00235
00236 for (GPosition i=children; i; ++i)
00237 {
00238 children[i].encode(gbs, this, prev_child);
00239 prev_child=&children[i];
00240 }
00241 }
00242 #endif
00243
00244 void
00245 DjVuTXT::Zone::decode(const GP<ByteStream> &gbs, int maxtext,
00246 const Zone * parent, const Zone * prev)
00247 {
00248 ByteStream &bs=*gbs;
00249
00250 ztype = (ZoneType) bs.read8();
00251 if ( ztype<PAGE || ztype>CHARACTER )
00252 G_THROW( ERR_MSG("DjVuText.corrupt_text") );
00253
00254
00255 int x=(int) bs.read16()-0x8000;
00256 int y=(int) bs.read16()-0x8000;
00257 int width=(int) bs.read16()-0x8000;
00258 int height=(int) bs.read16()-0x8000;
00259
00260
00261 text_start = (int) bs.read16()-0x8000;
00262
00263 text_length = bs.read24();
00264 if (prev)
00265 {
00266 if (ztype==PAGE || ztype==PARAGRAPH || ztype==LINE)
00267 {
00268 x=x+prev->rect.xmin;
00269 y=prev->rect.ymin-(y+height);
00270 } else
00271 {
00272 x=x+prev->rect.xmax;
00273 y=y+prev->rect.ymin;
00274 }
00275 text_start+=prev->text_start+prev->text_length;
00276 } else if (parent)
00277 {
00278 x=x+parent->rect.xmin;
00279 y=parent->rect.ymax-(y+height);
00280 text_start+=parent->text_start;
00281 }
00282 rect=GRect(x, y, width, height);
00283
00284 int size = bs.read24();
00285
00286
00287 if (rect.isempty() || text_start<0 || text_start+text_length>maxtext )
00288 G_THROW( ERR_MSG("DjVuText.corrupt_text") );
00289
00290
00291 const Zone * prev_child=0;
00292 children.empty();
00293 while (size-- > 0)
00294 {
00295 Zone *z = append_child();
00296 z->decode(gbs, maxtext, this, prev_child);
00297 prev_child=z;
00298 }
00299 }
00300
00301 void
00302 DjVuTXT::normalize_text()
00303 {
00304 GUTF8String newtextUTF8;
00305 page_zone.normtext( (const char*)textUTF8, newtextUTF8 );
00306 textUTF8 = newtextUTF8;
00307 }
00308
00309 int
00310 DjVuTXT::has_valid_zones() const
00311 {
00312 if (!textUTF8)
00313 return false;
00314 if (page_zone.children.isempty() || page_zone.rect.isempty())
00315 return false;
00316 return true;
00317 }
00318
00319
00320 #ifndef NEED_DECODER_ONLY
00321 void
00322 DjVuTXT::encode(const GP<ByteStream> &gbs) const
00323 {
00324 ByteStream &bs=*gbs;
00325 if (! textUTF8 )
00326 G_THROW( ERR_MSG("DjVuText.no_text") );
00327
00328 int textsize = textUTF8.length();
00329 bs.write24( textsize );
00330 bs.writall( (void*)(const char*)textUTF8, textsize );
00331
00332 if (has_valid_zones())
00333 {
00334 bs.write8(Zone::version);
00335 page_zone.encode(gbs);
00336 }
00337 }
00338 #endif
00339
00340 void
00341 DjVuTXT::decode(const GP<ByteStream> &gbs)
00342 {
00343 ByteStream &bs=*gbs;
00344
00345 textUTF8.empty();
00346 int textsize = bs.read24();
00347 char *buffer = textUTF8.getbuf(textsize);
00348 int readsize = bs.read(buffer,textsize);
00349 buffer[readsize] = 0;
00350 if (readsize < textsize)
00351 G_THROW( ERR_MSG("DjVuText.corrupt_chunk") );
00352
00353 unsigned char version;
00354 if ( bs.read( (void*) &version, 1 ) == 1)
00355 {
00356 if (version != Zone::version)
00357 G_THROW( ERR_MSG("DjVuText.bad_version") "\t" + GUTF8String(version) );
00358 page_zone.decode(gbs, textsize);
00359 }
00360 }
00361
00362 GP<DjVuTXT>
00363 DjVuTXT::copy(void) const
00364 {
00365 return new DjVuTXT(*this);
00366 }
00367
00368
00369 static inline bool
00370 intersects_zone(GRect box, const GRect &zone)
00371 {
00372 return
00373 ((box.xmin < zone.xmin)
00374 ?(box.xmax >= zone.xmin)
00375 :(box.xmin <= zone.xmax))
00376 &&((box.ymin < zone.ymin)
00377 ?(box.ymax >= zone.ymin)
00378 :(box.ymin <= zone.ymax));
00379 }
00380
00381 void
00382 DjVuTXT::Zone::get_text_with_rect(const GRect &box,
00383 int &string_start, int &string_end) const
00384 {
00385 GPosition pos=children;
00386 if(pos?box.contains(rect):intersects_zone(box,rect))
00387 {
00388 const int text_end=text_start+text_length;
00389 if(string_start == string_end)
00390 {
00391 string_start=text_start;
00392 string_end=text_end;
00393 }else
00394 {
00395 if (string_end < text_end)
00396 string_end=text_end;
00397 if(text_start < string_start)
00398 string_start=text_start;
00399 }
00400 }else if(pos&&intersects_zone(box,rect))
00401 {
00402 do
00403 {
00404 children[pos].get_text_with_rect(box,string_start,string_end);
00405 } while(++pos);
00406 }
00407 }
00408
00409 void
00410 DjVuTXT::Zone::find_zones(GList<Zone *> &list,
00411 const int string_start, const int string_end) const
00412 {
00413 const int text_end=text_start+text_length;
00414 if(text_start >= string_start)
00415 {
00416 if(text_end <= string_end)
00417 {
00418 list.append(const_cast<Zone *>(this));
00419 }
00420 else if(text_start < string_end)
00421 {
00422 if (children.size())
00423 for (GPosition pos=children; pos; ++pos)
00424 children[pos].find_zones(list,string_start,string_end);
00425 else
00426 list.append(const_cast<Zone *>(this));
00427 }
00428 }
00429 else if( text_end > string_start)
00430 {
00431 if (children.size())
00432 for (GPosition pos=children; pos; ++pos)
00433 children[pos].find_zones(list,string_start,string_end);
00434 else
00435 list.append(const_cast<Zone *>(this));
00436 }
00437 }
00438
00439 void
00440 DjVuTXT::Zone::get_smallest(GList<GRect> &list) const
00441 {
00442 GPosition pos=children;
00443 if(pos)
00444 {
00445 do {
00446 children[pos].get_smallest(list);
00447 } while (++pos);
00448 }
00449 else
00450 {
00451 list.append(rect);
00452 }
00453 }
00454
00455 void
00456 DjVuTXT::Zone::get_smallest(GList<GRect> &list, const int padding) const
00457 {
00458 GPosition pos=children;
00459 if(pos)
00460 {
00461 do {
00462 children[pos].get_smallest(list,padding);
00463 } while (++pos);
00464 }
00465 else if(zone_parent && zone_parent->ztype >= PARAGRAPH)
00466 {
00467 const GRect &xrect=zone_parent->rect;
00468 if(xrect.height() < xrect.width())
00469 {
00470 list.append(GRect(rect.xmin-padding,xrect.ymin-padding,rect.width()
00471 +2*padding,xrect.height()+2*padding));
00472 }
00473 else
00474 {
00475 list.append(GRect(xrect.xmin-padding,rect.ymin-padding,xrect.width()
00476 +2*padding,rect.height()+2*padding));
00477 }
00478 }
00479 else
00480 {
00481 list.append(GRect(rect.xmin-padding,rect.ymin-padding,rect.width()
00482 +2*padding,rect.height()+2*padding));
00483 }
00484 }
00485
00486 void
00487 DjVuTXT::get_zones(int zone_type, const Zone *parent,
00488 GList<Zone *> & zone_list) const
00489
00490 {
00491
00492 const Zone *zone=parent;
00493 for( int cur_ztype=zone->ztype; cur_ztype<zone_type; ++cur_ztype )
00494 {
00495 GPosition pos;
00496 for(pos=zone->children; pos; ++pos)
00497 {
00498 Zone *zcur=(Zone *)&zone->children[pos];
00499 if ( zcur->ztype == zone_type )
00500 {
00501 GPosition zpos=zone_list;
00502 if ( !zone_list.search(zcur,zpos) )
00503 zone_list.append(zcur);
00504 }
00505 else if ( zone->children[pos].ztype < zone_type )
00506 get_zones(zone_type, &zone->children[pos], zone_list);
00507 }
00508 }
00509 }
00510
00511 GList<GRect>
00512 DjVuTXT::find_text_with_rect(const GRect &box, GUTF8String &text,
00513 const int padding) const
00514 {
00515 GList<GRect> retval;
00516 int text_start=0;
00517 int text_end=0;
00518 page_zone.get_text_with_rect(box,text_start,text_end);
00519 if(text_start != text_end)
00520 {
00521 GList<Zone *> zones;
00522 page_zone.find_zones(zones,text_start,text_end);
00523 GPosition pos=zones;
00524 if(pos)
00525 {
00526 do
00527 {
00528 if(padding >= 0)
00529 {
00530 zones[pos]->get_smallest(retval,padding);
00531 }else
00532 {
00533 zones[pos]->get_smallest(retval);
00534 }
00535 } while(++pos);
00536 }
00537 }
00538 text=textUTF8.substr(text_start,text_end-text_start);
00539 return retval;
00540 }
00541
00542
00543 GList<DjVuTXT::Zone *>
00544 DjVuTXT::find_text_in_rect(GRect target_rect, GUTF8String &text) const
00545
00546 {
00547 GList<Zone *> zone_list;
00548 GList<Zone *> lines;
00549
00550 get_zones((int)PARAGRAPH, &page_zone, zone_list);
00551
00552
00553
00554
00555 if (zone_list.isempty())
00556 {
00557 get_zones((int)LINE, &page_zone, zone_list);
00558 GPosition pos;
00559 for(pos=zone_list; pos; ++pos)
00560 {
00561 GRect rect=zone_list[pos]->rect;
00562 int h0=rect.height()/2;
00563 if(rect.intersect(rect,target_rect) && rect.height()>h0)
00564 lines.append(zone_list[pos]);
00565 }
00566 } else
00567 {
00568 GPosition pos, pos_sel=zone_list;
00569 float ar=0;
00570 for(pos=zone_list; pos; ++pos)
00571 {
00572 GRect rect=zone_list[pos]->rect;
00573 int area=rect.area();
00574 if (rect.intersect(rect, target_rect))
00575 {
00576 float ftmp=rect.area()/(float)area;
00577 if ( !ar || ar<ftmp )
00578 {
00579 ar=ftmp;
00580 pos_sel=pos;
00581 }
00582 }
00583 }
00584 Zone *parag = 0;
00585 if ( ar>0 ) parag=zone_list[pos_sel];
00586 zone_list.empty();
00587 if ( ar>0 )
00588 {
00589 get_zones((int)LINE, parag, zone_list);
00590 if ( !zone_list.isempty() )
00591 {
00592 for(GPosition pos=zone_list; pos; ++pos)
00593 {
00594 GRect rect=zone_list[pos]->rect;
00595 int h0=rect.height()/2;
00596 if(rect.intersect(rect,target_rect) && rect.height()>h0)
00597 lines.append(zone_list[pos]);
00598 }
00599 }
00600 }
00601 }
00602
00603 zone_list.empty();
00604 if (!lines.isempty())
00605 {
00606 int i=1, lsize=lines.size();
00607
00608 GList<Zone *> words;
00609 for (GPosition pos=lines; pos; ++pos, ++i)
00610 {
00611 words.empty();
00612 get_zones((int)WORD, lines[pos], words);
00613
00614 if ( lsize==1 )
00615 {
00616 for(GPosition p=words;p;++p)
00617 {
00618 GRect rect=words[p]->rect;
00619 if(rect.intersect(rect,target_rect))
00620
00621 zone_list.append(words[p]);
00622 }
00623 } else
00624 {
00625 if (i==1)
00626 {
00627 bool start=true;
00628 for(GPosition p=words; p; ++p)
00629 {
00630 if ( start )
00631 {
00632 GRect rect=words[p]->rect;
00633 if(rect.intersect(rect,target_rect))
00634
00635 {
00636 start=false;
00637 zone_list.append(words[p]);
00638 }
00639 } else
00640 zone_list.append(words[p]);
00641 }
00642 } else if (i==lsize)
00643 {
00644 bool end=true;
00645 for(GPosition p=words.lastpos();p;--p)
00646 {
00647 if ( end )
00648 {
00649 GRect rect=words[p]->rect;
00650 if(rect.intersect(rect,target_rect))
00651
00652 {
00653 end=false;
00654 zone_list.append(words[p]);
00655 }
00656 } else
00657 zone_list.append(words[p]);
00658 }
00659 }
00660
00661 if (i!=1 && i!=lsize )
00662 {
00663 for(GPosition p=words;p;++p)
00664 zone_list.append(words[p]);
00665 }
00666 }
00667 }
00668 }
00669
00670 return zone_list;
00671 }
00672
00673 unsigned int
00674 DjVuTXT::get_memory_usage() const
00675 {
00676 return sizeof(*this) + textUTF8.length() + page_zone.memuse() - sizeof(page_zone);
00677 }
00678
00679
00680
00681
00682
00683
00684
00685 void
00686 DjVuText::decode(const GP<ByteStream> &gbs)
00687 {
00688 GUTF8String chkid;
00689 GP<IFFByteStream> giff=IFFByteStream::create(gbs);
00690 IFFByteStream &iff=*giff;
00691 while( iff.get_chunk(chkid) )
00692 {
00693 if (chkid == "TXTa")
00694 {
00695 if (txt)
00696 G_THROW( ERR_MSG("DjVuText.dupl_text") );
00697 txt = DjVuTXT::create();
00698 txt->decode(iff.get_bytestream());
00699 }
00700 else if (chkid == "TXTz")
00701 {
00702 if (txt)
00703 G_THROW( ERR_MSG("DjVuText.dupl_text") );
00704 txt = DjVuTXT::create();
00705 const GP<ByteStream> gbsiff=BSByteStream::create(iff.get_bytestream());
00706 txt->decode(gbsiff);
00707 }
00708
00709 iff.close_chunk();
00710 }
00711 }
00712
00713 void
00714 DjVuText::encode(const GP<ByteStream> &gbs)
00715 {
00716 if (txt)
00717 {
00718 const GP<IFFByteStream> giff=IFFByteStream::create(gbs);
00719 IFFByteStream &iff=*giff;
00720 iff.put_chunk("TXTz");
00721 {
00722 GP<ByteStream> gbsiff=BSByteStream::create(iff.get_bytestream(),50);
00723 txt->encode(gbsiff);
00724 }
00725 iff.close_chunk();
00726 }
00727
00728 }
00729
00730
00731 GP<DjVuText>
00732 DjVuText::copy(void) const
00733 {
00734 GP<DjVuText> text= new DjVuText;
00735
00736 *text=*this;
00737
00738 if (txt)
00739 text->txt = txt->copy();
00740 return text;
00741 }
00742
00743 static GUTF8String
00744 indent ( int spaces)
00745 {
00746 GUTF8String ret;
00747 for( int i = 0 ; i < spaces ; i++ )
00748 ret += ' ';
00749 return ret;
00750 }
00751
// XML element names, indexed by zone type value.  Index 0 has no element
// and holds a null pointer.
static const char *tags[8]=
{
  0,
  "HIDDENTEXT",
  "PAGECOLUMN",
  "REGION",
  "PARAGRAPH",
  "LINE",
  "WORD",
  "CHARACTER"
};
// Number of entries in `tags`.
static const int tags_size=sizeof(tags)/sizeof(tags[0]);
00762
00763 static GUTF8String
00764 start_tag(const DjVuTXT::ZoneType zone)
00765 {
00766 GUTF8String retval;
00767 if((tags_size > (int)zone)&&((int)zone > 0))
00768 {
00769 switch (zone)
00770 {
00771 case DjVuTXT::CHARACTER:
00772 retval="<"+GUTF8String(tags[zone])+">";
00773 break;
00774 case DjVuTXT::WORD:
00775 retval=indent(2*(int)zone+2)+"<"+tags[zone]+">";
00776 break;
00777 default:
00778 retval=indent(2*(int)zone+2)+"<"+tags[zone]+">\n";
00779 break;
00780 }
00781 }
00782 return retval;
00783 }
00784
00785 static GUTF8String
00786 start_tag(const DjVuTXT::ZoneType zone, const GUTF8String &attributes)
00787 {
00788 GUTF8String retval;
00789 if((tags_size > (int)zone)&&((int)zone > 0))
00790 {
00791 switch (zone)
00792 {
00793 case DjVuTXT::CHARACTER:
00794 retval="<"+GUTF8String(tags[zone])+" "+attributes+">";
00795 break;
00796 case DjVuTXT::WORD:
00797 retval=indent(2*(int)zone+2)+"<"+tags[zone]+" "+attributes+">";
00798 break;
00799 default:
00800 retval=indent(2*(int)zone+2)+"<"+tags[zone]+" "+attributes+">\n";
00801 break;
00802 }
00803 }
00804 return retval;
00805 }
00806
00807 static inline GUTF8String
00808 start_tag(const int layer)
00809 {
00810 return start_tag((const DjVuTXT::ZoneType)layer);
00811 }
00812
00813
00814 static GUTF8String
00815 end_tag(const DjVuTXT::ZoneType zone)
00816 {
00817 GUTF8String retval;
00818 if((tags_size > (int)zone)&&((int)zone >= 0))
00819 {
00820 switch (zone)
00821 {
00822 case DjVuTXT::CHARACTER:
00823 retval="</"+GUTF8String(tags[zone])+">";
00824 break;
00825 case DjVuTXT::WORD:
00826 retval="</"+GUTF8String(tags[zone])+">\n";
00827 break;
00828 default:
00829 retval=indent(2*(int)zone+2)+"</"+tags[zone]+">\n";
00830 break;
00831 }
00832 }
00833 return retval;
00834 }
00835
00836 static inline GUTF8String
00837 end_tag(const int layer)
00838 {
00839 return end_tag((const DjVuTXT::ZoneType)layer);
00840 }
00841
00842 static GUTF8String
00843 tolayer(int &layer, const DjVuTXT::ZoneType next_layer)
00844 {
00845 GUTF8String retval;
00846 for( ;layer < (int)next_layer;layer++ )
00847 {
00848 retval+=start_tag(layer);
00849 }
00850 while (layer > (int)next_layer )
00851 {
00852 retval+=end_tag(--layer);
00853 }
00854 return retval;
00855 }
00856
00857 static void
00858 writeText( ByteStream & str_out,
00859 const GUTF8String &textUTF8,
00860 const DjVuTXT::Zone &zone,
00861 const int WindowHeight );
00862
00863 static void
00864 writeText( ByteStream & str_out,
00865 const GUTF8String &textUTF8,
00866 const DjVuTXT::ZoneType zlayer,
00867 const GList<DjVuTXT::Zone> &children,
00868 const int WindowHeight )
00869 {
00870
00871
00872
00873
00874 int layer=(int)zlayer;
00875
00876 for(GPosition pos=children ; pos ; ++pos )
00877 {
00878 str_out.writestring(tolayer(layer,children[pos].ztype));
00879 writeText( str_out,
00880 textUTF8,
00881 children[pos],
00882 WindowHeight );
00883 }
00884 str_out.writestring(tolayer(layer,zlayer));
00885 }
00886
00887 static void
00888 writeText( ByteStream & str_out,
00889 const GUTF8String &textUTF8,
00890 const DjVuTXT::Zone &zone,
00891 const int WindowHeight )
00892 {
00893
00894
00895 const GUTF8String xindent(indent( 2 * zone.ztype + 2 ));
00896 GPosition pos=zone.children;
00897
00898 if( ! pos )
00899 {
00900 GUTF8String coords;
00901 coords.format("coords=\"%d,%d,%d,%d\"",
00902 zone.rect.xmin, WindowHeight - 1 - zone.rect.ymin,
00903 zone.rect.xmax, WindowHeight - 1 - zone.rect.ymax);
00904 const int start=zone.text_start;
00905 const int end=textUTF8.firstEndSpace(start,zone.text_length);
00906 str_out.writestring(start_tag(zone.ztype,coords));
00907 str_out.writestring(textUTF8.substr(start,end-start).toEscaped());
00908 str_out.writestring(end_tag(zone.ztype));
00909 } else
00910 {
00911 writeText(str_out,textUTF8,zone.ztype,zone.children,WindowHeight);
00912 }
00913 }
00914
00915 void
00916 DjVuTXT::writeText(ByteStream &str_out,const int height) const
00917 {
00918 if(has_valid_zones())
00919 {
00920 ::writeText(str_out,textUTF8,DjVuTXT::PAGE,page_zone.children,height);
00921 }else
00922 {
00923 str_out.writestring(start_tag(DjVuTXT::PAGE));
00924 str_out.writestring(end_tag(DjVuTXT::PAGE));
00925 }
00926 }
00927
00928 void
00929 DjVuText::writeText(ByteStream &str_out,const int height) const
00930 {
00931 if(txt)
00932 {
00933 txt->writeText(str_out,height);
00934 }else
00935 {
00936 str_out.writestring("<"+GUTF8String(tags[DjVuTXT::PAGE])+"/>\n");
00937 }
00938
00939 }
00940 GUTF8String
00941 DjVuTXT::get_xmlText(const int height) const
00942 {
00943 GP<ByteStream> gbs(ByteStream::create());
00944 ByteStream &bs=*gbs;
00945 writeText(bs,height);
00946 bs.seek(0L);
00947 return bs.getAsUTF8();
00948 }
00949
00950 GUTF8String
00951 DjVuText::get_xmlText(const int height) const
00952 {
00953 GUTF8String retval;
00954 if(txt)
00955 {
00956 retval=txt->get_xmlText(height);
00957 }else
00958 {
00959 retval="<"+GUTF8String(tags[DjVuTXT::PAGE])+"/>\n";
00960 }
00961 return retval;
00962 }
00963
00964
00965 #ifdef HAVE_NAMESPACES
00966 }
00967 # ifndef NOT_USING_DJVU_NAMESPACE
00968 using namespace DJVU;
00969 # endif
00970 #endif
00971