• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdegraphics API Reference
  • KDE Home
  • Contact Us
 

okular

  • sources
  • kde-4.14
  • kdegraphics
  • okular
  • core
textpage.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2005 by Piotr Szymanski <niedakh@gmail.com> *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 2 of the License, or *
7  * (at your option) any later version. *
8  ***************************************************************************/
9 
10 #include "textpage.h"
11 #include "textpage_p.h"
12 
13 #include <kdebug.h>
14 
15 #include "area.h"
16 #include "debug_p.h"
17 #include "misc.h"
18 #include "page.h"
19 #include "page_p.h"
20 
21 #include <cstring>
22 
23 #include <QtAlgorithms>
24 #include <QVarLengthArray>
25 
26 using namespace Okular;
27 
28 class SearchPoint
29 {
30  public:
31  SearchPoint()
32  : offset_begin( -1 ), offset_end( -1 )
33  {
34  }
35 
37  TextList::ConstIterator it_begin;
38 
40  TextList::ConstIterator it_end;
41 
45  int offset_begin;
46 
50  int offset_end;
51 };
52 
53 /* text comparison functions */
54 
55 static bool CaseInsensitiveCmpFn( const QStringRef & from, const QStringRef & to )
56 {
57  return from.compare( to, Qt::CaseInsensitive ) == 0;
58 }
59 
60 static bool CaseSensitiveCmpFn( const QStringRef & from, const QStringRef & to )
61 {
62  return from.compare( to, Qt::CaseSensitive ) == 0;
63 }
64 
65 
/**
 * Tells whether the segments [left1, right1] and [left2, right2] overlap
 * "enough".
 *
 * If one segment fully contains the other the answer is always true.
 * Otherwise the overlapping length must be at least @p threshold percent
 * of the shorter of the two segments.
 *
 * @param threshold required overlap, as a percentage of the shorter segment
 */
static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold)
{
    // full containment in either direction needs no further computation
    const bool firstCoversSecond = left1 <= left2 && right1 >= right2;
    const bool secondCoversFirst = left1 >= left2 && right1 <= right2;
    if (firstCoversSecond || secondCoversFirst)
        return true;

    // the segments do not even touch
    if (!(right2 >= left1 && right1 >= left2))
        return false;

    // partial overlap: measure the shared part against the shorter segment
    double overlap;
    if (right2 >= right1)
        overlap = right1 - left2;
    else
        overlap = right2 - left1;

    const double length1 = right1 - left1;
    const double length2 = right2 - left2;
    const double shorter = (length1 < length2) ? length1 : length2;

    return overlap * 100 >= threshold * shorter;
}
96 
97 static bool doesConsumeY(const QRect& first, const QRect& second, int threshold)
98 {
99  return segmentsOverlap(first.top(), first.bottom(), second.top(), second.bottom(), threshold);
100 }
101 
102 static bool doesConsumeY(const NormalizedRect& first, const NormalizedRect& second, int threshold)
103 {
104  return segmentsOverlap(first.top, first.bottom, second.top, second.bottom, threshold);
105 }
106 
107 
108 /*
109  Rationale behind TinyTextEntity:
110 
111  instead of storing directly a QString for the text of an entity,
112  we store the UTF-16 data and their length. This way, we save about
113  4 int's wrt a QString, and we can create a new string from that
114  raw data (that's the only penalty of that).
115  Even better, if the string we need to store has at most
116  MaxStaticChars characters, then we store those in place of the QChar*
117  that would be used (with new[] + free[]) for the data.
118  */
119 class TinyTextEntity
120 {
121  static const int MaxStaticChars = sizeof( QChar * ) / sizeof( QChar );
122 
123  public:
124  TinyTextEntity( const QString &text, const NormalizedRect &rect )
125  : area( rect )
126  {
127  Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" );
128  Q_ASSERT_X( sizeof( d ) == sizeof( QChar * ), "TinyTextEntity",
129  "internal storage is wider than QChar*, fix it!" );
130  length = text.length();
131  switch ( length )
132  {
133 #if QT_POINTER_SIZE >= 8
134  case 4:
135  d.qc[3] = text.at( 3 ).unicode();
136  // fall through
137  case 3:
138  d.qc[2] = text.at( 2 ).unicode();
139  // fall through
140 #endif
141  case 2:
142  d.qc[1] = text.at( 1 ).unicode();
143  // fall through
144  case 1:
145  d.qc[0] = text.at( 0 ).unicode();
146  break;
147  default:
148  d.data = new QChar[ length ];
149  std::memcpy( d.data, text.constData(), length * sizeof( QChar ) );
150  }
151  }
152 
153  ~TinyTextEntity()
154  {
155  if ( length > MaxStaticChars )
156  {
157  delete [] d.data;
158  }
159  }
160 
161  inline QString text() const
162  {
163  return length <= MaxStaticChars ? QString::fromRawData( ( const QChar * )&d.qc[0], length )
164  : QString::fromRawData( d.data, length );
165  }
166 
167  inline NormalizedRect transformedArea( const QTransform &matrix ) const
168  {
169  NormalizedRect transformed_area = area;
170  transformed_area.transform( matrix );
171  return transformed_area;
172  }
173 
174  NormalizedRect area;
175 
176  private:
177  Q_DISABLE_COPY( TinyTextEntity )
178 
179  union
180  {
181  QChar *data;
182  ushort qc[MaxStaticChars];
183  } d;
184  int length;
185 };
186 
187 
188 TextEntity::TextEntity( const QString &text, NormalizedRect *area )
189  : m_text( text ), m_area( area ), d( 0 )
190 {
191 }
192 
193 TextEntity::~TextEntity()
194 {
195  delete m_area;
196 }
197 
198 QString TextEntity::text() const
199 {
200  return m_text;
201 }
202 
203 NormalizedRect* TextEntity::area() const
204 {
205  return m_area;
206 }
207 
208 NormalizedRect TextEntity::transformedArea(const QTransform &matrix) const
209 {
210  NormalizedRect transformed_area = *m_area;
211  transformed_area.transform( matrix );
212  return transformed_area;
213 }
214 
215 
216 TextPagePrivate::TextPagePrivate()
217  : m_page( 0 )
218 {
219 }
220 
221 TextPagePrivate::~TextPagePrivate()
222 {
223  qDeleteAll( m_searchPoints );
224  qDeleteAll( m_words );
225 }
226 
227 
228 TextPage::TextPage()
229  : d( new TextPagePrivate() )
230 {
231 }
232 
233 TextPage::TextPage( const TextEntity::List &words )
234  : d( new TextPagePrivate() )
235 {
236  TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
237  for ( ; it != itEnd; ++it )
238  {
239  TextEntity *e = *it;
240  if ( !e->text().isEmpty() )
241  d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) );
242  delete e;
243  }
244 }
245 
246 TextPage::~TextPage()
247 {
248  delete d;
249 }
250 
251 void TextPage::append( const QString &text, NormalizedRect *area )
252 {
253  if ( !text.isEmpty() )
254  d->m_words.append( new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
255  delete area;
256 }
257 
258 struct WordWithCharacters
259 {
260  WordWithCharacters(TinyTextEntity *w, const TextList &c)
261  : word(w), characters(c)
262  {
263  }
264 
265  inline QString text() const
266  {
267  return word->text();
268  }
269 
270  inline const NormalizedRect &area() const
271  {
272  return word->area;
273  }
274 
275  TinyTextEntity *word;
276  TextList characters;
277 };
278 typedef QList<WordWithCharacters> WordsWithCharacters;
279 
285 class RegionText
286 {
287 
288 public:
289  RegionText()
290  {
291  };
292 
293  RegionText(const WordsWithCharacters &wordsWithCharacters, const QRect &area)
294  : m_region_wordWithCharacters(wordsWithCharacters), m_area(area)
295  {
296  }
297 
298  inline QString string() const
299  {
300  QString res;
301  foreach(const WordWithCharacters &word, m_region_wordWithCharacters)
302  res += word.text();
303  return res;
304  }
305 
306  inline WordsWithCharacters text() const
307  {
308  return m_region_wordWithCharacters;
309  }
310 
311  inline QRect area() const
312  {
313  return m_area;
314  }
315 
316  inline void setArea(const QRect &area)
317  {
318  m_area = area;
319  }
320 
321  inline void setText(const WordsWithCharacters &wordsWithCharacters)
322  {
323  m_region_wordWithCharacters = wordsWithCharacters;
324  }
325 
326 private:
327  WordsWithCharacters m_region_wordWithCharacters;
328  QRect m_area;
329 };
330 
331 RegularAreaRect * TextPage::textArea ( TextSelection * sel) const
332 {
333  if ( d->m_words.isEmpty() )
334  return new RegularAreaRect();
335 
350  RegularAreaRect * ret= new RegularAreaRect;
351 
352  const QTransform matrix = d->m_page ? d->m_page->rotationMatrix() : QTransform();
353 #if 0
354  int it = -1;
355  int itB = -1;
356  int itE = -1;
357 
358  // ending cursor is higher than start cursor, we need to find positions in reverse
359  NormalizedRect tmp;
360  NormalizedRect start;
361  NormalizedRect end;
362 
363  NormalizedPoint startC = sel->start();
364  double startCx = startC.x;
365  double startCy = startC.y;
366 
367  NormalizedPoint endC = sel->end();
368  double endCx = endC.x;
369  double endCy = endC.y;
370 
371  if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) )
372  {
373 #ifdef DEBUG_TEXTPAGE
374  kWarning() << "running first loop";
375 #endif
376  const int count = d->m_words.count();
377  for ( it = 0; it < count; it++ )
378  {
379  tmp = *d->m_words[ it ]->area();
380  if ( tmp.contains( startCx, startCy )
381  || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx )
382  || ( tmp.top >= startCy))
383  {
385  itB = it;
386 #ifdef DEBUG_TEXTPAGE
387  kWarning() << "start is" << itB << "count is" << d->m_words.count();
388 #endif
389  break;
390  }
391  }
392  sel->itB( itB );
393  }
394  itB = sel->itB();
395 #ifdef DEBUG_TEXTPAGE
396  kWarning() << "direction is" << sel->direction();
397  kWarning() << "reloaded start is" << itB << "against" << sel->itB();
398 #endif
399  if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) )
400  {
401 #ifdef DEBUG_TEXTPAGE
402  kWarning() << "running second loop";
403 #endif
404  for ( it = d->m_words.count() - 1; it >= itB; it-- )
405  {
406  tmp = *d->m_words[ it ]->area();
407  if ( tmp.contains( endCx, endCy )
408  || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx )
409  || ( tmp.bottom <= endCy ) )
410  {
412  itE = it;
413 #ifdef DEBUG_TEXTPAGE
414  kWarning() << "ending is" << itE << "count is" << d->m_words.count();
415  kWarning() << "conditions" << tmp.contains( endCx, endCy ) << " "
416  << ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) << " " <<
417  ( tmp.top >= endCy);
418 #endif
419  break;
420  }
421  }
422  sel->itE( itE );
423  }
424 #ifdef DEBUG_TEXTPAGE
425  kWarning() << "reloaded ending is" << itE << "against" << sel->itE();
426 #endif
427 
428  if ( sel->itB() != -1 && sel->itE() != -1 )
429  {
430  start = *d->m_words[ sel->itB() ]->area();
431  end = *d->m_words[ sel->itE() ]->area();
432 
433  NormalizedRect first, second, third;
437  first = start;
438  second.top = start.bottom;
439  first.right = second.right = 1;
440  third = end;
441  third.left = second.left = 0;
442  second.bottom = end.top;
443  int selMax = qMax( sel->itB(), sel->itE() );
444  for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it )
445  {
446  tmp = *d->m_words[ it ]->area();
447  if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) )
448  ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) );
449  }
450  }
451 #else
452  const double scaleX = d->m_page->m_page->width();
453  const double scaleY = d->m_page->m_page->height();
454 
455  NormalizedPoint startC = sel->start();
456  NormalizedPoint endC = sel->end();
457  NormalizedPoint temp;
458 
459  // if startPoint is right to endPoint swap them
460  if(startC.x > endC.x)
461  {
462  temp = startC;
463  startC = endC;
464  endC = temp;
465  }
466 
467  // minX,maxX,minY,maxY gives the bounding rectangle coordinates of the document
468  const NormalizedRect boundingRect = d->m_page->m_page->boundingBox();
469  const QRect content = boundingRect.geometry(scaleX,scaleY);
470  const double minX = content.left();
471  const double maxX = content.right();
472  const double minY = content.top();
473  const double maxY = content.bottom();
474 
506  // we know that startC.x > endC.x, we need to decide which is top and which is bottom
507  const NormalizedRect start_end = (startC.y < endC.y) ? NormalizedRect(startC.x, startC.y, endC.x, endC.y)
508  : NormalizedRect(startC.x, endC.y, endC.x, startC.y);
509 
510  // Case 1(a)
511  if(!boundingRect.intersects(start_end)) return ret;
512 
513  // case 1(b)
519  else
520  {
521  // if start is left to content rect take it to content rect boundary
522  if(startC.x * scaleX < minX) startC.x = minX/scaleX;
523  if(endC.x * scaleX > maxX) endC.x = maxX/scaleX;
524 
525  // if start is top to end (selection type 01)
526  if(startC.y * scaleY < minY) startC.y = minY/scaleY;
527  if(endC.y * scaleY > maxY) endC.y = maxY/scaleY;
528 
529  // if start is bottom to end (selection type 02)
530  if(startC.y * scaleY > maxY) startC.y = maxY/scaleY;
531  if(endC.y * scaleY < minY) endC.y = minY/scaleY;
532  }
533 
534  TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
535  TextList::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd;
536  const MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
537 
538  NormalizedRect tmp;
539  //case 2(a)
540  for ( ; it != itEnd; ++it )
541  {
542  tmp = (*it)->area;
543  if(tmp.contains(startC.x,startC.y)){
544  start = it;
545  }
546  if(tmp.contains(endC.x,endC.y)){
547  end = it;
548  }
549  }
550 
551  //case 2(b)
552  it = tmpIt;
553  if(start == it && end == itEnd)
554  {
555  for ( ; it != itEnd; ++it )
556  {
557  // is there any text reactangle within the start_end rect
558  tmp = (*it)->area;
559  if(start_end.intersects(tmp))
560  break;
561  }
562 
563  // we have searched every text entities, but none is within the rectangle created by start and end
564  // so, no selection should be done
565  if(it == itEnd)
566  {
567  return ret;
568  }
569  }
570  it = tmpIt;
571  bool selection_two_start = false;
572 
573  //case 3.a
574  if(start == it)
575  {
576  bool flagV = false;
577  NormalizedRect rect;
578 
579  // selection type 01
580  if(startC.y <= endC.y)
581  {
582  for ( ; it != itEnd; ++it )
583  {
584  rect= (*it)->area;
585  rect.isBottom(startC) ? flagV = false: flagV = true;
586 
587  if(flagV && rect.isRight(startC))
588  {
589  start = it;
590  break;
591  }
592  }
593  }
594 
595  //selection type 02
596  else
597  {
598  selection_two_start = true;
599  int distance = scaleX + scaleY + 100;
600  int count = 0;
601 
602  for ( ; it != itEnd; ++it )
603  {
604  rect= (*it)->area;
605 
606  if(rect.isBottomOrLevel(startC) && rect.isRight(startC))
607  {
608  count++;
609  QRect entRect = rect.geometry(scaleX,scaleY);
610  int xdist, ydist;
611  xdist = entRect.center().x() - startC.x * scaleX;
612  ydist = entRect.center().y() - startC.y * scaleY;
613 
614  //make them positive
615  if(xdist < 0) xdist = -xdist;
616  if(ydist < 0) ydist = -ydist;
617 
618  if( (xdist + ydist) < distance)
619  {
620  distance = xdist+ ydist;
621  start = it;
622  }
623  }
624  }
625  }
626  }
627 
628  //case 3.b
629  if(end == itEnd)
630  {
631  it = tmpIt;
632  itEnd = itEnd-1;
633 
634  bool flagV = false;
635  NormalizedRect rect;
636 
637  if(startC.y <= endC.y)
638  {
639  for ( ; itEnd >= it; itEnd-- )
640  {
641  rect= (*itEnd)->area;
642  rect.isTop(endC) ? flagV = false: flagV = true;
643 
644  if(flagV && rect.isLeft(endC))
645  {
646  end = itEnd;
647  break;
648  }
649  }
650  }
651 
652  else
653  {
654  int distance = scaleX + scaleY + 100;
655  for ( ; itEnd >= it; itEnd-- )
656  {
657  rect= (*itEnd)->area;
658 
659  if(rect.isTopOrLevel(endC) && rect.isLeft(endC))
660  {
661  QRect entRect = rect.geometry(scaleX,scaleY);
662  int xdist, ydist;
663  xdist = entRect.center().x() - endC.x * scaleX;
664  ydist = entRect.center().y() - endC.y * scaleY;
665 
666  //make them positive
667  if(xdist < 0) xdist = -xdist;
668  if(ydist < 0) ydist = -ydist;
669 
670  if( (xdist + ydist) < distance)
671  {
672  distance = xdist+ ydist;
673  end = itEnd;
674  }
675 
676  }
677  }
678  }
679  }
680 
681  /* if start and end in selection 02 are in the same column, and we
682  start at an empty space we have to remove the selection of last
683  character
684  */
685  if(selection_two_start)
686  {
687  if(start > end)
688  {
689  start = start - 1;
690  }
691  }
692 
693  // if start is less than end swap them
694  if(start > end)
695  {
696  it = start;
697  start = end;
698  end = it;
699  }
700 
701  // removes the possibility of crash, in case none of 1 to 3 is true
702  if(end == d->m_words.constEnd()) end--;
703 
704  for( ;start <= end ; start++)
705  {
706  ret->appendShape( (*start)->transformedArea( matrix ), side );
707  }
708 
709 #endif
710 
711  return ret;
712 }
713 
714 
715 RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
716  Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area )
717 {
718  SearchDirection dir=direct;
719  // invalid search request
720  if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) )
721  return 0;
722  TextList::ConstIterator start;
723  int start_offset = 0;
724  TextList::ConstIterator end;
725  const QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.constFind( searchID );
726  if ( sIt == d->m_searchPoints.constEnd() )
727  {
728  // if no previous run of this search is found, then set it to start
729  // from the beginning (respecting the search direction)
730  if ( dir == NextResult )
731  dir = FromTop;
732  else if ( dir == PreviousResult )
733  dir = FromBottom;
734  }
735  bool forward = true;
736  switch ( dir )
737  {
738  case FromTop:
739  start = d->m_words.constBegin();
740  start_offset = 0;
741  end = d->m_words.constEnd();
742  break;
743  case FromBottom:
744  start = d->m_words.constEnd();
745  start_offset = 0;
746  end = d->m_words.constBegin();
747  forward = false;
748  break;
749  case NextResult:
750  start = (*sIt)->it_end;
751  start_offset = (*sIt)->offset_end;
752  end = d->m_words.constEnd();
753  break;
754  case PreviousResult:
755  start = (*sIt)->it_begin;
756  start_offset = (*sIt)->offset_begin;
757  end = d->m_words.constBegin();
758  forward = false;
759  break;
760  };
761  RegularAreaRect* ret = 0;
762  const TextComparisonFunction cmpFn = caseSensitivity == Qt::CaseSensitive
763  ? CaseSensitiveCmpFn : CaseInsensitiveCmpFn;
764  if ( forward )
765  {
766  ret = d->findTextInternalForward( searchID, query, cmpFn, start, start_offset, end );
767  }
768  else
769  {
770  ret = d->findTextInternalBackward( searchID, query, cmpFn, start, start_offset, end );
771  }
772  return ret;
773 }
774 
775 // hyphenated '-' must be at the end of a word, so hyphenation means
776 // we have a '-' just followed by a '\n' character
777 // check if the string contains a '-' character
778 // if the '-' is the last entry
779 static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd)
780 {
781  int len = str.length();
782 
783  // hyphenated '-' must be at the end of a word, so hyphenation means
784  // we have a '-' just followed by a '\n' character
785  // check if the string contains a '-' character
786  // if the '-' is the last entry
787  if ( str.endsWith( '-' ) )
788  {
789  // validity chek of it + 1
790  if ( ( it + 1 ) != textListEnd )
791  {
792  // 1. if the next character is '\n'
793  const QString &lookahedStr = (*(it+1))->text();
794  if (lookahedStr.startsWith('\n'))
795  {
796  len -= 1;
797  }
798  else
799  {
800  // 2. if the next word is in a different line or not
801  const NormalizedRect& hyphenArea = (*it)->area;
802  const NormalizedRect& lookaheadArea = (*(it + 1))->area;
803 
804  // lookahead to check whether both the '-' rect and next character rect overlap
805  if( !doesConsumeY( hyphenArea, lookaheadArea, 70 ) )
806  {
807  len -= 1;
808  }
809  }
810  }
811  }
812  // else if it is the second last entry - for example in pdf format
813  else if (str.endsWith("-\n"))
814  {
815  len -= 2;
816  }
817 
818  return len;
819 }
820 
821 RegularAreaRect* TextPagePrivate::searchPointToArea(const SearchPoint* sp)
822 {
823  const QTransform matrix = m_page ? m_page->rotationMatrix() : QTransform();
824  RegularAreaRect* ret=new RegularAreaRect;
825 
826  for (TextList::ConstIterator it = sp->it_begin; ; it++)
827  {
828  const TinyTextEntity* curEntity = *it;
829  ret->append( curEntity->transformedArea( matrix ) );
830 
831  if (it == sp->it_end) {
832  break;
833  }
834  }
835 
836  ret->simplify();
837  return ret;
838 }
839 
840 RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query,
841  TextComparisonFunction comparer,
842  const TextList::ConstIterator &start,
843  int start_offset,
844  const TextList::ConstIterator &end)
845 {
846  // normalize query search all unicode (including glyphs)
847  const QString query = _query.normalized(QString::NormalizationForm_KC);
848 
849  // j is the current position in our query
850  // len is the length of the string in TextEntity
851  // queryLeft is the length of the query we have left
852  int j=0, queryLeft=query.length();
853 
854  TextList::ConstIterator it = start;
855  int offset = start_offset;
856 
857  TextList::ConstIterator it_begin = TextList::ConstIterator();
858  int offset_begin = 0; //dummy initial value to suppress compiler warnings
859 
860  while ( it != end )
861  {
862  const TinyTextEntity* curEntity = *it;
863  const QString& str = curEntity->text();
864  int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd());
865 
866  if (offset >= len)
867  {
868  it++;
869  offset = 0;
870  continue;
871  }
872 
873  if ( it_begin == TextList::ConstIterator() )
874  {
875  it_begin = it;
876  offset_begin = offset;
877  }
878 
879  int min=qMin(queryLeft,len-offset);
880  {
881 #ifdef DEBUG_TEXTPAGE
882  kDebug(OkularDebug) << str.midRef(offset, min) << ":" << _query.midRef(j, min);
883 #endif
884  // we have equal (or less than) area of the query left as the length of the current
885  // entity
886 
887  if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ) ) )
888  {
889  // we have not matched
890  // this means we do not have a complete match
891  // we need to get back to query start
892  // and continue the search from this place
893 #ifdef DEBUG_TEXTPAGE
894  kDebug(OkularDebug) << "\tnot matched";
895 #endif
896  j = 0;
897  queryLeft=query.length();
898  it = it_begin;
899  offset = offset_begin+1;
900  it_begin = TextList::ConstIterator();
901  }
902  else
903  {
904  // we have a match
905  // move the current position in the query
906  // to the position after the length of this string
907  // we matched
908  // subtract the length of the current entity from
909  // the left length of the query
910 #ifdef DEBUG_TEXTPAGE
911  kDebug(OkularDebug) << "\tmatched";
912 #endif
913  j += min;
914  queryLeft -= min;
915 
916  if (queryLeft==0)
917  {
918  // save or update the search point for the current searchID
919  QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
920  if ( sIt == m_searchPoints.end() )
921  {
922  sIt = m_searchPoints.insert( searchID, new SearchPoint );
923  }
924  SearchPoint* sp = *sIt;
925  sp->it_begin = it_begin;
926  sp->it_end = it;
927  sp->offset_begin = offset_begin;
928  sp->offset_end = offset + min;
929  return searchPointToArea(sp);
930  }
931 
932  it++;
933  offset = 0;
934  }
935  }
936  }
937  // end of loop - it means that we've ended the textentities
938 
939  const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
940  if ( sIt != m_searchPoints.end() )
941  {
942  SearchPoint* sp = *sIt;
943  m_searchPoints.erase( sIt );
944  delete sp;
945  }
946  return 0;
947 }
948 
949 RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
950  TextComparisonFunction comparer,
951  const TextList::ConstIterator &start,
952  int start_offset,
953  const TextList::ConstIterator &end)
954 {
955  // normalize query to search all unicode (including glyphs)
956  const QString query = _query.normalized(QString::NormalizationForm_KC);
957 
958  // j is the current position in our query
959  // len is the length of the string in TextEntity
960  // queryLeft is the length of the query we have left
961  int j=query.length(), queryLeft=query.length();
962 
963  TextList::ConstIterator it = start;
964  int offset = start_offset;
965 
966  TextList::ConstIterator it_begin = TextList::ConstIterator();
967  int offset_begin = 0; //dummy initial value to suppress compiler warnings
968 
969  while ( true )
970  {
971  if (offset <= 0)
972  {
973  if ( it == end )
974  {
975  break;
976  }
977  it--;
978  }
979 
980  const TinyTextEntity* curEntity = *it;
981  const QString& str = curEntity->text();
982  int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd());
983 
984  if (offset <= 0)
985  {
986  offset = len;
987  }
988 
989  if ( it_begin == TextList::ConstIterator() )
990  {
991  it_begin = it;
992  offset_begin = offset;
993  }
994 
995  int min=qMin(queryLeft,offset);
996  {
997 #ifdef DEBUG_TEXTPAGE
998  kDebug(OkularDebug) << str.midRef(offset-min, min) << " : " << _query.midRef(j-min, min);
999 #endif
1000  // we have equal (or less than) area of the query left as the length of the current
1001  // entity
1002 
1003  // Note len is not str.length() so we can't use rightRef here
1004  if ( !comparer( str.midRef(offset-min, min ), query.midRef( j - min, min ) ) )
1005  {
1006  // we have not matched
1007  // this means we do not have a complete match
1008  // we need to get back to query start
1009  // and continue the search from this place
1010 #ifdef DEBUG_TEXTPAGE
1011  kDebug(OkularDebug) << "\tnot matched";
1012 #endif
1013 
1014  j = query.length();
1015  queryLeft = query.length();
1016  it = it_begin;
1017  offset = offset_begin-1;
1018  it_begin = TextList::ConstIterator();
1019  }
1020  else
1021  {
1022  // we have a match
1023  // move the current position in the query
1024  // to the position after the length of this string
1025  // we matched
1026  // subtract the length of the current entity from
1027  // the left length of the query
1028 #ifdef DEBUG_TEXTPAGE
1029  kDebug(OkularDebug) << "\tmatched";
1030 #endif
1031  j -= min;
1032  queryLeft -= min;
1033 
1034  if ( queryLeft == 0 )
1035  {
1036  // save or update the search point for the current searchID
1037  QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
1038  if ( sIt == m_searchPoints.end() )
1039  {
1040  sIt = m_searchPoints.insert( searchID, new SearchPoint );
1041  }
1042  SearchPoint* sp = *sIt;
1043  sp->it_begin = it;
1044  sp->it_end = it_begin;
1045  sp->offset_begin = offset - min;
1046  sp->offset_end = offset_begin;
1047  return searchPointToArea(sp);
1048  }
1049 
1050  offset = 0;
1051  }
1052 
1053  }
1054 
1055  }
1056  // end of loop - it means that we've ended the textentities
1057 
1058  const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID );
1059  if ( sIt != m_searchPoints.end() )
1060  {
1061  SearchPoint* sp = *sIt;
1062  m_searchPoints.erase( sIt );
1063  delete sp;
1064  }
1065  return 0;
1066 }
1067 
1068 QString TextPage::text(const RegularAreaRect *area) const
1069 {
1070  return text(area, AnyPixelTextAreaInclusionBehaviour);
1071 }
1072 
1073 QString TextPage::text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
1074 {
1075  if ( area && area->isNull() )
1076  return QString();
1077 
1078  TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
1079  QString ret;
1080  if ( area )
1081  {
1082  for ( ; it != itEnd; ++it )
1083  {
1084  if (b == AnyPixelTextAreaInclusionBehaviour)
1085  {
1086  if ( area->intersects( (*it)->area ) )
1087  {
1088  ret += (*it)->text();
1089  }
1090  }
1091  else
1092  {
1093  NormalizedPoint center = (*it)->area.center();
1094  if ( area->contains( center.x, center.y ) )
1095  {
1096  ret += (*it)->text();
1097  }
1098  }
1099  }
1100  }
1101  else
1102  {
1103  for ( ; it != itEnd; ++it )
1104  ret += (*it)->text();
1105  }
1106  return ret;
1107 }
1108 
1109 static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
1110 {
1111  QRect firstArea = first.area().roundedGeometry(1000,1000);
1112  QRect secondArea = second.area().roundedGeometry(1000,1000);
1113 
1114  return firstArea.left() < secondArea.left();
1115 }
1116 
1117 static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
1118 {
1119  const QRect firstArea = first.area().roundedGeometry(1000,1000);
1120  const QRect secondArea = second.area().roundedGeometry(1000,1000);
1121 
1122  return firstArea.top() < secondArea.top();
1123 }
1124 
1128 void TextPagePrivate::setWordList(const TextList &list)
1129 {
1130  qDeleteAll(m_words);
1131  m_words = list;
1132 }
1133 
1138 static void removeSpace(TextList *words)
1139 {
1140  TextList::Iterator it = words->begin();
1141  const QString str(' ');
1142 
1143  while ( it != words->end() )
1144  {
1145  if((*it)->text() == str)
1146  {
1147  it = words->erase(it);
1148  }
1149  else
1150  {
1151  ++it;
1152  }
1153  }
1154 }
1155 
1163 static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
1164 {
1176  WordsWithCharacters wordsWithCharacters;
1177 
1178  TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
1179  int newLeft,newRight,newTop,newBottom;
1180  int index = 0;
1181 
1182  for( ; it != itEnd ; it++)
1183  {
1184  QString textString = (*it)->text();
1185  QString newString;
1186  QRect lineArea = (*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea;
1187  TextList wordCharacters;
1188  tmpIt = it;
1189  int space = 0;
1190 
1191  while (!space)
1192  {
1193  if (textString.length())
1194  {
1195  newString.append(textString);
1196 
1197  // when textString is the start of the word
1198  if (tmpIt == it)
1199  {
1200  NormalizedRect newRect(lineArea,pageWidth,pageHeight);
1201  wordCharacters.append(new TinyTextEntity(textString.normalized
1202  (QString::NormalizationForm_KC), newRect));
1203  }
1204  else
1205  {
1206  NormalizedRect newRect(elementArea,pageWidth,pageHeight);
1207  wordCharacters.append(new TinyTextEntity(textString.normalized
1208  (QString::NormalizationForm_KC), newRect));
1209  }
1210  }
1211 
1212  ++it;
1213 
1214  /*
1215  we must have to put this line before the if condition of it==itEnd
1216  otherwise the last character can be missed
1217  */
1218  if (it == itEnd) break;
1219  elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight);
1220  if (!doesConsumeY(elementArea, lineArea, 60))
1221  {
1222  --it;
1223  break;
1224  }
1225 
1226  const int text_y1 = elementArea.top() ,
1227  text_x1 = elementArea.left(),
1228  text_y2 = elementArea.y() + elementArea.height(),
1229  text_x2 = elementArea.x() + elementArea.width();
1230  const int line_y1 = lineArea.top() ,line_x1 = lineArea.left(),
1231  line_y2 = lineArea.y() + lineArea.height(),
1232  line_x2 = lineArea.x() + lineArea.width();
1233 
1234  space = elementArea.left() - lineArea.right();
1235 
1236  if (space != 0)
1237  {
1238  it--;
1239  break;
1240  }
1241 
1242  newLeft = text_x1 < line_x1 ? text_x1 : line_x1;
1243  newRight = line_x2 > text_x2 ? line_x2 : text_x2;
1244  newTop = text_y1 > line_y1 ? line_y1 : text_y1;
1245  newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
1246 
1247  lineArea.setLeft (newLeft);
1248  lineArea.setTop (newTop);
1249  lineArea.setWidth( newRight - newLeft );
1250  lineArea.setHeight( newBottom - newTop );
1251 
1252  textString = (*it)->text();
1253  }
1254 
1255  // if newString is not empty, save it
1256  if (!newString.isEmpty())
1257  {
1258  const NormalizedRect newRect(lineArea, pageWidth, pageHeight);
1259  TinyTextEntity *word = new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
1260  wordsWithCharacters.append(WordWithCharacters(word, wordCharacters));
1261 
1262  index++;
1263  }
1264 
1265  if(it == itEnd) break;
1266  }
1267 
1268  return wordsWithCharacters;
1269 }
1270 
1274 QList< QPair<WordsWithCharacters, QRect> > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
1275 {
1287  QList< QPair<WordsWithCharacters, QRect> > lines;
1288 
1289  /*
1290  Make a new copy of the TextList in the words, so that the wordsTmp and lines do
1291  not contain same pointers for all the TinyTextEntity.
1292  */
1293  QList<WordWithCharacters> words = wordsTmp;
1294 
1295  // Step 1
1296  qSort(words.begin(),words.end(),compareTinyTextEntityY);
1297 
1298  // Step 2
1299  QList<WordWithCharacters>::Iterator it = words.begin(), itEnd = words.end();
1300 
1301  //for every non-space texts(characters/words) in the textList
1302  for( ; it != itEnd ; it++)
1303  {
1304  const QRect elementArea = (*it).area().roundedGeometry(pageWidth,pageHeight);
1305  bool found = false;
1306 
1307  for( int i = 0 ; i < lines.length() ; i++)
1308  {
1309  /* the line area which will be expanded
1310  line_rects is only necessary to preserve the topmin and bottommax of all
1311  the texts in the line, left and right is not necessary at all
1312  */
1313  QRect &lineArea = lines[i].second;
1314  const int text_y1 = elementArea.top() ,
1315  text_y2 = elementArea.top() + elementArea.height() ,
1316  text_x1 = elementArea.left(),
1317  text_x2 = elementArea.left() + elementArea.width();
1318  const int line_y1 = lineArea.top() ,
1319  line_y2 = lineArea.top() + lineArea.height(),
1320  line_x1 = lineArea.left(),
1321  line_x2 = lineArea.left() + lineArea.width();
1322 
1323  /*
1324  if the new text and the line has y overlapping parts of more than 70%,
1325  the text will be added to this line
1326  */
1327  if(doesConsumeY(elementArea,lineArea,70))
1328  {
1329  WordsWithCharacters &line = lines[i].first;
1330  line.append(*it);
1331 
1332  const int newLeft = line_x1 < text_x1 ? line_x1 : text_x1;
1333  const int newRight = line_x2 > text_x2 ? line_x2 : text_x2;
1334  const int newTop = line_y1 < text_y1 ? line_y1 : text_y1;
1335  const int newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
1336 
1337  lineArea = QRect( newLeft,newTop, newRight - newLeft, newBottom - newTop );
1338  found = true;
1339  }
1340 
1341  if(found) break;
1342  }
1343 
1344  /* when we have found a new line create a new TextList containing
1345  only one element and append it to the lines
1346  */
1347  if(!found)
1348  {
1349  WordsWithCharacters tmp;
1350  tmp.append((*it));
1351  lines.append(QPair<WordsWithCharacters, QRect>(tmp, elementArea));
1352  }
1353  }
1354 
1355  // Step 3
1356  for(int i = 0 ; i < lines.length() ; i++)
1357  {
1358  WordsWithCharacters &list = lines[i].first;
1359  qSort(list.begin(), list.end(), compareTinyTextEntityX);
1360  }
1361 
1362  return lines;
1363 }
1364 
1368 static void calculateStatisticalInformation(const QList<WordWithCharacters> &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
1369 {
1380  const QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(words, pageWidth, pageHeight);
1381 
1385  QMap<int,int> line_space_stat;
1386  for(int i = 0 ; i < sortedLines.length(); i++)
1387  {
1388  const QRect rectUpper = sortedLines.at(i).second;
1389 
1390  if(i+1 == sortedLines.length()) break;
1391  const QRect rectLower = sortedLines.at(i+1).second;
1392 
1393  int linespace = rectLower.top() - (rectUpper.top() + rectUpper.height());
1394  if(linespace < 0) linespace =-linespace;
1395 
1396  if(line_space_stat.contains(linespace))
1397  line_space_stat[linespace]++;
1398  else line_space_stat[linespace] = 1;
1399  }
1400 
1401  *line_spacing = 0;
1402  int weighted_count = 0;
1403  QMapIterator<int, int> iterate_linespace(line_space_stat);
1404 
1405  while(iterate_linespace.hasNext())
1406  {
1407  iterate_linespace.next();
1408  *line_spacing += iterate_linespace.value() * iterate_linespace.key();
1409  weighted_count += iterate_linespace.value();
1410  }
1411  if (*line_spacing != 0)
1412  *line_spacing = (int) ( (double)*line_spacing / (double) weighted_count + 0.5);
1413 
1417  // We would like to use QMap instead of QHash as it will keep the keys sorted
1418  QMap<int,int> hor_space_stat;
1419  QMap<int,int> col_space_stat;
1420  QList< QList<QRect> > space_rects;
1421  QList<QRect> max_hor_space_rects;
1422 
1423  // Space in every line
1424  for(int i = 0 ; i < sortedLines.length() ; i++)
1425  {
1426  const WordsWithCharacters list = sortedLines.at(i).first;
1427  QList<QRect> line_space_rects;
1428  int maxSpace = 0, minSpace = pageWidth;
1429 
1430  // for every TinyTextEntity element in the line
1431  WordsWithCharacters::ConstIterator it = list.begin(), itEnd = list.end();
1432  QRect max_area1,max_area2;
1433  QString before_max, after_max;
1434 
1435  // for every line
1436  for( ; it != itEnd ; it++ )
1437  {
1438  const QRect area1 = (*it).area().roundedGeometry(pageWidth,pageHeight);
1439  if( it+1 == itEnd ) break;
1440 
1441  const QRect area2 = (*(it+1)).area().roundedGeometry(pageWidth,pageHeight);
1442  int space = area2.left() - area1.right();
1443 
1444  if(space > maxSpace)
1445  {
1446  max_area1 = area1;
1447  max_area2 = area2;
1448  maxSpace = space;
1449  before_max = (*it).text();
1450  after_max = (*(it+1)).text();
1451  }
1452 
1453  if(space < minSpace && space != 0) minSpace = space;
1454 
1455  //if we found a real space, whose length is not zero and also less than the pageWidth
1456  if(space != 0 && space != pageWidth)
1457  {
1458  // increase the count of the space amount
1459  if(hor_space_stat.contains(space)) hor_space_stat[space]++;
1460  else hor_space_stat[space] = 1;
1461 
1462  int left,right,top,bottom;
1463 
1464  left = area1.right();
1465  right = area2.left();
1466 
1467  top = area2.top() < area1.top() ? area2.top() : area1.top();
1468  bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
1469 
1470  QRect rect(left,top,right-left,bottom-top);
1471  line_space_rects.append(rect);
1472  }
1473  }
1474 
1475  space_rects.append(line_space_rects);
1476 
1477  if(hor_space_stat.contains(maxSpace))
1478  {
1479  if(hor_space_stat[maxSpace] != 1)
1480  hor_space_stat[maxSpace]--;
1481  else hor_space_stat.remove(maxSpace);
1482  }
1483 
1484  if(maxSpace != 0)
1485  {
1486  if (col_space_stat.contains(maxSpace))
1487  col_space_stat[maxSpace]++;
1488  else col_space_stat[maxSpace] = 1;
1489 
1490  //store the max rect of each line
1491  const int left = max_area1.right();
1492  const int right = max_area2.left();
1493  const int top = (max_area1.top() > max_area2.top()) ? max_area2.top() :
1494  max_area1.top();
1495  const int bottom = (max_area1.bottom() < max_area2.bottom()) ? max_area2.bottom() :
1496  max_area1.bottom();
1497 
1498  const QRect rect(left,top,right-left,bottom-top);
1499  max_hor_space_rects.append(rect);
1500  }
1501  else max_hor_space_rects.append(QRect(0,0,0,0));
1502  }
1503 
1504  // All the between word space counts are in hor_space_stat
1505  *word_spacing = 0;
1506  weighted_count = 0;
1507  QMapIterator<int, int> iterate(hor_space_stat);
1508 
1509  while (iterate.hasNext())
1510  {
1511  iterate.next();
1512 
1513  if(iterate.key() > 0)
1514  {
1515  *word_spacing += iterate.value() * iterate.key();
1516  weighted_count += iterate.value();
1517  }
1518  }
1519  if(weighted_count)
1520  *word_spacing = (int) ((double)*word_spacing / (double)weighted_count + 0.5);
1521 
1522  *col_spacing = 0;
1523  QMapIterator<int, int> iterate_col(col_space_stat);
1524 
1525  while (iterate_col.hasNext())
1526  {
1527  iterate_col.next();
1528  if(iterate_col.value() > *col_spacing) *col_spacing = iterate_col.value();
1529  }
1530  *col_spacing = col_space_stat.key(*col_spacing);
1531 
1532  // if there is just one line in a region, there is no point in dividing it
1533  if(sortedLines.length() == 1)
1534  *word_spacing = *col_spacing;
1535 }
1536 
1542 static RegionTextList XYCutForBoundingBoxes(const QList<WordWithCharacters> &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
1543 {
1544  RegionTextList tree;
1545  QRect contentRect(boundingBox.geometry(pageWidth,pageHeight));
1546  const RegionText root(wordsWithCharacters, contentRect);
1547 
1548  // start the tree with the root, it is our only region at the start
1549  tree.push_back(root);
1550 
1551  int i = 0;
1552 
1553  // while traversing the tree has not been ended
1554  while(i < tree.length())
1555  {
1556  const RegionText node = tree.at(i);
1557  QRect regionRect = node.area();
1558 
1562  // allocate the size of proj profiles and initialize with 0
1563  int size_proj_y = node.area().height();
1564  int size_proj_x = node.area().width();
1565  //dynamic memory allocation
1566  QVarLengthArray<int> proj_on_xaxis(size_proj_x);
1567  QVarLengthArray<int> proj_on_yaxis(size_proj_y);
1568 
1569  for( int j = 0 ; j < size_proj_y ; ++j ) proj_on_yaxis[j] = 0;
1570  for( int j = 0 ; j < size_proj_x ; ++j ) proj_on_xaxis[j] = 0;
1571 
1572  const QList<WordWithCharacters> list = node.text();
1573 
1574  // Calculate tcx and tcy locally for each new region
1575  int word_spacing, line_spacing, column_spacing;
1576  calculateStatisticalInformation(list, pageWidth, pageHeight, &word_spacing, &line_spacing, &column_spacing);
1577 
1578  const int tcx = word_spacing * 2;
1579  const int tcy = line_spacing * 2;
1580 
1581  int maxX = 0 , maxY = 0;
1582  int avgX = 0;
1583  int count;
1584 
1585  // for every text in the region
1586  for(int j = 0 ; j < list.length() ; ++j )
1587  {
1588  TinyTextEntity *ent = list.at(j).word;
1589  const QRect entRect = ent->area.geometry(pageWidth, pageHeight);
1590 
1591  // calculate vertical projection profile proj_on_xaxis1
1592  for(int k = entRect.left() ; k <= entRect.left() + entRect.width() ; ++k)
1593  {
1594  if( ( k-regionRect.left() ) < size_proj_x && ( k-regionRect.left() ) >= 0 )
1595  proj_on_xaxis[k - regionRect.left()] += entRect.height();
1596  }
1597 
1598  // calculate horizontal projection profile in the same way
1599  for(int k = entRect.top() ; k <= entRect.top() + entRect.height() ; ++k)
1600  {
1601  if( ( k-regionRect.top() ) < size_proj_y && ( k-regionRect.top() ) >= 0 )
1602  proj_on_yaxis[k - regionRect.top()] += entRect.width();
1603  }
1604  }
1605 
1606  for( int j = 0 ; j < size_proj_y ; ++j )
1607  {
1608  if (proj_on_yaxis[j] > maxY)
1609  maxY = proj_on_yaxis[j];
1610  }
1611 
1612  avgX = count = 0;
1613  for( int j = 0 ; j < size_proj_x ; ++j )
1614  {
1615  if(proj_on_xaxis[j] > maxX) maxX = proj_on_xaxis[j];
1616  if(proj_on_xaxis[j])
1617  {
1618  count++;
1619  avgX+= proj_on_xaxis[j];
1620  }
1621  }
1622  if(count) avgX /= count;
1623 
1624 
1628  int xbegin = 0, xend = size_proj_x - 1;
1629  int ybegin = 0, yend = size_proj_y - 1;
1630  while(xbegin < size_proj_x && proj_on_xaxis[xbegin] <= 0)
1631  xbegin++;
1632  while(xend >= 0 && proj_on_xaxis[xend] <= 0)
1633  xend--;
1634  while(ybegin < size_proj_y && proj_on_yaxis[ybegin] <= 0)
1635  ybegin++;
1636  while(yend >= 0 && proj_on_yaxis[yend] <= 0)
1637  yend--;
1638 
1639  //update the regionRect
1640  int old_left = regionRect.left(), old_top = regionRect.top();
1641  regionRect.setLeft(old_left + xbegin);
1642  regionRect.setRight(old_left + xend);
1643  regionRect.setTop(old_top + ybegin);
1644  regionRect.setBottom(old_top + yend);
1645 
1646  int tnx = (int)((double)avgX * 10.0 / 100.0 + 0.5), tny = 0;
1647  for( int j = 0 ; j < size_proj_x ; ++j )
1648  proj_on_xaxis[j] -= tnx;
1649  for( int j = 0 ; j < size_proj_y ; ++j )
1650  proj_on_yaxis[j] -= tny;
1651 
1655  int gap_hor = -1, pos_hor = -1;
1656  int begin = -1, end = -1;
1657 
1658  // find all hor_gaps and find the maximum between them
1659  for(int j = 1 ; j < size_proj_y ; ++j)
1660  {
1661  //transition from white to black
1662  if(begin >= 0 && proj_on_yaxis[j-1] <= 0
1663  && proj_on_yaxis[j] > 0)
1664  end = j;
1665 
1666  //transition from black to white
1667  if(proj_on_yaxis[j-1] > 0 && proj_on_yaxis[j] <= 0)
1668  begin = j;
1669 
1670  if(begin > 0 && end > 0 && end-begin > gap_hor)
1671  {
1672  gap_hor = end - begin;
1673  pos_hor = (end + begin) / 2;
1674  begin = -1;
1675  end = -1;
1676  }
1677  }
1678 
1679 
1680  begin = -1, end = -1;
1681  int gap_ver = -1, pos_ver = -1;
1682 
1683  //find all the ver_gaps and find the maximum between them
1684  for(int j = 1 ; j < size_proj_x ; ++j)
1685  {
1686  //transition from white to black
1687  if(begin >= 0 && proj_on_xaxis[j-1] <= 0
1688  && proj_on_xaxis[j] > 0){
1689  end = j;
1690  }
1691 
1692  //transition from black to white
1693  if(proj_on_xaxis[j-1] > 0 && proj_on_xaxis[j] <= 0)
1694  begin = j;
1695 
1696  if(begin > 0 && end > 0 && end-begin > gap_ver)
1697  {
1698  gap_ver = end - begin;
1699  pos_ver = (end + begin) / 2;
1700  begin = -1;
1701  end = -1;
1702  }
1703  }
1704 
1705  int cut_pos_x = pos_ver, cut_pos_y = pos_hor;
1706  int gap_x = gap_ver, gap_y = gap_hor;
1707 
1711  bool cut_hor = false, cut_ver = false;
1712 
1713  // For horizontal cut
1714  const int topHeight = cut_pos_y - (regionRect.top() - old_top);
1715  const QRect topRect(regionRect.left(),
1716  regionRect.top(),
1717  regionRect.width(),
1718  topHeight);
1719  const QRect bottomRect(regionRect.left(),
1720  regionRect.top() + topHeight,
1721  regionRect.width(),
1722  regionRect.height() - topHeight );
1723 
1724  // For vertical Cut
1725  const int leftWidth = cut_pos_x - (regionRect.left() - old_left);
1726  const QRect leftRect(regionRect.left(),
1727  regionRect.top(),
1728  leftWidth,
1729  regionRect.height());
1730  const QRect rightRect(regionRect.left() + leftWidth,
1731  regionRect.top(),
1732  regionRect.width() - leftWidth,
1733  regionRect.height());
1734 
1735  if(gap_y >= gap_x && gap_y >= tcy)
1736  cut_hor = true;
1737  else if(gap_y >= gap_x && gap_y <= tcy && gap_x >= tcx)
1738  cut_ver = true;
1739  else if(gap_x >= gap_y && gap_x >= tcx)
1740  cut_ver = true;
1741  else if(gap_x >= gap_y && gap_x <= tcx && gap_y >= tcy)
1742  cut_hor = true;
1743  // no cut possible
1744  else
1745  {
1746  // we can now update the node rectangle with the shrinked rectangle
1747  RegionText tmpNode = tree.at(i);
1748  tmpNode.setArea(regionRect);
1749  tree.replace(i,tmpNode);
1750  i++;
1751  continue;
1752  }
1753 
1754  WordsWithCharacters list1,list2;
1755 
1756  // horizontal cut, topRect and bottomRect
1757  if(cut_hor)
1758  {
1759  for( int j = 0 ; j < list.length() ; ++j )
1760  {
1761  const WordWithCharacters word = list.at(j);
1762  const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1763 
1764  if(topRect.intersects(wordRect))
1765  list1.append(word);
1766  else
1767  list2.append(word);
1768  }
1769 
1770  RegionText node1(list1,topRect);
1771  RegionText node2(list2,bottomRect);
1772 
1773  tree.replace(i,node1);
1774  tree.insert(i+1,node2);
1775  }
1776 
1777  //vertical cut, leftRect and rightRect
1778  else if(cut_ver)
1779  {
1780  for( int j = 0 ; j < list.length() ; ++j )
1781  {
1782  const WordWithCharacters word = list.at(j);
1783  const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1784 
1785  if(leftRect.intersects(wordRect))
1786  list1.append(word);
1787  else
1788  list2.append(word);
1789  }
1790 
1791  RegionText node1(list1,leftRect);
1792  RegionText node2(list2,rightRect);
1793 
1794  tree.replace(i,node1);
1795  tree.insert(i+1,node2);
1796  }
1797  }
1798 
1799  return tree;
1800 }
1801 
1805 WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
1806 {
1813  // Only change the texts under RegionTexts, not the area
1814  for(int j = 0 ; j < tree.length() ; j++)
1815  {
1816  RegionText &tmpRegion = tree[j];
1817 
1818  // Step 01
1819  QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
1820 
1821  // Step 02
1822  for(int i = 0 ; i < sortedLines.length() ; i++)
1823  {
1824  WordsWithCharacters &list = sortedLines[i].first;
1825  for(int k = 0 ; k < list.length() ; k++ )
1826  {
1827  const QRect area1 = list.at(k).area().roundedGeometry(pageWidth,pageHeight);
1828  if( k+1 >= list.length() ) break;
1829 
1830  const QRect area2 = list.at(k+1).area().roundedGeometry(pageWidth,pageHeight);
1831  const int space = area2.left() - area1.right();
1832 
1833  if(space != 0)
1834  {
1835  // Make a TinyTextEntity of string space and push it between it and it+1
1836  const int left = area1.right();
1837  const int right = area2.left();
1838  const int top = area2.top() < area1.top() ? area2.top() : area1.top();
1839  const int bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
1840 
1841  const QString spaceStr(" ");
1842  const QRect rect(QPoint(left,top),QPoint(right,bottom));
1843  const NormalizedRect entRect(rect,pageWidth,pageHeight);
1844  TinyTextEntity *ent1 = new TinyTextEntity(spaceStr, entRect);
1845  TinyTextEntity *ent2 = new TinyTextEntity(spaceStr, entRect);
1846  WordWithCharacters word(ent1, QList<TinyTextEntity*>() << ent2);
1847 
1848  list.insert(k+1, word);
1849 
1850  // Skip the space
1851  k++;
1852  }
1853  }
1854  }
1855 
1856  WordsWithCharacters tmpList;
1857  for(int i = 0 ; i < sortedLines.length() ; i++)
1858  {
1859  tmpList += sortedLines.at(i).first;
1860  }
1861  tmpRegion.setText(tmpList);
1862  }
1863 
1864  // Step 03
1865  WordsWithCharacters tmp;
1866  for(int i = 0 ; i < tree.length() ; i++)
1867  {
1868  tmp += tree.at(i).text();
1869  }
1870  return tmp;
1871 }
1872 
1876 void TextPagePrivate::correctTextOrder()
1877 {
1878  //m_page->m_page->width() and m_page->m_page->height() are in pixels at
1879  //100% zoom level, and thus depend on display DPI. We scale pageWidth and
1880  //pageHeight to remove the dependence. Otherwise bugs would be more difficult
1881  //to reproduce and Okular could fail in extreme cases like a large TV with low DPI.
1882  const double scalingFactor = 2000.0 / (m_page->m_page->width() + m_page->m_page->height());
1883  const int pageWidth = (int) (scalingFactor * m_page->m_page->width() );
1884  const int pageHeight = (int) (scalingFactor * m_page->m_page->height());
1885 
1886  TextList characters = m_words;
1887 
1891  removeSpace(&characters);
1892 
1896  const QList<WordWithCharacters> wordsWithCharacters = makeWordFromCharacters(characters, pageWidth, pageHeight);
1897 
1901  const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, m_page->m_page->boundingBox(), pageWidth, pageHeight);
1902 
1906  const WordsWithCharacters listWithWordsAndSpaces = addNecessarySpace(tree, pageWidth, pageHeight);
1907 
1911  TextList listOfCharacters;
1912  foreach(const WordWithCharacters &word, listWithWordsAndSpaces)
1913  {
1914  delete word.word;
1915  listOfCharacters.append(word.characters);
1916  }
1917  setWordList(listOfCharacters);
1918 }
1919 
1920 TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
1921 {
1922  if ( area && area->isNull() )
1923  return TextEntity::List();
1924 
1925  TextEntity::List ret;
1926  if ( area )
1927  {
1928  foreach (TinyTextEntity *te, d->m_words)
1929  {
1930  if (b == AnyPixelTextAreaInclusionBehaviour)
1931  {
1932  if ( area->intersects( te->area ) )
1933  {
1934  ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
1935  }
1936  }
1937  else
1938  {
1939  const NormalizedPoint center = te->area.center();
1940  if ( area->contains( center.x, center.y ) )
1941  {
1942  ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
1943  }
1944  }
1945  }
1946  }
1947  else
1948  {
1949  foreach (TinyTextEntity *te, d->m_words)
1950  {
1951  ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
1952  }
1953  }
1954  return ret;
1955 }
1956 
1957 RegularAreaRect * TextPage::wordAt( const NormalizedPoint &p, QString *word ) const
1958 {
1959  TextList::ConstIterator itBegin = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
1960  TextList::ConstIterator it = itBegin;
1961  TextList::ConstIterator posIt = itEnd;
1962  for ( ; it != itEnd; ++it )
1963  {
1964  if ( (*it)->area.contains( p.x, p.y ) )
1965  {
1966  posIt = it;
1967  break;
1968  }
1969  }
1970  QString text;
1971  if ( posIt != itEnd )
1972  {
1973  if ( (*posIt)->text().simplified().isEmpty() )
1974  {
1975  return NULL;
1976  }
1977  // Find the first TinyTextEntity of the word
1978  while ( posIt != itBegin )
1979  {
1980  --posIt;
1981  const QString itText = (*posIt)->text();
1982  if ( itText.right(1).at(0).isSpace() )
1983  {
1984  if (itText.endsWith("-\n"))
1985  {
1986  // Is an hyphenated word
1987  // continue searching the start of the word back
1988  continue;
1989  }
1990 
1991  if (itText == "\n" && posIt != itBegin )
1992  {
1993  --posIt;
1994  if ((*posIt)->text().endsWith("-")) {
1995  // Is an hyphenated word
1996  // continue searching the start of the word back
1997  continue;
1998  }
1999  ++posIt;
2000  }
2001 
2002  ++posIt;
2003  break;
2004  }
2005  }
2006  RegularAreaRect *ret = new RegularAreaRect();
2007  for ( ; posIt != itEnd; ++posIt )
2008  {
2009  const QString itText = (*posIt)->text();
2010  if ( itText.simplified().isEmpty() )
2011  {
2012  break;
2013  }
2014 
2015  ret->appendShape( (*posIt)->area );
2016  text += (*posIt)->text();
2017  if (itText.right(1).at(0).isSpace())
2018  {
2019  if (!text.endsWith("-\n"))
2020  {
2021  break;
2022  }
2023  }
2024  }
2025 
2026  if (word)
2027  {
2028  *word = text;
2029  }
2030  return ret;
2031  }
2032  else
2033  {
2034  return NULL;
2035  }
2036 }
Okular::SearchDirection
SearchDirection
Describes the direction of searching.
Definition: global.h:33
Okular::NormalizedPoint
NormalizedPoint is a helper class which stores the coordinates of a normalized point.
Definition: area.h:47
QTransform
QRect::setBottom
void setBottom(int y)
Okular::TextEntity::area
NormalizedRect * area() const
Returns the bounding area of the text entity.
Definition: textpage.cpp:203
Okular::TextPagePrivate::m_words
TextList m_words
Definition: textpage_p.h:69
QString::append
QString & append(QChar ch)
QString::constData
const QChar * constData() const
Okular::NextResult
Searching for the next result on the page, earlier result should be located so we search from the las...
Definition: global.h:37
QMap::erase
iterator erase(iterator pos)
Okular::TextPagePrivate::~TextPagePrivate
~TextPagePrivate()
Definition: textpage.cpp:221
misc.h
QMap::contains
bool contains(const Key &key) const
Okular::TextSelection::direction
int direction() const
Returns the direction of the selection.
Definition: misc.cpp:66
Okular::NormalizedRect::isRight
bool isRight(const NormalizedPoint &pt) const
Returns true if the point pt is located to the left of the right arm of rectangle.
Definition: area.h:276
QRect::right
int right() const
QList::push_back
void push_back(const T &value)
Okular::TextPagePrivate::m_page
PagePrivate * m_page
Definition: textpage_p.h:71
Okular::MergeSide
MergeSide
The side(s) to be considered when merging areas.
Definition: global.h:64
Okular::TextPage::TextPage
TextPage()
Creates a new text page.
Definition: textpage.cpp:228
Okular::PagePrivate::m_page
Page * m_page
Definition: page_p.h:126
Okular::TextPagePrivate::correctTextOrder
void correctTextOrder()
Make necessary modifications in the TextList to make the text order correct, so that textselection wo...
Definition: textpage.cpp:1876
QList::length
int length() const
Okular::TextEntity::text
QString text() const
Returns the text of the text entity.
Definition: textpage.cpp:198
doesConsumeY
static bool doesConsumeY(const QRect &first, const QRect &second, int threshold)
Definition: textpage.cpp:97
Okular::NormalizedRect::transform
void transform(const QTransform &matrix)
Transforms the normalized rectangle with the operations defined by matrix.
Definition: area.cpp:259
QChar
Okular::Page::totalOrientation
Rotation totalOrientation() const
Returns the total orientation which is the original orientation plus the user defined rotation...
Definition: page.cpp:159
Okular::TextSelection::itE
void itE(int pos)
Definition: misc.cpp:56
Okular::RegularArea::contains
bool contains(double x, double y) const
Returns whether the regular area contains the normalized point x, y.
Definition: area.h:800
segmentsOverlap
static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold)
Returns true iff segments [left1, right1] and [left2, right2] on the real line overlap within thresho...
Definition: textpage.cpp:72
makeAndSortLines
QList< QPair< WordsWithCharacters, QRect > > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
Create Lines from the words and sort them.
Definition: textpage.cpp:1274
QList::at
const T & at(int i) const
QMap
Okular::NormalizedRect::left
double left
The normalized left coordinate.
Definition: area.h:305
QString::simplified
QString simplified() const
Okular::TextPagePrivate::findTextInternalBackward
RegularAreaRect * findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
Definition: textpage.cpp:949
Okular::NormalizedRect
NormalizedRect is a helper class which stores the coordinates of a normalized rect, which is a rectangle of.
Definition: area.h:105
Okular::RegularArea::appendShape
void appendShape(const NormalizedShape &shape, MergeSide side=MergeAll)
Appends the given shape to the regular area.
Definition: area.h:725
debug_p.h
QVarLengthArray
area.h
Okular::RegularAreaRect
Definition: area.h:860
Okular::MergeRight
Merge only if the right side of the first area intersect.
Definition: global.h:66
QList::erase
iterator erase(iterator pos)
QRect::height
int height() const
Okular::NormalizedPoint::y
double y
The normalized y coordinate.
Definition: area.h:97
QRect::x
int x() const
QRect::y
int y() const
QMap::constFind
const_iterator constFind(const Key &key) const
QPoint
page_p.h
Okular::RegularArea::intersects
bool intersects(const RegularArea< NormalizedShape, Shape > *area) const
Returns whether the regular area intersects with the given area.
Definition: area.h:690
Okular::TextSelection::start
NormalizedPoint start() const
Returns the start point of the selection.
Definition: misc.cpp:71
Okular::TextPagePrivate::findTextInternalForward
RegularAreaRect * findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
Definition: textpage.cpp:840
Okular::NormalizedRect::intersects
bool intersects(const NormalizedRect &other) const
Returns whether the normalized rectangle intersects the other normalized rectangle.
Definition: area.cpp:161
page.h
QPoint::x
int x() const
QPoint::y
int y() const
calculateStatisticalInformation
static void calculateStatisticalInformation(const QList< WordWithCharacters > &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
Calculate Statistical information from the lines we made previously.
Definition: textpage.cpp:1368
Okular::FromTop
Searching from top of the page, next result is to be found, there was no earlier search result...
Definition: global.h:35
makeWordFromCharacters
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
We will read the TinyTextEntity from characters and try to create words from there.
Definition: textpage.cpp:1163
QString::normalized
QString normalized(NormalizationForm mode) const
QStringRef
Okular::TextPagePrivate::TextPagePrivate
TextPagePrivate()
Definition: textpage.cpp:216
CaseSensitiveCmpFn
static bool CaseSensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Definition: textpage.cpp:60
Okular::FromBottom
Searching from bottom of the page, next result is to be found, there was no earlier search result...
Definition: global.h:36
Okular::TextPagePrivate::setWordList
void setWordList(const TextList &list)
Copy a TextList to m_words, the pointers of list are adopted.
Definition: textpage.cpp:1128
Okular::TextEntity
Abstract textentity of Okular.
Definition: textpage.h:44
Okular::TextComparisonFunction
bool(* TextComparisonFunction)(const QStringRef &from, const QStringRef &to)
Returns whether the two strings match.
Definition: textpage_p.h:33
Okular::TextEntity::transformedArea
NormalizedRect transformedArea(const QTransform &matrix) const
Returns the transformed area of the text entity.
Definition: textpage.cpp:208
Okular::Page::height
double height() const
Returns the height of the page.
Definition: page.cpp:169
Okular::NormalizedRect::contains
bool contains(double x, double y) const
Returns whether the normalized rectangle contains the normalized coordinates x and y...
Definition: area.cpp:156
QRect
Okular::NormalizedRect::right
double right
The normalized right coordinate.
Definition: area.h:315
Okular::RegularArea::isNull
bool isNull() const
Returns whether the regular area is a null area.
Definition: area.h:656
Okular::PreviousResult
Searching for the previous result on the page, earlier result should be located so we search from the...
Definition: global.h:38
QString::fromRawData
QString fromRawData(const QChar *unicode, int size)
QList::count
int count(const T &value) const
CaseInsensitiveCmpFn
static bool CaseInsensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Definition: textpage.cpp:55
QList::append
void append(const T &value)
QChar::isSpace
bool isSpace() const
QMapIterator
removeSpace
static void removeSpace(TextList *words)
Remove all the spaces in between texts.
Definition: textpage.cpp:1138
Okular::TextPage::words
TextEntity::List words(const RegularAreaRect *rect, TextAreaInclusionBehaviour b) const
Text entity extraction function.
Definition: textpage.cpp:1920
QRect::top
int top() const
QMapIterator::next
Item next()
QRect::setTop
void setTop(int y)
QRect::left
int left() const
QList::isEmpty
bool isEmpty() const
QRect::setWidth
void setWidth(int width)
Okular::NormalizedRect::isTopOrLevel
bool isTopOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located above the bottom of the rectangle.
Definition: area.h:258
QString::isEmpty
bool isEmpty() const
QMap::constEnd
const_iterator constEnd() const
OkularDebug
#define OkularDebug
Definition: debug_p.h:13
QString::startsWith
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const
Okular::Page::boundingBox
NormalizedRect boundingBox() const
Returns the bounding box of the page content in normalized [0,1] coordinates, in terms of the upright...
Definition: page.cpp:179
Okular::TextSelection::end
void end(const NormalizedPoint &point)
Changes the end point of the selection to the given point.
Definition: misc.cpp:45
Okular::TextPagePrivate::m_searchPoints
QMap< int, SearchPoint * > m_searchPoints
Definition: textpage_p.h:70
QString::endsWith
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
QRect::center
QPoint center() const
QList< TinyTextEntity * >::Iterator
typedef Iterator
Okular::Page::width
double width() const
Returns the width of the page.
Definition: page.cpp:164
Okular::PagePrivate::rotationMatrix
QTransform rotationMatrix() const
Definition: page.cpp:120
QList::first
T & first()
QString
QList
QMap::end
iterator end()
Okular::TextEntity::~TextEntity
~TextEntity()
Destroys the text entity.
Definition: textpage.cpp:193
QChar::unicode
ushort unicode() const
textpage.h
QMapIterator::key
const Key & key() const
Okular::NormalizedRect::isBottom
bool isBottom(const NormalizedPoint &pt) const
Returns true if the point pt is located to the bottom of the rectangle.
Definition: area.h:231
QMapIterator::value
const T & value() const
Okular::NormalizedRect::geometry
QRect geometry(int xScale, int yScale) const
Returns the rectangle that results when the normalized rectangle is multiplied by the scaling xScal...
Definition: area.cpp:239
QStringRef::compare
int compare(const QString &other, Qt::CaseSensitivity cs) const
QPair
QString::right
QString right(int n) const
Okular::TextSelection::itB
void itB(int pos)
Definition: misc.cpp:61
QList::end
iterator end()
addNecessarySpace
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
Add spaces in between words in a line.
Definition: textpage.cpp:1805
Okular::NormalizedRect::isBottomOrLevel
bool isBottomOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located under the top of the rectangle.
Definition: area.h:249
QString::midRef
QStringRef midRef(int position, int n) const
compareTinyTextEntityY
static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
Definition: textpage.cpp:1117
Okular::NormalizedRect::top
double top
The normalized top coordinate.
Definition: area.h:310
QMap::key
const Key key(const T &value) const
Okular::TextPage::text
QString text(const RegularAreaRect *rect=0) const
Text extraction function.
Definition: textpage.cpp:1068
QRect::setRight
void setRight(int x)
compareTinyTextEntityX
static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
Definition: textpage.cpp:1109
Okular::TextPage::textArea
RegularAreaRect * textArea(TextSelection *selection) const
Returns the rectangular area of the given selection.
Definition: textpage.cpp:331
Okular::TextPage::wordAt
RegularAreaRect * wordAt(const NormalizedPoint &p, QString *word=0) const
Returns the area and text of the word at the given point. Note that ownership of the returned area bel...
Definition: textpage.cpp:1957
Okular::NormalizedPoint::x
double x
The normalized x coordinate.
Definition: area.h:92
textpage_p.h
Okular::NormalizedRect::isTop
bool isTop(const NormalizedPoint &pt) const
Returns true if the point pt is located on the top of the rectangle.
Definition: area.h:240
QRect::width
int width() const
stringLengthAdaptedWithHyphen
static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd)
Definition: textpage.cpp:779
Okular::TextPage::AnyPixelTextAreaInclusionBehaviour
A character is included in the text() result if any pixel of its bounding box is in the given area...
Definition: textpage.h:104
Okular::TextPage::TextAreaInclusionBehaviour
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to the text() result.
Definition: textpage.h:102
QList::insert
void insert(int i, const T &value)
QRect::setHeight
void setHeight(int height)
QString::at
const QChar at(int position) const
Okular::RegularArea::simplify
void simplify()
Simplifies the regular area by merging its intersecting subareas.
Definition: area.h:628
QList< TinyTextEntity * >::ConstIterator
typedef ConstIterator
Okular::TextPagePrivate
Definition: textpage_p.h:40
QRect::bottom
int bottom() const
Okular::TextPage::findText
RegularAreaRect * findText(int id, const QString &text, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect)
Returns the bounding rect of the text which matches the following criteria or 0 if the search is not ...
Definition: textpage.cpp:715
QString::length
int length() const
XYCutForBoundingBoxes
static RegionTextList XYCutForBoundingBoxes(const QList< WordWithCharacters > &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
Implements the XY Cut algorithm for textpage segmentation. The resulting RegionTextList will contain R...
Definition: textpage.cpp:1542
QMap::insert
iterator insert(const Key &key, const T &value)
Okular::NormalizedRect::bottom
double bottom
The normalized bottom coordinate.
Definition: area.h:320
Okular::NormalizedRect::isLeft
bool isLeft(const NormalizedPoint &pt) const
Returns true if the point pt is located to the right of the left arm of the rectangle.
Definition: area.h:267
QList::constEnd
const_iterator constEnd() const
QList::constBegin
const_iterator constBegin() const
Okular::TextEntity::TextEntity
TextEntity(const QString &text, NormalizedRect *area)
Creates a new text entity with the given text and the given area.
Definition: textpage.cpp:188
Okular::TextPage::~TextPage
~TextPage()
Destroys the text page.
Definition: textpage.cpp:246
QRect::setLeft
void setLeft(int x)
QMap::find
iterator find(const Key &key)
Okular::TextSelection
Wrapper around the information needed to generate the selection area. There are two assumptions inside...
Definition: misc.h:36
QList::begin
iterator begin()
Okular::TextPage::append
void append(const QString &text, NormalizedRect *area)
Appends the given text with the given area as new TextEntity to the page.
Definition: textpage.cpp:251
WordsWithCharacters
QList< WordWithCharacters > WordsWithCharacters
Definition: textpage.cpp:278
QMapIterator::hasNext
bool hasNext() const
QMap::remove
int remove(const Key &key)
QList::replace
void replace(int i, const T &value)
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:19:25 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

okular

Skip menu "okular"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdegraphics API Reference

Skip menu "kdegraphics API Reference"
  •     libkdcraw
  •     libkexiv2
  •     libkipi
  •     libksane
  • okular

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal