23 #include <QtAlgorithms> 
   24 #include <QVarLengthArray> 
   26 using namespace Okular;
 
   32             : offset_begin( -1 ), offset_end( -1 )
 
   37         TextList::ConstIterator it_begin;
 
   40         TextList::ConstIterator it_end;
 
   57     return from.compare( to, Qt::CaseInsensitive ) == 0;
 
   62     return from.compare( to, Qt::CaseSensitive ) == 0;
 
   // Decides whether two rectangles overlap enough along the Y axis.
   // `first` and `second` are compared only by their top()/bottom()/height();
   // `threshold` is the minimum overlap percentage (0-100 scale, see the
   // `* 100` below) for which the visible code returns true.
   // NOTE(review): several original lines are missing from this excerpt (the
   // leading numbers are the original line numbers), so the statements run by
   // the first two `if`s and the final fall-through result are not visible here.
   71 static bool doesConsumeY(
const QRect& first, 
const QRect& second, 
int threshold)
 
   // Case 1: `first` spans `second` vertically (starts no lower, ends no higher).
   74     if(first.top() <= second.top() && first.bottom() >= second.bottom())
 
   // Case 2: the mirror image - `second` spans `first` vertically.
   77     if(first.top() >= second.top() && first.bottom() <= second.bottom())
 
   // Case 3: the vertical ranges intersect without one necessarily containing the other.
   82     if(second.bottom() >= first.top() && first.bottom() >= second.top())
 
   // Length of the shared vertical stretch, measured from the edge of whichever
   // rectangle ends lower.
   84         const int overlap = (second.bottom() >= first.bottom()) ? first.bottom() - second.top()
 
   85                                                                 : second.bottom() - first.top();
 
   // Overlap as an integer percentage of the shorter rectangle.
   // NOTE(review): the divisor is bottom() - top(), which for QRect equals
   // height() - 1, so a one-pixel-high rectangle would divide by zero if it can
   // reach this point - confirm the earlier cases always return first.
   87         const int percentage = (first.height() < second.height()) ? overlap * 100 / (first.bottom() - first.top())
 
   88                                                                 : overlap * 100 / (second.bottom() - second.top());
 
   90         if(percentage >= threshold) 
return true;
 
  109     static const int MaxStaticChars = 
sizeof( QChar * ) / 
sizeof( QChar );
 
  115             Q_ASSERT_X( !text.isEmpty(), 
"TinyTextEntity", 
"empty string" );
 
  116             Q_ASSERT_X( 
sizeof( d ) == 
sizeof( QChar * ), 
"TinyTextEntity",
 
  117                         "internal storage is wider than QChar*, fix it!" );
 
  118             length = text.length();
 
  121 #if QT_POINTER_SIZE >= 8 
  123                     d.qc[3] = text.at( 3 ).unicode();
 
  126                     d.qc[2] = text.at( 2 ).unicode();
 
  130                     d.qc[1] = text.at( 1 ).unicode();
 
  133                     d.qc[0] = text.at( 0 ).unicode();
 
  136                     d.data = 
new QChar[ length ];
 
  137                     std::memcpy( d.data, text.constData(), length * 
sizeof( QChar ) );
 
  143             if ( length > MaxStaticChars )
 
  // Returns the stored characters as a QString.
  // Strings of up to MaxStaticChars characters are read from the inline
  // d.qc array; longer ones are read from the separately allocated d.data buffer.
  // Both branches use QString::fromRawData(), which wraps the existing memory
  // instead of copying it, so the returned string refers to this entity's
  // storage and should not outlive it without being detached/copied.
  149         inline QString text()
 const 
  151             return length <= MaxStaticChars ? QString::fromRawData( ( 
const QChar * )&d.qc[0], length )
 
  152                                             : QString::fromRawData( d.data, length );
 
  155         inline NormalizedRect transformedArea( 
const QTransform &matrix )
 const 
  159             return transformed_area;
 
  165         Q_DISABLE_COPY( TinyTextEntity )
 
  170             ushort qc[MaxStaticChars];
 
  177     : m_text( text ), m_area( area ), d( 0 )
 
  200     return transformed_area;
 
  224     TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
 
  225     for ( ; it != itEnd; ++it )
 
  228         if ( !e->
text().isEmpty() )
 
  241     if ( !text.isEmpty() )
 
  242         d->
m_words.append( 
new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
 
  246 struct WordWithCharacters
 
  248     WordWithCharacters(TinyTextEntity *w, 
const TextList &c)
 
  249      : word(w), characters(c)
 
  253     inline QString text()
 const 
  263     TinyTextEntity *word;
 
  282         : m_region_wordWithCharacters(wordsWithCharacters), m_area(area)
 
  286     inline QString string()
 const 
  289         foreach(
const WordWithCharacters &word, m_region_wordWithCharacters)
 
  296         return m_region_wordWithCharacters;
 
  299     inline QRect area()
 const 
  304     inline void setArea(
const QRect &area)
 
  311         m_region_wordWithCharacters = wordsWithCharacters;
 
  352     double startCx = startC.
x;
 
  353     double startCy = startC.
y;
 
  356     double endCx = endC.
x;
 
  357     double endCy = endC.
y;
 
  361 #ifdef DEBUG_TEXTPAGE 
  362         kWarning() << 
"running first loop";
 
  364         const int count = d->
m_words.count();
 
  365         for ( it = 0; it < count; it++ )
 
  367             tmp = *d->
m_words[ it ]->area();
 
  368             if ( tmp.
contains( startCx, startCy )
 
  369                  || ( tmp.
top <= startCy && tmp.
bottom >= startCy && tmp.
left >= startCx )
 
  370                  || ( tmp.
top >= startCy))
 
  374 #ifdef DEBUG_TEXTPAGE 
  375                 kWarning() << 
"start is" << itB << 
"count is" << d->
m_words.count();
 
  383 #ifdef DEBUG_TEXTPAGE 
  384     kWarning() << 
"direction is" << sel->
direction();
 
  385     kWarning() << 
"reloaded start is" << itB << 
"against" << sel->
itB();
 
  389 #ifdef DEBUG_TEXTPAGE 
  390         kWarning() << 
"running second loop";
 
  392         for ( it = d->
m_words.count() - 1; it >= itB; it-- )
 
  394             tmp = *d->
m_words[ it ]->area();
 
  396                  || ( tmp.
top <= endCy && tmp.
bottom >= endCy && tmp.
right <= endCx )
 
  397                  || ( tmp.
bottom <= endCy ) )
 
  401 #ifdef DEBUG_TEXTPAGE 
  402                 kWarning() << 
"ending is" << itE << 
"count is" << d->
m_words.count();
 
  403                 kWarning() << 
"conditions" << tmp.
contains( endCx, endCy ) << 
" "  
  404                   << ( tmp.
top <= endCy && tmp.
bottom >= endCy && tmp.
right <= endCx ) << 
" " <<
 
  412 #ifdef DEBUG_TEXTPAGE 
  413     kWarning() << 
"reloaded ending is" << itE << 
"against" << sel->
itE();
 
  416     if ( sel->
itB() != -1 && sel->
itE() != -1 )
 
  431         int selMax = qMax( sel->
itB(), sel->
itE() );
 
  432         for ( it = qMin( sel->
itB(), sel->
itE() ); it <= selMax; ++it )
 
  434             tmp = *d->
m_words[ it ]->area();
 
  448     if(startC.
x > endC.
x)
 
  457     const QRect content = boundingRect.
geometry(scaleX,scaleY);
 
  458     const double minX = content.left();
 
  459     const double maxX = content.right();
 
  460     const double minY = content.top();
 
  461     const double maxY = content.bottom();
 
  499     if(!boundingRect.
intersects(start_end)) 
return ret;
 
  510         if(startC.
x * scaleX < minX) startC.
x = minX/scaleX;
 
  511         if(endC.
x * scaleX > maxX) endC.
x = maxX/scaleX;
 
  514         if(startC.
y * scaleY < minY) startC.
y = minY/scaleY;
 
  515         if(endC.
y * scaleY > maxY) endC.
y = maxY/scaleY;
 
  518         if(startC.
y * scaleY > maxY) startC.
y = maxY/scaleY;
 
  519         if(endC.
y * scaleY < minY) endC.
y = minY/scaleY;
 
  522     TextList::ConstIterator it = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
 
  523     TextList::ConstIterator start = it, end = itEnd, tmpIt = it; 
 
  528     for ( ; it != itEnd; ++it )
 
  541     if(start == it && end == itEnd)
 
  543         for ( ; it != itEnd; ++it )
 
  559     bool selection_two_start = 
false;
 
  568         if(startC.
y <= endC.
y)
 
  570             for ( ; it != itEnd; ++it )
 
  573                 rect.
isBottom(startC) ? flagV = 
false: flagV = 
true;
 
  575                 if(flagV && rect.
isRight(startC))
 
  586             selection_two_start = 
true;
 
  587             int distance = scaleX + scaleY + 100;
 
  590             for ( ; it != itEnd; ++it )
 
  597                     QRect entRect = rect.
geometry(scaleX,scaleY);
 
  599                     xdist = entRect.center().x() - startC.
x * scaleX;
 
  600                     ydist = entRect.center().y() - startC.
y * scaleY;
 
  603                     if(xdist < 0) xdist = -xdist;
 
  604                     if(ydist < 0) ydist = -ydist;
 
  606                     if( (xdist + ydist) < distance)
 
  608                         distance = xdist+ ydist;
 
  625         if(startC.
y <= endC.
y)
 
  627             for ( ; itEnd >= it; itEnd-- )
 
  629                 rect= (*itEnd)->area;
 
  630                 rect.
isTop(endC) ? flagV = 
false: flagV = 
true;
 
  632                 if(flagV && rect.
isLeft(endC))
 
  642             int distance = scaleX + scaleY + 100;
 
  643             for ( ; itEnd >= it; itEnd-- )
 
  645                 rect= (*itEnd)->area;
 
  649                     QRect entRect = rect.
geometry(scaleX,scaleY);
 
  651                     xdist = entRect.center().x() - endC.
x * scaleX;
 
  652                     ydist = entRect.center().y() - endC.
y * scaleY;
 
  655                     if(xdist < 0) xdist = -xdist;
 
  656                     if(ydist < 0) ydist = -ydist;
 
  658                     if( (xdist + ydist) < distance)
 
  660                         distance = xdist+ ydist;
 
  673     if(selection_two_start)
 
  690     if(end == d->
m_words.constEnd()) end--;
 
  692     for( ;start <= end ; start++)
 
  694         ret->
appendShape( (*start)->transformedArea( matrix ), side );
 
  708     if ( d->
m_words.isEmpty() || query.isEmpty() || ( area && area->
isNull() ) )
 
  710     TextList::ConstIterator start;
 
  711     int start_offset = 0;
 
  712     TextList::ConstIterator end;
 
  713     const QMap< int, SearchPoint* >::const_iterator sIt = d->
m_searchPoints.constFind( searchID );
 
  727             start = d->
m_words.constBegin();
 
  738             start = (*sIt)->it_end;
 
  739             start_offset = (*sIt)->offset_end;
 
  743             start = (*sIt)->it_begin;
 
  744             start_offset = (*sIt)->offset_begin;
 
  769     int len = str.length();
 
  775     if ( str.endsWith( 
'-' ) )
 
  778         if ( ( it + 1 ) != textListEnd )
 
  781             const QString &lookahedStr = (*(it+1))->text();
 
  782             if (lookahedStr.startsWith(
'\n'))
 
  792                 const QRect hyphenArea = (*it)->area.roundedGeometry(pageWidth, pageHeight);
 
  793                 const QRect lookaheadArea = (*(it + 1))->area.roundedGeometry(pageWidth, pageHeight);
 
  804     else if (str.endsWith(
"-\n"))
 
  // Collects the areas of the text entities covered by the search point `sp`.
  // Walks the word list from sp->it_begin and appends each entity's area,
  // transformed by `matrix`, to `ret`.
  // NOTE(review): the declarations of `ret` and `matrix`, the body of the final
  // `if` and the return statement are missing from this excerpt.
  812 RegularAreaRect* TextPagePrivate::searchPointToArea(
const SearchPoint* sp)
 
  // The loop header has no termination condition; the only visible exit test is
  // the comparison with sp->it_end further down.
  817     for (TextList::ConstIterator it = sp->it_begin; ; it++)
 
  819         const TinyTextEntity* curEntity = *it;
 
  820         ret->append( curEntity->transformedArea( matrix ) );
 
  // Tested after the append, so the entity at sp->it_end is itself included.
  822         if (it == sp->it_end) {
 
  833                                                              const TextList::ConstIterator &start,
 
  835                                                              const TextList::ConstIterator &end)
 
  838     const QString query = _query.normalized(QString::NormalizationForm_KC);
 
  843     int j=0, queryLeft=query.length();
 
  845     TextList::ConstIterator it = start;
 
  846     int offset = start_offset;
 
  848     TextList::ConstIterator it_begin = TextList::ConstIterator();
 
  849     int offset_begin = 0; 
 
  853         const TinyTextEntity* curEntity = *it;
 
  854         const QString& str = curEntity->text();
 
  864         if ( it_begin == TextList::ConstIterator() )
 
  867             offset_begin = offset;
 
  870         int min=qMin(queryLeft,len-offset);
 
  872 #ifdef DEBUG_TEXTPAGE 
  873             kDebug(
OkularDebug) << str.midRef(offset, min) << 
":" << _query.midRef(j, min);
 
  878             if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ) ) )
 
  884 #ifdef DEBUG_TEXTPAGE 
  888                     queryLeft=query.length();
 
  890                     offset = offset_begin+1;
 
  891                     it_begin = TextList::ConstIterator();
 
  901 #ifdef DEBUG_TEXTPAGE 
  910                         QMap< int, SearchPoint* >::iterator sIt = 
m_searchPoints.find( searchID );
 
  915                         SearchPoint* sp = *sIt;
 
  916                         sp->it_begin = it_begin;
 
  918                         sp->offset_begin = offset_begin;
 
  919                         sp->offset_end = offset + min;
 
  920                         return searchPointToArea(sp);
 
  930     const QMap< int, SearchPoint* >::iterator sIt = 
m_searchPoints.find( searchID );
 
  933         SearchPoint* sp = *sIt;
 
  942                                                             const TextList::ConstIterator &start,
 
  944                                                             const TextList::ConstIterator &end)
 
  947     const QString query = _query.normalized(QString::NormalizationForm_KC);
 
  952     int j=query.length(), queryLeft=query.length();
 
  954     TextList::ConstIterator it = start;
 
  955     int offset = start_offset;
 
  957     TextList::ConstIterator it_begin = TextList::ConstIterator();
 
  958     int offset_begin = 0; 
 
  971         const TinyTextEntity* curEntity = *it;
 
  972         const QString& str = curEntity->text();
 
  980         if ( it_begin == TextList::ConstIterator() )
 
  983             offset_begin = offset;
 
  986         int min=qMin(queryLeft,offset);
 
  988 #ifdef DEBUG_TEXTPAGE 
  989             kDebug(
OkularDebug) << str.midRef(offset-min, min) << 
" : " << _query.midRef(j-min, min);
 
  995             if ( !comparer( str.midRef(offset-min, min ), query.midRef( j - min, min ) ) )
 
 1001 #ifdef DEBUG_TEXTPAGE 
 1006                     queryLeft = query.length();
 
 1008                     offset = offset_begin-1;
 
 1009                     it_begin = TextList::ConstIterator();
 
 1019 #ifdef DEBUG_TEXTPAGE 
 1025                     if ( queryLeft == 0 )
 
 1028                         QMap< int, SearchPoint* >::iterator sIt = 
m_searchPoints.find( searchID );
 
 1033                         SearchPoint* sp = *sIt;
 
 1035                         sp->it_end = it_begin;
 
 1036                         sp->offset_begin = offset - min;
 
 1037                         sp->offset_end = offset_begin;
 
 1038                         return searchPointToArea(sp);
 
 1049     const QMap< int, SearchPoint* >::iterator sIt = 
m_searchPoints.find( searchID );
 
 1052         SearchPoint* sp = *sIt;
 
 1066     if ( area && area->
isNull() )
 
 1069     TextList::ConstIterator it = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
 
 1073         for ( ; it != itEnd; ++it )
 
 1079                     ret += (*it)->text();
 
 1087                     ret += (*it)->text();
 
 1094         for ( ; it != itEnd; ++it )
 
 1095             ret += (*it)->text();
 
 1102     QRect firstArea = first.area().roundedGeometry(1000,1000);
 
 1103     QRect secondArea = second.area().roundedGeometry(1000,1000);
 
 1105     return firstArea.left() < secondArea.left();
 
 1110     const QRect firstArea = first.area().roundedGeometry(1000,1000);
 
 1111     const QRect secondArea = second.area().roundedGeometry(1000,1000);
 
 1113     return firstArea.top() < secondArea.top();
 
 1131     TextList::Iterator it = words->begin();
 
 1132     const QString str(
' ');
 
 1134     while ( it != words->end() )
 
 1136         if((*it)->text() == str)
 
 1138             it = words->erase(it);
 
 1169     TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
 
 1170     int newLeft,newRight,newTop,newBottom;
 
 1173     for( ; it != itEnd ; it++)
 
 1175         QString textString = (*it)->text();
 
 1177         QRect lineArea = (*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea;
 
 1184             if (textString.length())
 
 1186                 newString.append(textString);
 
 1192                     wordCharacters.append(
new TinyTextEntity(textString.normalized
 
 1193                                                    (QString::NormalizationForm_KC), newRect));
 
 1198                     wordCharacters.append(
new TinyTextEntity(textString.normalized
 
 1199                                                    (QString::NormalizationForm_KC), newRect));
 
 1209             if (it == itEnd) 
break;
 
 1210             elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight);
 
 1217             const int text_y1 = elementArea.top() ,
 
 1218                       text_x1 = elementArea.left(),
 
 1219                       text_y2 = elementArea.y() + elementArea.height(),
 
 1220                       text_x2 = elementArea.x() + elementArea.width();
 
 1221             const int line_y1 = lineArea.top() ,line_x1 = lineArea.left(),
 
 1222                       line_y2 = lineArea.y() + lineArea.height(),
 
 1223                       line_x2 = lineArea.x() + lineArea.width();
 
 1225             space = elementArea.left() - lineArea.right();
 
 1233             newLeft = text_x1 < line_x1 ? text_x1 : line_x1;
 
 1234             newRight = line_x2 > text_x2 ? line_x2 : text_x2;
 
 1235             newTop = text_y1 > line_y1 ? line_y1 : text_y1;
 
 1236             newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
 
 1238             lineArea.setLeft (newLeft);
 
 1239             lineArea.setTop (newTop);
 
 1240             lineArea.setWidth( newRight - newLeft );
 
 1241             lineArea.setHeight( newBottom - newTop );
 
 1243             textString = (*it)->text();
 
 1247         if (!newString.isEmpty())
 
 1250             TinyTextEntity *word = 
new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
 
 1251             wordsWithCharacters.append(WordWithCharacters(word, wordCharacters));
 
 1256         if(it == itEnd) 
break;
 
 1259     return wordsWithCharacters;
 
 1278     QList< QPair<WordsWithCharacters, QRect> > lines;
 
 1284     QList<WordWithCharacters> words = wordsTmp;
 
 1290     QList<WordWithCharacters>::Iterator it = words.begin(), itEnd = words.end();
 
 1293     for( ; it != itEnd ; it++)
 
 1295         const QRect elementArea = (*it).area().roundedGeometry(pageWidth,pageHeight);
 
 1298         for( 
int i = 0 ; i < lines.length() ; i++)
 
 1304             QRect &lineArea = lines[i].second;
 
 1305             const int text_y1 = elementArea.top() ,
 
 1306                       text_y2 = elementArea.top() + elementArea.height() ,
 
 1307                       text_x1 = elementArea.left(),
 
 1308                       text_x2 = elementArea.left() + elementArea.width();
 
 1309             const int line_y1 = lineArea.top() ,
 
 1310                       line_y2 = lineArea.top() + lineArea.height(),
 
 1311                       line_x1 = lineArea.left(),
 
 1312                       line_x2 = lineArea.left() + lineArea.width();
 
 1323                 const int newLeft = line_x1 < text_x1 ? line_x1 : text_x1;
 
 1324                 const int newRight = line_x2 > text_x2 ? line_x2 : text_x2;
 
 1325                 const int newTop = line_y1 < text_y1 ? line_y1 : text_y1;
 
 1326                 const int newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
 
 1328                 lineArea = QRect( newLeft,newTop, newRight - newLeft, newBottom - newTop );
 
 1342             lines.append(QPair<WordsWithCharacters, QRect>(tmp, elementArea));
 
 1347     for(
int i = 0 ; i < lines.length() ; i++)
 
 // Derives typical spacing values from `words` and writes them through the
 // three out-pointers:
 //   *line_spacing - rounded average vertical gap between consecutive lines,
 //   *word_spacing - rounded average horizontal gap between neighbouring words,
 //   *col_spacing  - the gap value that most often was the largest gap on a line.
 // pageWidth/pageHeight are passed to roundedGeometry() to turn the word areas
 // into integer QRects and to makeAndSortLines() to group the words into lines.
 // NOTE(review): this excerpt omits a number of original lines (the leading
 // numbers are the original line numbers), so the initial values of the
 // out-parameters and parts of the loop bodies are not visible here.
 1359 static void calculateStatisticalInformation(
const QList<WordWithCharacters> &words, 
int pageWidth, 
int pageHeight, 
int *word_spacing, 
int *line_spacing, 
int *col_spacing)
 
 1371     const QList< QPair<WordsWithCharacters, QRect> > sortedLines = 
makeAndSortLines(words, pageWidth, pageHeight);
 
 // --- Line spacing: histogram of gap size -> number of occurrences. ---
 1376     QMap<int,int> line_space_stat;
 
 1377     for(
int i = 0 ; i < sortedLines.length(); i++)
 
 1379         const QRect rectUpper = sortedLines.at(i).second;
 
 // The last line has no successor to measure against.
 1381         if(i+1 == sortedLines.length()) 
break;
 
 1382         const QRect rectLower = sortedLines.at(i+1).second;
 
 // Distance from the bottom edge of the upper line to the top of the next one,
 // taken as an absolute value.
 1384         int linespace = rectLower.top() - (rectUpper.top() + rectUpper.height());
 
 1385         if(linespace < 0) linespace =-linespace;
 
 1387         if(line_space_stat.contains(linespace))
 
 1388             line_space_stat[linespace]++;
 
 1389         else line_space_stat[linespace] = 1;
 
 // Weighted mean of the histogram: sum(gap * occurrences) / sum(occurrences).
 1393     int weighted_count = 0;
 
 1394     QMapIterator<int, int> iterate_linespace(line_space_stat);
 
 1396     while(iterate_linespace.hasNext())
 
 1398         iterate_linespace.next();
 
 1399         *line_spacing += iterate_linespace.value() * iterate_linespace.key();
 
 1400         weighted_count += iterate_linespace.value();
 
 // Skipping the division when the sum is zero also covers the empty-histogram
 // case where weighted_count is still 0. The + 0.5 rounds to nearest.
 1402     if (*line_spacing != 0)
 
 1403         *line_spacing = (int) ( (
double)*line_spacing / (double) weighted_count + 0.5);
 
 // --- Horizontal spacing: per-line gaps between neighbouring words. ---
 // hor_space_stat counts ordinary word gaps, col_space_stat counts the widest
 // gap of each line; the rect lists keep the geometry of those gaps.
 1409     QMap<int,int> hor_space_stat;
 
 1410     QMap<int,int> col_space_stat;
 
 1411     QList< QList<QRect> > space_rects;
 
 1412     QList<QRect> max_hor_space_rects;
 
 1415     for(
int i = 0 ; i < sortedLines.length() ; i++)
 
 1418         QList<QRect> line_space_rects;
 
 1419         int maxSpace = 0, minSpace = pageWidth;
 
 // NOTE(review): `list` is declared on a line missing from this excerpt;
 // presumably it is the word list of sortedLines.at(i) - confirm.
 1422         WordsWithCharacters::ConstIterator it = list.begin(), itEnd = list.end();
 
 1423         QRect max_area1,max_area2;
 
 1424         QString before_max, after_max;
 
 1427         for( ; it != itEnd ; it++ )
 
 1429             const QRect area1 = (*it).area().roundedGeometry(pageWidth,pageHeight);
 
 // Gaps are measured between a word and its successor, so stop at the last word.
 1430             if( it+1 == itEnd ) 
break;
 
 1432             const QRect area2 = (*(it+1)).area().roundedGeometry(pageWidth,pageHeight);
 
 1433             int space = area2.left() - area1.right();
 
 // Track the widest gap on this line together with the words on either side.
 1435             if(space > maxSpace)
 
 1440                 before_max = (*it).text();
 
 1441                 after_max = (*(it+1)).text();
 
 1444             if(space < minSpace && space != 0) minSpace = space;
 
 1447             if(space != 0 && space != pageWidth)
 
 // NOTE(review): `m[k] = m[k]++` does not increment. Since C++17 the right
 // operand is evaluated first and its old value is then written back over the
 // incremented one; before C++17 the unsequenced double modification is
 // undefined behaviour. `++hor_space_stat[space];` is almost certainly what was
 // meant (compare line_space_stat[linespace]++ above).
 1450                 if(hor_space_stat.contains(space)) hor_space_stat[space] = hor_space_stat[space]++;
 
 1451                 else hor_space_stat[space] = 1;
 
 // Rectangle covering the gap: horizontally between the two words, vertically
 // the union of both words' extents.
 1453                 int left,right,top,bottom;
 
 1455                 left = area1.right();
 
 1456                 right = area2.left();
 
 1458                 top = area2.top() < area1.top() ? area2.top() : area1.top();
 
 1459                 bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
 
 1461                 QRect rect(left,top,right-left,bottom-top);
 
 1462                 line_space_rects.append(rect);
 
 1466         space_rects.append(line_space_rects);
 
 // The line's widest gap is taken out of the word-gap histogram and counted as
 // a column-gap candidate instead.
 1468         if(hor_space_stat.contains(maxSpace))
 
 1470             if(hor_space_stat[maxSpace] != 1)
 
 // NOTE(review): same self-assignment problem as above - this does not
 // decrement; `--hor_space_stat[maxSpace];` looks intended.
 1471                 hor_space_stat[maxSpace] = hor_space_stat[maxSpace]--;
 
 1472             else hor_space_stat.remove(maxSpace);
 
 1477             if (col_space_stat.contains(maxSpace))
 
 // NOTE(review): same self-assignment problem - this does not increment;
 // `++col_space_stat[maxSpace];` looks intended.
 1478                 col_space_stat[maxSpace] = col_space_stat[maxSpace]++;
 
 1479             else col_space_stat[maxSpace] = 1;
 
 // Geometry of the widest gap, built from the two words recorded in
 // max_area1/max_area2 (their assignment is on lines missing from this excerpt).
 1482             const int left = max_area1.right();
 
 1483                 const int right = max_area2.left();
 
 1484             const int top = (max_area1.top() > max_area2.top()) ? max_area2.top() :
 
 1486             const int bottom = (max_area1.bottom() < max_area2.bottom()) ? max_area2.bottom() :
 
 1489             const QRect rect(left,top,right-left,bottom-top);
 
 1490             max_hor_space_rects.append(rect);
 
 // Empty placeholder rectangle when no widest gap was registered for the line.
 1492         else max_hor_space_rects.append(QRect(0,0,0,0));
 
 // --- Word spacing: weighted mean over the positive gaps only. ---
 // NOTE(review): weighted_count is reused from the line-spacing pass; whether it
 // is reset in between is not visible in this excerpt.
 1498     QMapIterator<int, int> iterate(hor_space_stat);
 
 1500     while (iterate.hasNext())
 
 1504         if(iterate.key() > 0)
 
 1506             *word_spacing += iterate.value() * iterate.key();
 
 1507             weighted_count += iterate.value();
 
 1511         *word_spacing = (int) ((
double)*word_spacing / (double)weighted_count + 0.5);
 
 // --- Column spacing: find the highest occurrence count, then map that count
 // back to a gap size with QMap::key(value). ---
 1514     QMapIterator<int, int> iterate_col(col_space_stat);
 
 1516     while (iterate_col.hasNext())
 
 1519         if(iterate_col.value() > *col_spacing) *col_spacing = iterate_col.value();
 
 1521     *col_spacing = col_space_stat.key(*col_spacing);
 
 // With a single line, the column gap is used as the word gap as well.
 1524     if(sortedLines.length() == 1)
 
 1525         *word_spacing = *col_spacing;
 
 1536     QRect contentRect(boundingBox.
geometry(pageWidth,pageHeight));
 
 1537     const RegionText root(wordsWithCharacters, contentRect);
 
 1540     tree.push_back(root);
 
 1545     while(i < tree.length())
 
 1547         const RegionText node = tree.at(i);
 
 1548         QRect regionRect = node.area();
 
 1554         int size_proj_y = node.area().height();
 
 1555         int size_proj_x = node.area().width();
 
 1557         QVarLengthArray<int> proj_on_xaxis(size_proj_x);
 
 1558         QVarLengthArray<int> proj_on_yaxis(size_proj_y);
 
 1560         for( 
int j = 0 ; j < size_proj_y ; ++j ) proj_on_yaxis[j] = 0;
 
 1561         for( 
int j = 0 ; j < size_proj_x ; ++j ) proj_on_xaxis[j] = 0;
 
 1563         const QList<WordWithCharacters> list = node.text();
 
 1566         int word_spacing, line_spacing, column_spacing;
 
 1569         const int tcx = word_spacing * 2;
 
 1570         const int tcy = line_spacing * 2;
 
 1572         int maxX = 0 , maxY = 0;
 
 1577         for(
int j = 0 ; j < list.length() ; ++j )
 
 1579             TinyTextEntity *ent = list.at(j).word;
 
 1580             const QRect entRect = ent->area.geometry(pageWidth, pageHeight);
 
 1583             for(
int k = entRect.left() ; k <= entRect.left() + entRect.width() ; ++k)
 
 1585                 if( ( k-regionRect.left() ) < size_proj_x && ( k-regionRect.left() ) >= 0 )
 
 1586                     proj_on_xaxis[k - regionRect.left()] += entRect.height();
 
 1590             for(
int k = entRect.top() ; k <= entRect.top() + entRect.height() ; ++k)
 
 1592                 if( ( k-regionRect.top() ) < size_proj_y && ( k-regionRect.top() ) >= 0 )
 
 1593                     proj_on_yaxis[k - regionRect.top()] += entRect.width();
 
 1597         for( 
int j = 0 ; j < size_proj_y ; ++j )
 
 1599             if (proj_on_yaxis[j] > maxY)
 
 1600                 maxY = proj_on_yaxis[j];
 
 1604         for( 
int j = 0 ; j < size_proj_x ; ++j )
 
 1606             if(proj_on_xaxis[j] > maxX) maxX = proj_on_xaxis[j];
 
 1607             if(proj_on_xaxis[j])
 
 1610                 avgX+= proj_on_xaxis[j];
 
 1613         if(count) avgX /= count;
 
 1619         int xbegin = 0, xend = size_proj_x - 1;
 
 1620         int ybegin = 0, yend = size_proj_y - 1;
 
 1621         while(xbegin < size_proj_x && proj_on_xaxis[xbegin] <= 0)
 
 1623         while(xend >= 0 && proj_on_xaxis[xend] <= 0)
 
 1625         while(ybegin < size_proj_y && proj_on_yaxis[ybegin] <= 0)
 
 1627         while(yend >= 0 && proj_on_yaxis[yend] <= 0)
 
 1631         int old_left = regionRect.left(), old_top = regionRect.top();
 
 1632         regionRect.setLeft(old_left + xbegin);
 
 1633         regionRect.setRight(old_left + xend);
 
 1634         regionRect.setTop(old_top + ybegin);
 
 1635         regionRect.setBottom(old_top + yend);
 
 1637         int tnx = (int)((
double)avgX * 10.0 / 100.0 + 0.5), tny = 0;
 
 1638         for( 
int j = 0 ; j < size_proj_x ; ++j )
 
 1639             proj_on_xaxis[j] -= tnx;
 
 1640         for( 
int j = 0 ; j < size_proj_y ; ++j )
 
 1641             proj_on_yaxis[j] -= tny;
 
 1646         int gap_hor = -1, pos_hor = -1;
 
 1647         int begin = -1, end = -1;
 
 1650         for(
int j = 1 ; j < size_proj_y ; ++j)
 
 1653             if(begin >= 0 && proj_on_yaxis[j-1] <= 0
 
 1654                     && proj_on_yaxis[j] > 0)
 
 1658             if(proj_on_yaxis[j-1] > 0 && proj_on_yaxis[j] <= 0)
 
 1661             if(begin > 0 && end > 0 && end-begin > gap_hor)
 
 1663                 gap_hor = end - begin;
 
 1664                 pos_hor = (end + begin) / 2;
 
 1671         begin = -1, end = -1;
 
 1672         int gap_ver = -1, pos_ver = -1;
 
 1675         for(
int j = 1 ; j < size_proj_x ; ++j)
 
 1678             if(begin >= 0 && proj_on_xaxis[j-1] <= 0
 
 1679                     && proj_on_xaxis[j] > 0){
 
 1684             if(proj_on_xaxis[j-1] > 0 && proj_on_xaxis[j] <= 0)
 
 1687             if(begin > 0 && end > 0 && end-begin > gap_ver)
 
 1689                 gap_ver = end - begin;
 
 1690                 pos_ver = (end + begin) / 2;
 
 1696         int cut_pos_x = pos_ver, cut_pos_y = pos_hor;
 
 1697         int gap_x = gap_ver, gap_y = gap_hor;
 
 1702         bool cut_hor = 
false, cut_ver = 
false;
 
 1705         const int topHeight = cut_pos_y - (regionRect.top() - old_top);
 
 1706         const QRect topRect(regionRect.left(),
 
 1710         const QRect bottomRect(regionRect.left(),
 
 1711                                regionRect.top() + topHeight,
 
 1713                                regionRect.height() - topHeight );
 
 1716         const int leftWidth = cut_pos_x - (regionRect.left() - old_left);
 
 1717         const QRect leftRect(regionRect.left(),
 
 1720                              regionRect.height());
 
 1721         const QRect rightRect(regionRect.left() + leftWidth,
 
 1723                               regionRect.width() - leftWidth,
 
 1724                               regionRect.height());
 
 1726         if(gap_y >= gap_x && gap_y >= tcy)
 
 1728         else if(gap_y >= gap_x && gap_y <= tcy && gap_x >= tcx)
 
 1730         else if(gap_x >= gap_y && gap_x >= tcx)
 
 1732         else if(gap_x >= gap_y && gap_x <= tcx && gap_y >= tcy)
 
 1738             RegionText tmpNode = tree.at(i);
 
 1739             tmpNode.setArea(regionRect);
 
 1740             tree.replace(i,tmpNode);
 
 1750             for( 
int j = 0 ; j < list.length() ; ++j )
 
 1752                 const WordWithCharacters word = list.at(j);
 
 1753                 const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
 
 1755                 if(topRect.intersects(wordRect))
 
 1761             RegionText node1(list1,topRect);
 
 1762             RegionText node2(list2,bottomRect);
 
 1764             tree.replace(i,node1);
 
 1765             tree.insert(i+1,node2);
 
 1771             for( 
int j = 0 ; j < list.length() ; ++j )
 
 1773                 const WordWithCharacters word = list.at(j);
 
 1774                 const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
 
 1776                 if(leftRect.intersects(wordRect))
 
 1782             RegionText node1(list1,leftRect);
 
 1783             RegionText node2(list2,rightRect);
 
 1785             tree.replace(i,node1);
 
 1786             tree.insert(i+1,node2);
 
 1805     for(
int j = 0 ; j < tree.length() ; j++)
 
 1807         RegionText &tmpRegion = tree[j];
 
 1810         QList< QPair<WordsWithCharacters, QRect> > sortedLines = 
makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
 
 1813         for(
int i = 0 ; i < sortedLines.length() ; i++)
 
 1816             for(
int k = 0 ; k < list.length() ; k++ )
 
 1818                 const QRect area1 = list.at(k).area().roundedGeometry(pageWidth,pageHeight);
 
 1819                 if( k+1 >= list.length() ) 
break;
 
 1821                 const QRect area2 = list.at(k+1).area().roundedGeometry(pageWidth,pageHeight);
 
 1822                 const int space = area2.left() - area1.right();
 
 1827                     const int left = area1.right();
 
 1828                     const int right = area2.left();
 
 1829                     const int top = area2.top() < area1.top() ? area2.top() : area1.top();
 
 1830                     const int bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
 
 1832                     const QString spaceStr(
" ");
 
 1833                     const QRect rect(QPoint(left,top),QPoint(right,bottom));
 
 1835                     TinyTextEntity *ent1 = 
new TinyTextEntity(spaceStr, entRect);
 
 1836                     TinyTextEntity *ent2 = 
new TinyTextEntity(spaceStr, entRect);
 
 1837                     WordWithCharacters word(ent1, QList<TinyTextEntity*>() << ent2);
 
 1839                     list.insert(k+1, word);
 
 1848         for(
int i = 0 ; i < sortedLines.length() ; i++)
 
 1850             tmpList += sortedLines.at(i).first;
 
 1852         tmpRegion.setText(tmpList);
 
 1857     for(
int i = 0 ; i < tree.length() ; i++)
 
 1859         tmp += tree.at(i).text();
 
 1882     const QList<WordWithCharacters> wordsWithCharacters = 
makeWordFromCharacters(characters, pageWidth, pageHeight);
 
 1898     foreach(
const WordWithCharacters &word, listWithWordsAndSpaces)
 
 1901         listOfCharacters.append(word.characters);
 
 1908     if ( area && area->
isNull() )
 
 1914         foreach (TinyTextEntity *te, d->
m_words)
 
 1935         foreach (TinyTextEntity *te, d->
m_words)
 
 1945     TextList::ConstIterator itBegin = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
 
 1946     TextList::ConstIterator it = itBegin;
 
 1947     TextList::ConstIterator posIt = itEnd;
 
 1948     for ( ; it != itEnd; ++it )
 
 1950         if ( (*it)->area.contains( p.
x, p.
y ) )
 
 1957     if ( posIt != itEnd )
 
 1959         if ( (*posIt)->text().simplified().isEmpty() )
 
 1964         while ( posIt != itBegin )
 
 1967             const QString itText = (*posIt)->text();
 
 1968             if ( itText.right(1).at(0).isSpace() )
 
 1970                 if (itText.endsWith(
"-\n"))
 
 1977                 if (itText == 
"\n" && posIt != itBegin )
 
 1980                     if ((*posIt)->text().endsWith(
"-")) {
 
 1993         for ( ; posIt != itEnd; ++posIt )
 
 1995             const QString itText = (*posIt)->text();
 
 1996             if ( itText.simplified().isEmpty() )
 
 2002             text += (*posIt)->text();
 
 2003             if (itText.right(1).at(0).isSpace())
 
 2005                 if (!text.endsWith(
"-\n"))
 
SearchDirection
Describes the direction of searching. 
NormalizedPoint is a helper class which stores the coordinates of a normalized point. 
NormalizedRect * area() const 
Returns the bounding area of the text entity. 
Searching for the next result on the page, earlier result should be located so we search from the las...
int direction() const 
Returns the direction of the selection. 
QList< RegionText > RegionTextList
A list of RegionText. 
bool isRight(const NormalizedPoint &pt) const 
Returns true if the point pt is located to the left of the right arm of the rectangle. 
MergeSide
The side(s) to be considered when merging areas. 
TextPage()
Creates a new text page. 
void correctTextOrder()
Make necessary modifications in the TextList to make the text order correct, so that textselection wo...
QString text() const 
Returns the text of the text entity. 
static bool doesConsumeY(const QRect &first, const QRect &second, int threshold)
If the vertical arm of one rectangle fully contains the other (example below) -----— -— --— first ...
void transform(const QTransform &matrix)
Transforms the normalized rectangle with the operations defined by matrix. 
Rotation totalOrientation() const 
Returns the total orientation which is the original orientation plus the user defined rotation...
bool contains(double x, double y) const 
Returns whether the regular area contains the normalized point x, y. 
QList< QPair< WordsWithCharacters, QRect > > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
Create Lines from the words and sort them. 
double left
The normalized left coordinate. 
RegularAreaRect * findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
NormalizedRect is a helper class which stores the coordinates of a normalized rect, which is a rectangle of. 
void appendShape(const NormalizedShape &shape, MergeSide side=MergeAll)
Appends the given shape to the regular area. 
Merge only if the right side of the first area intersect. 
double y
The normalized y coordinate. 
bool intersects(const RegularArea< NormalizedShape, Shape > *area) const 
Returns whether the regular area intersects with the given area. 
NormalizedPoint start() const 
Returns the start point of the selection. 
RegularAreaRect * findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
bool intersects(const NormalizedRect &other) const 
Returns whether the normalized rectangle intersects the other normalized rectangle. 
static void calculateStatisticalInformation(const QList< WordWithCharacters > &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
Calculate Statistical information from the lines we made previously. 
Searching from top of the page, next result is to be found, there was no earlier search result...
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
We will read the TinyTextEntity from characters and try to create words from there. 
static bool CaseSensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Searching from bottom of the page, next result is to be found, there was no earlier search result...
void setWordList(const TextList &list)
Copy a TextList to m_words, the pointers of list are adopted. 
Abstract textentity of Okular. 
bool(* TextComparisonFunction)(const QStringRef &from, const QStringRef &to)
Returns whether the two strings match. 
NormalizedRect transformedArea(const QTransform &matrix) const 
Returns the transformed area of the text entity. 
double height() const 
Returns the height of the page. 
bool contains(double x, double y) const 
Returns whether the normalized rectangle contains the normalized coordinates x and y...
double right
The normalized right coordinate. 
bool isNull() const 
Returns whether the regular area is a null area. 
Searching for the previous result on the page, earlier result should be located so we search from the...
static bool CaseInsensitiveCmpFn(const QStringRef &from, const QStringRef &to)
static void removeSpace(TextList *words)
Remove all the spaces in between texts. 
TextEntity::List words(const RegularAreaRect *rect, TextAreaInclusionBehaviour b) const 
Text entity extraction function. 
bool isTopOrLevel(const NormalizedPoint &pt) const 
Returns true if the point pt is located above the bottom of the rectangle. 
NormalizedRect boundingBox() const 
Returns the bounding box of the page content in normalized [0,1] coordinates, in terms of the upright...
void end(const NormalizedPoint &point)
Changes the end point of the selection to the given point. 
QList< TinyTextEntity * > TextList
QMap< int, SearchPoint * > m_searchPoints
double width() const 
Returns the width of the page. 
QTransform rotationMatrix() const 
~TextEntity()
Destroys the text entity. 
bool isBottom(const NormalizedPoint &pt) const 
Returns true if the point pt is located to the bottom of the rectangle. 
QRect geometry(int xScale, int yScale) const 
Returns the rectangle that results when the normalized rectangle is multiplied by the scaling xScal...
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
Add spaces in between words in a line. 
bool isBottomOrLevel(const NormalizedPoint &pt) const 
Returns true if the point pt is located under the top of the rectangle. 
static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
double top
The normalized top coordinate. 
static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd, PagePrivate *page)
QString text(const RegularAreaRect *rect=0) const 
Text extraction function. 
static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
RegularAreaRect * textArea(TextSelection *selection) const 
Returns the rectangular area of the given selection. 
RegularAreaRect * wordAt(const NormalizedPoint &p, QString *word=0) const 
Returns the area and text of the word at the given point. Note that ownership of the returned area bel...
double x
The normalized x coordinate. 
bool isTop(const NormalizedPoint &pt) const 
Returns true if the point pt is located on the top of the rectangle. 
A character is included in the text() result if any pixel of its bounding box is in the given area...
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to text() result. 
void simplify()
Simplifies the regular area by merging its intersecting subareas. 
RegularAreaRect * findText(int id, const QString &text, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect)
Returns the bounding rect of the text which matches the following criteria or 0 if the search is not ...
static RegionTextList XYCutForBoundingBoxes(const QList< WordWithCharacters > &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
Implements the XY Cut algorithm for textpage segmentation. The resulting RegionTextList will contain R...
double bottom
The normalized bottom coordinate. 
bool isLeft(const NormalizedPoint &pt) const 
Returns true if the point pt is located to the right of the left arm of the rectangle. 
TextEntity(const QString &text, NormalizedRect *area)
Creates a new text entity with the given text and the given area. 
~TextPage()
Destroys the text page. 
QList< TextEntity * > List
Wrapper around the information needed to generate the selection area. There are two assumptions inside...
void append(const QString &text, NormalizedRect *area)
Appends the given text with the given area as new TextEntity to the page. 
QList< WordWithCharacters > WordsWithCharacters