23 #include <QtAlgorithms>
24 #include <QVarLengthArray>
26 using namespace Okular;
32 : offset_begin( -1 ), offset_end( -1 )
37 TextList::ConstIterator it_begin;
40 TextList::ConstIterator it_end;
57 return from.compare( to, Qt::CaseInsensitive ) == 0;
62 return from.compare( to, Qt::CaseSensitive ) == 0;
71 static bool doesConsumeY(
const QRect& first,
const QRect& second,
int threshold)
74 if(first.top() <= second.top() && first.bottom() >= second.bottom())
77 if(first.top() >= second.top() && first.bottom() <= second.bottom())
82 if(second.bottom() >= first.top() && first.bottom() >= second.top())
84 const int overlap = (second.bottom() >= first.bottom()) ? first.bottom() - second.top()
85 : second.bottom() - first.top();
87 const int percentage = (first.height() < second.height()) ? overlap * 100 / (first.bottom() - first.top())
88 : overlap * 100 / (second.bottom() - second.top());
90 if(percentage >= threshold)
return true;
109 static const int MaxStaticChars =
sizeof( QChar * ) /
sizeof( QChar );
115 Q_ASSERT_X( !text.isEmpty(),
"TinyTextEntity",
"empty string" );
116 Q_ASSERT_X(
sizeof( d ) ==
sizeof( QChar * ),
"TinyTextEntity",
117 "internal storage is wider than QChar*, fix it!" );
118 length = text.length();
121 #if QT_POINTER_SIZE >= 8
123 d.qc[3] = text.at( 3 ).unicode();
126 d.qc[2] = text.at( 2 ).unicode();
130 d.qc[1] = text.at( 1 ).unicode();
133 d.qc[0] = text.at( 0 ).unicode();
136 d.data =
new QChar[ length ];
137 std::memcpy( d.data, text.constData(), length *
sizeof( QChar ) );
143 if ( length > MaxStaticChars )
149 inline QString text()
const
151 return length <= MaxStaticChars ? QString::fromRawData( (
const QChar * )&d.qc[0], length )
152 : QString::fromRawData( d.data, length );
155 inline NormalizedRect transformedArea(
const QTransform &matrix )
const
159 return transformed_area;
165 Q_DISABLE_COPY( TinyTextEntity )
170 ushort qc[MaxStaticChars];
177 : m_text( text ), m_area( area ), d( 0 )
200 return transformed_area;
224 TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
225 for ( ; it != itEnd; ++it )
228 if ( !e->
text().isEmpty() )
241 if ( !text.isEmpty() )
242 d->
m_words.append(
new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
246 struct WordWithCharacters
248 WordWithCharacters(TinyTextEntity *w,
const TextList &c)
249 : word(w), characters(c)
253 inline QString text()
const
263 TinyTextEntity *word;
282 : m_region_wordWithCharacters(wordsWithCharacters), m_area(area)
286 inline QString string()
const
289 foreach(
const WordWithCharacters &word, m_region_wordWithCharacters)
296 return m_region_wordWithCharacters;
299 inline QRect area()
const
304 inline void setArea(
const QRect &area)
311 m_region_wordWithCharacters = wordsWithCharacters;
352 double startCx = startC.
x;
353 double startCy = startC.
y;
356 double endCx = endC.
x;
357 double endCy = endC.
y;
361 #ifdef DEBUG_TEXTPAGE
362 kWarning() <<
"running first loop";
364 const int count = d->
m_words.count();
365 for ( it = 0; it < count; it++ )
367 tmp = *d->
m_words[ it ]->area();
368 if ( tmp.
contains( startCx, startCy )
369 || ( tmp.
top <= startCy && tmp.
bottom >= startCy && tmp.
left >= startCx )
370 || ( tmp.
top >= startCy))
374 #ifdef DEBUG_TEXTPAGE
375 kWarning() <<
"start is" << itB <<
"count is" << d->
m_words.count();
383 #ifdef DEBUG_TEXTPAGE
384 kWarning() <<
"direction is" << sel->
direction();
385 kWarning() <<
"reloaded start is" << itB <<
"against" << sel->
itB();
389 #ifdef DEBUG_TEXTPAGE
390 kWarning() <<
"running second loop";
392 for ( it = d->
m_words.count() - 1; it >= itB; it-- )
394 tmp = *d->
m_words[ it ]->area();
396 || ( tmp.
top <= endCy && tmp.
bottom >= endCy && tmp.
right <= endCx )
397 || ( tmp.
bottom <= endCy ) )
401 #ifdef DEBUG_TEXTPAGE
402 kWarning() <<
"ending is" << itE <<
"count is" << d->
m_words.count();
403 kWarning() <<
"conditions" << tmp.
contains( endCx, endCy ) <<
" "
404 << ( tmp.
top <= endCy && tmp.
bottom >= endCy && tmp.
right <= endCx ) <<
" " <<
412 #ifdef DEBUG_TEXTPAGE
413 kWarning() <<
"reloaded ending is" << itE <<
"against" << sel->
itE();
416 if ( sel->
itB() != -1 && sel->
itE() != -1 )
431 int selMax = qMax( sel->
itB(), sel->
itE() );
432 for ( it = qMin( sel->
itB(), sel->
itE() ); it <= selMax; ++it )
434 tmp = *d->
m_words[ it ]->area();
448 if(startC.
x > endC.
x)
457 const QRect content = boundingRect.
geometry(scaleX,scaleY);
458 const double minX = content.left();
459 const double maxX = content.right();
460 const double minY = content.top();
461 const double maxY = content.bottom();
499 if(!boundingRect.
intersects(start_end))
return ret;
510 if(startC.
x * scaleX < minX) startC.
x = minX/scaleX;
511 if(endC.
x * scaleX > maxX) endC.
x = maxX/scaleX;
514 if(startC.
y * scaleY < minY) startC.
y = minY/scaleY;
515 if(endC.
y * scaleY > maxY) endC.
y = maxY/scaleY;
518 if(startC.
y * scaleY > maxY) startC.
y = maxY/scaleY;
519 if(endC.
y * scaleY < minY) endC.
y = minY/scaleY;
522 TextList::ConstIterator it = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
523 TextList::ConstIterator start = it, end = itEnd, tmpIt = it;
528 for ( ; it != itEnd; ++it )
541 if(start == it && end == itEnd)
543 for ( ; it != itEnd; ++it )
559 bool selection_two_start =
false;
568 if(startC.
y <= endC.
y)
570 for ( ; it != itEnd; ++it )
573 rect.
isBottom(startC) ? flagV =
false: flagV =
true;
575 if(flagV && rect.
isRight(startC))
586 selection_two_start =
true;
587 int distance = scaleX + scaleY + 100;
590 for ( ; it != itEnd; ++it )
597 QRect entRect = rect.
geometry(scaleX,scaleY);
599 xdist = entRect.center().x() - startC.
x * scaleX;
600 ydist = entRect.center().y() - startC.
y * scaleY;
603 if(xdist < 0) xdist = -xdist;
604 if(ydist < 0) ydist = -ydist;
606 if( (xdist + ydist) < distance)
608 distance = xdist+ ydist;
625 if(startC.
y <= endC.
y)
627 for ( ; itEnd >= it; itEnd-- )
629 rect= (*itEnd)->area;
630 rect.
isTop(endC) ? flagV =
false: flagV =
true;
632 if(flagV && rect.
isLeft(endC))
642 int distance = scaleX + scaleY + 100;
643 for ( ; itEnd >= it; itEnd-- )
645 rect= (*itEnd)->area;
649 QRect entRect = rect.
geometry(scaleX,scaleY);
651 xdist = entRect.center().x() - endC.
x * scaleX;
652 ydist = entRect.center().y() - endC.
y * scaleY;
655 if(xdist < 0) xdist = -xdist;
656 if(ydist < 0) ydist = -ydist;
658 if( (xdist + ydist) < distance)
660 distance = xdist+ ydist;
673 if(selection_two_start)
690 if(end == d->
m_words.constEnd()) end--;
692 for( ;start <= end ; start++)
694 ret->
appendShape( (*start)->transformedArea( matrix ), side );
708 if ( d->
m_words.isEmpty() || query.isEmpty() || ( area && area->
isNull() ) )
710 TextList::ConstIterator start;
711 int start_offset = 0;
712 TextList::ConstIterator end;
713 const QMap< int, SearchPoint* >::const_iterator sIt = d->
m_searchPoints.constFind( searchID );
727 start = d->
m_words.constBegin();
738 start = (*sIt)->it_end;
739 start_offset = (*sIt)->offset_end;
743 start = (*sIt)->it_begin;
744 start_offset = (*sIt)->offset_begin;
769 int len = str.length();
775 if ( str.endsWith(
'-' ) )
778 if ( ( it + 1 ) != textListEnd )
781 const QString &lookahedStr = (*(it+1))->text();
782 if (lookahedStr.startsWith(
'\n'))
792 const QRect hyphenArea = (*it)->area.roundedGeometry(pageWidth, pageHeight);
793 const QRect lookaheadArea = (*(it + 1))->area.roundedGeometry(pageWidth, pageHeight);
804 else if (str.endsWith(
"-\n"))
812 RegularAreaRect* TextPagePrivate::searchPointToArea(
const SearchPoint* sp)
817 for (TextList::ConstIterator it = sp->it_begin; ; it++)
819 const TinyTextEntity* curEntity = *it;
820 ret->append( curEntity->transformedArea( matrix ) );
822 if (it == sp->it_end) {
833 const TextList::ConstIterator &start,
835 const TextList::ConstIterator &end)
838 const QString query = _query.normalized(QString::NormalizationForm_KC);
843 int j=0, queryLeft=query.length();
845 TextList::ConstIterator it = start;
846 int offset = start_offset;
848 TextList::ConstIterator it_begin = TextList::ConstIterator();
849 int offset_begin = 0;
853 const TinyTextEntity* curEntity = *it;
854 const QString& str = curEntity->text();
864 if ( it_begin == TextList::ConstIterator() )
867 offset_begin = offset;
870 int min=qMin(queryLeft,len-offset);
872 #ifdef DEBUG_TEXTPAGE
873 kDebug(
OkularDebug) << str.midRef(offset, min) <<
":" << _query.midRef(j, min);
878 if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ) ) )
884 #ifdef DEBUG_TEXTPAGE
888 queryLeft=query.length();
890 offset = offset_begin+1;
891 it_begin = TextList::ConstIterator();
901 #ifdef DEBUG_TEXTPAGE
910 QMap< int, SearchPoint* >::iterator sIt =
m_searchPoints.find( searchID );
915 SearchPoint* sp = *sIt;
916 sp->it_begin = it_begin;
918 sp->offset_begin = offset_begin;
919 sp->offset_end = offset + min;
920 return searchPointToArea(sp);
930 const QMap< int, SearchPoint* >::iterator sIt =
m_searchPoints.find( searchID );
933 SearchPoint* sp = *sIt;
942 const TextList::ConstIterator &start,
944 const TextList::ConstIterator &end)
947 const QString query = _query.normalized(QString::NormalizationForm_KC);
952 int j=query.length(), queryLeft=query.length();
954 TextList::ConstIterator it = start;
955 int offset = start_offset;
957 TextList::ConstIterator it_begin = TextList::ConstIterator();
958 int offset_begin = 0;
971 const TinyTextEntity* curEntity = *it;
972 const QString& str = curEntity->text();
980 if ( it_begin == TextList::ConstIterator() )
983 offset_begin = offset;
986 int min=qMin(queryLeft,offset);
988 #ifdef DEBUG_TEXTPAGE
989 kDebug(
OkularDebug) << str.midRef(offset-min, min) <<
" : " << _query.midRef(j-min, min);
995 if ( !comparer( str.midRef(offset-min, min ), query.midRef( j - min, min ) ) )
1001 #ifdef DEBUG_TEXTPAGE
1006 queryLeft = query.length();
1008 offset = offset_begin-1;
1009 it_begin = TextList::ConstIterator();
1019 #ifdef DEBUG_TEXTPAGE
1025 if ( queryLeft == 0 )
1028 QMap< int, SearchPoint* >::iterator sIt =
m_searchPoints.find( searchID );
1033 SearchPoint* sp = *sIt;
1035 sp->it_end = it_begin;
1036 sp->offset_begin = offset - min;
1037 sp->offset_end = offset_begin;
1038 return searchPointToArea(sp);
1049 const QMap< int, SearchPoint* >::iterator sIt =
m_searchPoints.find( searchID );
1052 SearchPoint* sp = *sIt;
1066 if ( area && area->
isNull() )
1069 TextList::ConstIterator it = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
1073 for ( ; it != itEnd; ++it )
1079 ret += (*it)->text();
1087 ret += (*it)->text();
1094 for ( ; it != itEnd; ++it )
1095 ret += (*it)->text();
1102 QRect firstArea = first.area().roundedGeometry(1000,1000);
1103 QRect secondArea = second.area().roundedGeometry(1000,1000);
1105 return firstArea.left() < secondArea.left();
1110 const QRect firstArea = first.area().roundedGeometry(1000,1000);
1111 const QRect secondArea = second.area().roundedGeometry(1000,1000);
1113 return firstArea.top() < secondArea.top();
1131 TextList::Iterator it = words->begin();
1132 const QString str(
' ');
1134 while ( it != words->end() )
1136 if((*it)->text() == str)
1138 it = words->erase(it);
1169 TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
1170 int newLeft,newRight,newTop,newBottom;
1173 for( ; it != itEnd ; it++)
1175 QString textString = (*it)->text();
1177 QRect lineArea = (*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea;
1184 if (textString.length())
1186 newString.append(textString);
1192 wordCharacters.append(
new TinyTextEntity(textString.normalized
1193 (QString::NormalizationForm_KC), newRect));
1198 wordCharacters.append(
new TinyTextEntity(textString.normalized
1199 (QString::NormalizationForm_KC), newRect));
1209 if (it == itEnd)
break;
1210 elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight);
1217 const int text_y1 = elementArea.top() ,
1218 text_x1 = elementArea.left(),
1219 text_y2 = elementArea.y() + elementArea.height(),
1220 text_x2 = elementArea.x() + elementArea.width();
1221 const int line_y1 = lineArea.top() ,line_x1 = lineArea.left(),
1222 line_y2 = lineArea.y() + lineArea.height(),
1223 line_x2 = lineArea.x() + lineArea.width();
1225 space = elementArea.left() - lineArea.right();
1233 newLeft = text_x1 < line_x1 ? text_x1 : line_x1;
1234 newRight = line_x2 > text_x2 ? line_x2 : text_x2;
1235 newTop = text_y1 > line_y1 ? line_y1 : text_y1;
1236 newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
1238 lineArea.setLeft (newLeft);
1239 lineArea.setTop (newTop);
1240 lineArea.setWidth( newRight - newLeft );
1241 lineArea.setHeight( newBottom - newTop );
1243 textString = (*it)->text();
1247 if (!newString.isEmpty())
1250 TinyTextEntity *word =
new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
1251 wordsWithCharacters.append(WordWithCharacters(word, wordCharacters));
1256 if(it == itEnd)
break;
1259 return wordsWithCharacters;
1278 QList< QPair<WordsWithCharacters, QRect> > lines;
1284 QList<WordWithCharacters> words = wordsTmp;
1290 QList<WordWithCharacters>::Iterator it = words.begin(), itEnd = words.end();
1293 for( ; it != itEnd ; it++)
1295 const QRect elementArea = (*it).area().roundedGeometry(pageWidth,pageHeight);
1298 for(
int i = 0 ; i < lines.length() ; i++)
1304 QRect &lineArea = lines[i].second;
1305 const int text_y1 = elementArea.top() ,
1306 text_y2 = elementArea.top() + elementArea.height() ,
1307 text_x1 = elementArea.left(),
1308 text_x2 = elementArea.left() + elementArea.width();
1309 const int line_y1 = lineArea.top() ,
1310 line_y2 = lineArea.top() + lineArea.height(),
1311 line_x1 = lineArea.left(),
1312 line_x2 = lineArea.left() + lineArea.width();
1323 const int newLeft = line_x1 < text_x1 ? line_x1 : text_x1;
1324 const int newRight = line_x2 > text_x2 ? line_x2 : text_x2;
1325 const int newTop = line_y1 < text_y1 ? line_y1 : text_y1;
1326 const int newBottom = text_y2 > line_y2 ? text_y2 : line_y2;
1328 lineArea = QRect( newLeft,newTop, newRight - newLeft, newBottom - newTop );
1342 lines.append(QPair<WordsWithCharacters, QRect>(tmp, elementArea));
1347 for(
int i = 0 ; i < lines.length() ; i++)
1359 static void calculateStatisticalInformation(
const QList<WordWithCharacters> &words,
int pageWidth,
int pageHeight,
int *word_spacing,
int *line_spacing,
int *col_spacing)
1371 const QList< QPair<WordsWithCharacters, QRect> > sortedLines =
makeAndSortLines(words, pageWidth, pageHeight);
1376 QMap<int,int> line_space_stat;
1377 for(
int i = 0 ; i < sortedLines.length(); i++)
1379 const QRect rectUpper = sortedLines.at(i).second;
1381 if(i+1 == sortedLines.length())
break;
1382 const QRect rectLower = sortedLines.at(i+1).second;
1384 int linespace = rectLower.top() - (rectUpper.top() + rectUpper.height());
1385 if(linespace < 0) linespace =-linespace;
1387 if(line_space_stat.contains(linespace))
1388 line_space_stat[linespace]++;
1389 else line_space_stat[linespace] = 1;
1393 int weighted_count = 0;
1394 QMapIterator<int, int> iterate_linespace(line_space_stat);
1396 while(iterate_linespace.hasNext())
1398 iterate_linespace.next();
1399 *line_spacing += iterate_linespace.value() * iterate_linespace.key();
1400 weighted_count += iterate_linespace.value();
1402 if (*line_spacing != 0)
1403 *line_spacing = (int) ( (
double)*line_spacing / (double) weighted_count + 0.5);
1409 QMap<int,int> hor_space_stat;
1410 QMap<int,int> col_space_stat;
1411 QList< QList<QRect> > space_rects;
1412 QList<QRect> max_hor_space_rects;
1415 for(
int i = 0 ; i < sortedLines.length() ; i++)
1418 QList<QRect> line_space_rects;
1419 int maxSpace = 0, minSpace = pageWidth;
1422 WordsWithCharacters::ConstIterator it = list.begin(), itEnd = list.end();
1423 QRect max_area1,max_area2;
1424 QString before_max, after_max;
1427 for( ; it != itEnd ; it++ )
1429 const QRect area1 = (*it).area().roundedGeometry(pageWidth,pageHeight);
1430 if( it+1 == itEnd )
break;
1432 const QRect area2 = (*(it+1)).area().roundedGeometry(pageWidth,pageHeight);
1433 int space = area2.left() - area1.right();
1435 if(space > maxSpace)
1440 before_max = (*it).text();
1441 after_max = (*(it+1)).text();
1444 if(space < minSpace && space != 0) minSpace = space;
1447 if(space != 0 && space != pageWidth)
1450 if(hor_space_stat.contains(space)) hor_space_stat[space] = hor_space_stat[space]++;
1451 else hor_space_stat[space] = 1;
1453 int left,right,top,bottom;
1455 left = area1.right();
1456 right = area2.left();
1458 top = area2.top() < area1.top() ? area2.top() : area1.top();
1459 bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
1461 QRect rect(left,top,right-left,bottom-top);
1462 line_space_rects.append(rect);
1466 space_rects.append(line_space_rects);
1468 if(hor_space_stat.contains(maxSpace))
1470 if(hor_space_stat[maxSpace] != 1)
1471 hor_space_stat[maxSpace] = hor_space_stat[maxSpace]--;
1472 else hor_space_stat.remove(maxSpace);
1477 if (col_space_stat.contains(maxSpace))
1478 col_space_stat[maxSpace] = col_space_stat[maxSpace]++;
1479 else col_space_stat[maxSpace] = 1;
1482 const int left = max_area1.right();
1483 const int right = max_area2.left();
1484 const int top = (max_area1.top() > max_area2.top()) ? max_area2.top() :
1486 const int bottom = (max_area1.bottom() < max_area2.bottom()) ? max_area2.bottom() :
1489 const QRect rect(left,top,right-left,bottom-top);
1490 max_hor_space_rects.append(rect);
1492 else max_hor_space_rects.append(QRect(0,0,0,0));
1498 QMapIterator<int, int> iterate(hor_space_stat);
1500 while (iterate.hasNext())
1504 if(iterate.key() > 0)
1506 *word_spacing += iterate.value() * iterate.key();
1507 weighted_count += iterate.value();
1511 *word_spacing = (int) ((
double)*word_spacing / (double)weighted_count + 0.5);
1514 QMapIterator<int, int> iterate_col(col_space_stat);
1516 while (iterate_col.hasNext())
1519 if(iterate_col.value() > *col_spacing) *col_spacing = iterate_col.value();
1521 *col_spacing = col_space_stat.key(*col_spacing);
1524 if(sortedLines.length() == 1)
1525 *word_spacing = *col_spacing;
1536 QRect contentRect(boundingBox.
geometry(pageWidth,pageHeight));
1537 const RegionText root(wordsWithCharacters, contentRect);
1540 tree.push_back(root);
1545 while(i < tree.length())
1547 const RegionText node = tree.at(i);
1548 QRect regionRect = node.area();
1554 int size_proj_y = node.area().height();
1555 int size_proj_x = node.area().width();
1557 QVarLengthArray<int> proj_on_xaxis(size_proj_x);
1558 QVarLengthArray<int> proj_on_yaxis(size_proj_y);
1560 for(
int j = 0 ; j < size_proj_y ; ++j ) proj_on_yaxis[j] = 0;
1561 for(
int j = 0 ; j < size_proj_x ; ++j ) proj_on_xaxis[j] = 0;
1563 const QList<WordWithCharacters> list = node.text();
1566 int word_spacing, line_spacing, column_spacing;
1569 const int tcx = word_spacing * 2;
1570 const int tcy = line_spacing * 2;
1572 int maxX = 0 , maxY = 0;
1577 for(
int j = 0 ; j < list.length() ; ++j )
1579 TinyTextEntity *ent = list.at(j).word;
1580 const QRect entRect = ent->area.geometry(pageWidth, pageHeight);
1583 for(
int k = entRect.left() ; k <= entRect.left() + entRect.width() ; ++k)
1585 if( ( k-regionRect.left() ) < size_proj_x && ( k-regionRect.left() ) >= 0 )
1586 proj_on_xaxis[k - regionRect.left()] += entRect.height();
1590 for(
int k = entRect.top() ; k <= entRect.top() + entRect.height() ; ++k)
1592 if( ( k-regionRect.top() ) < size_proj_y && ( k-regionRect.top() ) >= 0 )
1593 proj_on_yaxis[k - regionRect.top()] += entRect.width();
1597 for(
int j = 0 ; j < size_proj_y ; ++j )
1599 if (proj_on_yaxis[j] > maxY)
1600 maxY = proj_on_yaxis[j];
1604 for(
int j = 0 ; j < size_proj_x ; ++j )
1606 if(proj_on_xaxis[j] > maxX) maxX = proj_on_xaxis[j];
1607 if(proj_on_xaxis[j])
1610 avgX+= proj_on_xaxis[j];
1613 if(count) avgX /= count;
1619 int xbegin = 0, xend = size_proj_x - 1;
1620 int ybegin = 0, yend = size_proj_y - 1;
1621 while(xbegin < size_proj_x && proj_on_xaxis[xbegin] <= 0)
1623 while(xend >= 0 && proj_on_xaxis[xend] <= 0)
1625 while(ybegin < size_proj_y && proj_on_yaxis[ybegin] <= 0)
1627 while(yend >= 0 && proj_on_yaxis[yend] <= 0)
1631 int old_left = regionRect.left(), old_top = regionRect.top();
1632 regionRect.setLeft(old_left + xbegin);
1633 regionRect.setRight(old_left + xend);
1634 regionRect.setTop(old_top + ybegin);
1635 regionRect.setBottom(old_top + yend);
1637 int tnx = (int)((
double)avgX * 10.0 / 100.0 + 0.5), tny = 0;
1638 for(
int j = 0 ; j < size_proj_x ; ++j )
1639 proj_on_xaxis[j] -= tnx;
1640 for(
int j = 0 ; j < size_proj_y ; ++j )
1641 proj_on_yaxis[j] -= tny;
1646 int gap_hor = -1, pos_hor = -1;
1647 int begin = -1, end = -1;
1650 for(
int j = 1 ; j < size_proj_y ; ++j)
1653 if(begin >= 0 && proj_on_yaxis[j-1] <= 0
1654 && proj_on_yaxis[j] > 0)
1658 if(proj_on_yaxis[j-1] > 0 && proj_on_yaxis[j] <= 0)
1661 if(begin > 0 && end > 0 && end-begin > gap_hor)
1663 gap_hor = end - begin;
1664 pos_hor = (end + begin) / 2;
1671 begin = -1, end = -1;
1672 int gap_ver = -1, pos_ver = -1;
1675 for(
int j = 1 ; j < size_proj_x ; ++j)
1678 if(begin >= 0 && proj_on_xaxis[j-1] <= 0
1679 && proj_on_xaxis[j] > 0){
1684 if(proj_on_xaxis[j-1] > 0 && proj_on_xaxis[j] <= 0)
1687 if(begin > 0 && end > 0 && end-begin > gap_ver)
1689 gap_ver = end - begin;
1690 pos_ver = (end + begin) / 2;
1696 int cut_pos_x = pos_ver, cut_pos_y = pos_hor;
1697 int gap_x = gap_ver, gap_y = gap_hor;
1702 bool cut_hor =
false, cut_ver =
false;
1705 const int topHeight = cut_pos_y - (regionRect.top() - old_top);
1706 const QRect topRect(regionRect.left(),
1710 const QRect bottomRect(regionRect.left(),
1711 regionRect.top() + topHeight,
1713 regionRect.height() - topHeight );
1716 const int leftWidth = cut_pos_x - (regionRect.left() - old_left);
1717 const QRect leftRect(regionRect.left(),
1720 regionRect.height());
1721 const QRect rightRect(regionRect.left() + leftWidth,
1723 regionRect.width() - leftWidth,
1724 regionRect.height());
1726 if(gap_y >= gap_x && gap_y >= tcy)
1728 else if(gap_y >= gap_x && gap_y <= tcy && gap_x >= tcx)
1730 else if(gap_x >= gap_y && gap_x >= tcx)
1732 else if(gap_x >= gap_y && gap_x <= tcx && gap_y >= tcy)
1738 RegionText tmpNode = tree.at(i);
1739 tmpNode.setArea(regionRect);
1740 tree.replace(i,tmpNode);
1750 for(
int j = 0 ; j < list.length() ; ++j )
1752 const WordWithCharacters word = list.at(j);
1753 const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1755 if(topRect.intersects(wordRect))
1761 RegionText node1(list1,topRect);
1762 RegionText node2(list2,bottomRect);
1764 tree.replace(i,node1);
1765 tree.insert(i+1,node2);
1771 for(
int j = 0 ; j < list.length() ; ++j )
1773 const WordWithCharacters word = list.at(j);
1774 const QRect wordRect = word.area().geometry(pageWidth,pageHeight);
1776 if(leftRect.intersects(wordRect))
1782 RegionText node1(list1,leftRect);
1783 RegionText node2(list2,rightRect);
1785 tree.replace(i,node1);
1786 tree.insert(i+1,node2);
1805 for(
int j = 0 ; j < tree.length() ; j++)
1807 RegionText &tmpRegion = tree[j];
1810 QList< QPair<WordsWithCharacters, QRect> > sortedLines =
makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
1813 for(
int i = 0 ; i < sortedLines.length() ; i++)
1816 for(
int k = 0 ; k < list.length() ; k++ )
1818 const QRect area1 = list.at(k).area().roundedGeometry(pageWidth,pageHeight);
1819 if( k+1 >= list.length() )
break;
1821 const QRect area2 = list.at(k+1).area().roundedGeometry(pageWidth,pageHeight);
1822 const int space = area2.left() - area1.right();
1827 const int left = area1.right();
1828 const int right = area2.left();
1829 const int top = area2.top() < area1.top() ? area2.top() : area1.top();
1830 const int bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();
1832 const QString spaceStr(
" ");
1833 const QRect rect(QPoint(left,top),QPoint(right,bottom));
1835 TinyTextEntity *ent1 =
new TinyTextEntity(spaceStr, entRect);
1836 TinyTextEntity *ent2 =
new TinyTextEntity(spaceStr, entRect);
1837 WordWithCharacters word(ent1, QList<TinyTextEntity*>() << ent2);
1839 list.insert(k+1, word);
1848 for(
int i = 0 ; i < sortedLines.length() ; i++)
1850 tmpList += sortedLines.at(i).first;
1852 tmpRegion.setText(tmpList);
1857 for(
int i = 0 ; i < tree.length() ; i++)
1859 tmp += tree.at(i).text();
1882 const QList<WordWithCharacters> wordsWithCharacters =
makeWordFromCharacters(characters, pageWidth, pageHeight);
1898 foreach(
const WordWithCharacters &word, listWithWordsAndSpaces)
1901 listOfCharacters.append(word.characters);
1908 if ( area && area->
isNull() )
1914 foreach (TinyTextEntity *te, d->
m_words)
1935 foreach (TinyTextEntity *te, d->
m_words)
1945 TextList::ConstIterator itBegin = d->
m_words.constBegin(), itEnd = d->
m_words.constEnd();
1946 TextList::ConstIterator it = itBegin;
1947 TextList::ConstIterator posIt = itEnd;
1948 for ( ; it != itEnd; ++it )
1950 if ( (*it)->area.contains( p.
x, p.
y ) )
1957 if ( posIt != itEnd )
1959 if ( (*posIt)->text().simplified().isEmpty() )
1964 while ( posIt != itBegin )
1967 const QString itText = (*posIt)->text();
1968 if ( itText.right(1).at(0).isSpace() )
1970 if (itText.endsWith(
"-\n"))
1977 if (itText ==
"\n" && posIt != itBegin )
1980 if ((*posIt)->text().endsWith(
"-")) {
1993 for ( ; posIt != itEnd; ++posIt )
1995 const QString itText = (*posIt)->text();
1996 if ( itText.simplified().isEmpty() )
2002 text += (*posIt)->text();
2003 if (itText.right(1).at(0).isSpace())
2005 if (!text.endsWith(
"-\n"))
SearchDirection
Describes the direction of searching.
NormalizedPoint is a helper class which stores the coordinates of a normalized point.
NormalizedRect * area() const
Returns the bounding area of the text entity.
Searching for the next result on the page, earlier result should be located so we search from the las...
int direction() const
Returns the direction of the selection.
QList< RegionText > RegionTextList
A list of RegionText.
bool isRight(const NormalizedPoint &pt) const
Returns true if the point pt is located to the left of the right arm of the rectangle.
MergeSide
The side(s) to be considered when merging areas.
TextPage()
Creates a new text page.
void correctTextOrder()
Make necessary modifications in the TextList to make the text order correct, so that text selection wo...
QString text() const
Returns the text of the text entity.
static bool doesConsumeY(const QRect &first, const QRect &second, int threshold)
If the vertical arm of one rectangle fully contains the other (example below) -----— -— --— first ...
void transform(const QTransform &matrix)
Transforms the normalized rectangle with the operations defined by matrix.
Rotation totalOrientation() const
Returns the total orientation which is the original orientation plus the user-defined rotation...
bool contains(double x, double y) const
Returns whether the regular area contains the normalized point x, y.
QList< QPair< WordsWithCharacters, QRect > > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
Create Lines from the words and sort them.
double left
The normalized left coordinate.
RegularAreaRect * findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
NormalizedRect is a helper class which stores the coordinates of a normalized rect, which is a rectangle of NormalizedPoints.
void appendShape(const NormalizedShape &shape, MergeSide side=MergeAll)
Appends the given shape to the regular area.
Merge only if the right side of the first area intersect.
double y
The normalized y coordinate.
bool intersects(const RegularArea< NormalizedShape, Shape > *area) const
Returns whether the regular area intersects with the given area.
NormalizedPoint start() const
Returns the start point of the selection.
RegularAreaRect * findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
bool intersects(const NormalizedRect &other) const
Returns whether the normalized rectangle intersects the other normalized rectangle.
static void calculateStatisticalInformation(const QList< WordWithCharacters > &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
Calculate Statistical information from the lines we made previously.
Searching from top of the page, next result is to be found, there was no earlier search result...
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
We will read the TinyTextEntity from characters and try to create words from there.
static bool CaseSensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Searching from bottom of the page, next result is to be found, there was no earlier search result...
void setWordList(const TextList &list)
Copy a TextList to m_words, the pointers of list are adopted.
Abstract textentity of Okular.
bool(* TextComparisonFunction)(const QStringRef &from, const QStringRef &to)
Returns whether the two strings match.
NormalizedRect transformedArea(const QTransform &matrix) const
Returns the transformed area of the text entity.
double height() const
Returns the height of the page.
bool contains(double x, double y) const
Returns whether the normalized rectangle contains the normalized coordinates x and y...
double right
The normalized right coordinate.
bool isNull() const
Returns whether the regular area is a null area.
Searching for the previous result on the page, earlier result should be located so we search from the...
static bool CaseInsensitiveCmpFn(const QStringRef &from, const QStringRef &to)
static void removeSpace(TextList *words)
Remove all the spaces in between texts.
TextEntity::List words(const RegularAreaRect *rect, TextAreaInclusionBehaviour b) const
Text entity extraction function.
bool isTopOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located above the bottom of the rectangle.
NormalizedRect boundingBox() const
Returns the bounding box of the page content in normalized [0,1] coordinates, in terms of the upright...
void end(const NormalizedPoint &point)
Changes the end point of the selection to the given point.
QList< TinyTextEntity * > TextList
QMap< int, SearchPoint * > m_searchPoints
double width() const
Returns the width of the page.
QTransform rotationMatrix() const
~TextEntity()
Destroys the text entity.
bool isBottom(const NormalizedPoint &pt) const
Returns true if the point pt is located to the bottom of the rectangle.
QRect geometry(int xScale, int yScale) const
Returns the rectangle that accrues when the normalized rectangle is multiplied with the scaling xScal...
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
Add spaces in between words in a line.
bool isBottomOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located under the top of the rectangle.
static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
double top
The normalized top coordinate.
static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd, PagePrivate *page)
QString text(const RegularAreaRect *rect=0) const
Text extraction function.
static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
RegularAreaRect * textArea(TextSelection *selection) const
Returns the rectangular area of the given selection.
RegularAreaRect * wordAt(const NormalizedPoint &p, QString *word=0) const
Returns the area and text of the word at the given point. Note that ownership of the returned area bel...
double x
The normalized x coordinate.
bool isTop(const NormalizedPoint &pt) const
Returns true if the point pt is located on the top of the rectangle.
A character is included in the text() result if any pixel of its bounding box is in the given area...
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to text() result.
void simplify()
Simplifies the regular area by merging its intersecting subareas.
RegularAreaRect * findText(int id, const QString &text, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect)
Returns the bounding rect of the text which matches the following criteria or 0 if the search is not ...
static RegionTextList XYCutForBoundingBoxes(const QList< WordWithCharacters > &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
Implements the XY Cut algorithm for textpage segmentation. The resulting RegionTextList will contain R...
double bottom
The normalized bottom coordinate.
bool isLeft(const NormalizedPoint &pt) const
Returns true if the point pt is located to the right of the left arm of the rectangle.
TextEntity(const QString &text, NormalizedRect *area)
Creates a new text entity with the given text and the given area.
~TextPage()
Destroys the text page.
QList< TextEntity * > List
Wrapper around the information needed to generate the selection area. There are two assumptions inside...
void append(const QString &text, NormalizedRect *area)
Appends the given text with the given area as new TextEntity to the page.
QList< WordWithCharacters > WordsWithCharacters