okular
textpage.cpp
Go to the documentation of this file.
72 static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold)
102 static bool doesConsumeY(const NormalizedRect& first, const NormalizedRect& second, int threshold)
254 d->m_words.append( new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) );
507 const NormalizedRect start_end = (startC.y < endC.y) ? NormalizedRect(startC.x, startC.y, endC.x, endC.y)
536 const MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight;
563 // we have searched every text entity, but none is within the rectangle created by start and end
715 RegularAreaRect* TextPage::findText( int searchID, const QString &query, SearchDirection direct,
779 static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd)
949 RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query,
1109 static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
1117 static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
1163 static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
1259 TinyTextEntity *word = new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
1274 QList< QPair<WordsWithCharacters, QRect> > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
1368 static void calculateStatisticalInformation(const QList<WordWithCharacters> &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
1380 const QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(words, pageWidth, pageHeight);
1542 static RegionTextList XYCutForBoundingBoxes(const QList<WordWithCharacters> &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
1576 calculateStatisticalInformation(list, pageWidth, pageHeight, &word_spacing, &line_spacing, &column_spacing);
1819 QList< QPair<WordsWithCharacters, QRect> > sortedLines = makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
1896 const QList<WordWithCharacters> wordsWithCharacters = makeWordFromCharacters(characters, pageWidth, pageHeight);
1901 const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, m_page->m_page->boundingBox(), pageWidth, pageHeight);
1906 const WordsWithCharacters listWithWordsAndSpaces = addNecessarySpace(tree, pageWidth, pageHeight);
1920 TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
NormalizedPoint is a helper class which stores the coordinates of a normalized point.
Definition: area.h:47
void setBottom(int y)
NormalizedRect * area() const
Returns the bounding area of the text entity.
Definition: textpage.cpp:203
QString & append(QChar ch)
const QChar * constData() const
Searching for the next result on the page, earlier result should be located so we search from the las...
Definition: global.h:37
iterator erase(iterator pos)
bool contains(const Key &key) const
bool isRight(const NormalizedPoint &pt) const
Returns true if the point pt is located to the left of the right arm of rectangle.
Definition: area.h:276
int right() const
void push_back(const T &value)
void correctTextOrder()
Make necessary modifications in the TextList to make the text order correct, so that textselection wo...
Definition: textpage.cpp:1876
int length() const
static bool doesConsumeY(const QRect &first, const QRect &second, int threshold)
Definition: textpage.cpp:97
void transform(const QTransform &matrix)
Transforms the normalized rectangle with the operations defined by matrix.
Definition: area.cpp:259
Rotation totalOrientation() const
Returns the total orientation which is the original orientation plus the user defined rotation...
Definition: page.cpp:159
bool contains(double x, double y) const
Returns whether the regular area contains the normalized point x, y.
Definition: area.h:800
static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold)
Returns true iff segments [left1, right1] and [left2, right2] on the real line overlap within thresho...
Definition: textpage.cpp:72
QList< QPair< WordsWithCharacters, QRect > > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight)
Create Lines from the words and sort them.
Definition: textpage.cpp:1274
const T & at(int i) const
QString simplified() const
RegularAreaRect * findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
Definition: textpage.cpp:949
NormalizedRect is a helper class which stores the coordinates of a normalized rect, which is a rectangle of NormalizedPoints.
Definition: area.h:105
void appendShape(const NormalizedShape &shape, MergeSide side=MergeAll)
Appends the given shape to the regular area.
Definition: area.h:725
Definition: area.h:860
iterator erase(iterator pos)
int height() const
int x() const
int y() const
const_iterator constFind(const Key &key) const
bool intersects(const RegularArea< NormalizedShape, Shape > *area) const
Returns whether the regular area intersects with the given area.
Definition: area.h:690
RegularAreaRect * findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end)
Definition: textpage.cpp:840
bool intersects(const NormalizedRect &other) const
Returns whether the normalized rectangle intersects the other normalized rectangle.
Definition: area.cpp:161
int x() const
int y() const
static void calculateStatisticalInformation(const QList< WordWithCharacters > &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
Calculate Statistical information from the lines we made previously.
Definition: textpage.cpp:1368
Searching from top of the page, next result is to be found, there was no earlier search result...
Definition: global.h:35
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
We will read the TinyTextEntity from characters and try to create words from there.
Definition: textpage.cpp:1163
QString normalized(NormalizationForm mode) const
static bool CaseSensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Definition: textpage.cpp:60
Searching from bottom of the page, next result is to be found, there was no earlier search result...
Definition: global.h:36
void setWordList(const TextList &list)
Copy a TextList to m_words, the pointers of list are adopted.
Definition: textpage.cpp:1128
bool(* TextComparisonFunction)(const QStringRef &from, const QStringRef &to)
Returns whether the two strings match.
Definition: textpage_p.h:33
NormalizedRect transformedArea(const QTransform &matrix) const
Returns the transformed area of the text entity.
Definition: textpage.cpp:208
bool contains(double x, double y) const
Returns whether the normalized rectangle contains the normalized coordinates x and y...
Definition: area.cpp:156
Searching for the previous result on the page, earlier result should be located so we search from the...
Definition: global.h:38
QString fromRawData(const QChar *unicode, int size)
int count(const T &value) const
static bool CaseInsensitiveCmpFn(const QStringRef &from, const QStringRef &to)
Definition: textpage.cpp:55
void append(const T &value)
bool isSpace() const
static void removeSpace(TextList *words)
Remove all the spaces in between texts.
Definition: textpage.cpp:1138
TextEntity::List words(const RegularAreaRect *rect, TextAreaInclusionBehaviour b) const
Text entity extraction function.
Definition: textpage.cpp:1920
int top() const
Item next()
void setTop(int y)
int left() const
bool isEmpty() const
void setWidth(int width)
bool isTopOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located above the bottom of the rectangle.
Definition: area.h:258
bool isEmpty() const
const_iterator constEnd() const
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const
NormalizedRect boundingBox() const
Returns the bounding box of the page content in normalized [0,1] coordinates, in terms of the upright...
Definition: page.cpp:179
void end(const NormalizedPoint &point)
Changes the end point of the selection to the given point.
Definition: misc.cpp:45
QMap< int, SearchPoint * > m_searchPoints
Definition: textpage_p.h:70
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
QPoint center() const
typedef Iterator
T & first()
iterator end()
ushort unicode() const
const Key & key() const
bool isBottom(const NormalizedPoint &pt) const
Returns true if the point pt is located to the bottom of the rectangle.
Definition: area.h:231
const T & value() const
QRect geometry(int xScale, int yScale) const
Returns the rectangle that accrues when the normalized rectangle is multiplied with the scaling xScal...
Definition: area.cpp:239
int compare(const QString &other, Qt::CaseSensitivity cs) const
QString right(int n) const
iterator end()
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
Add spaces in between words in a line.
Definition: textpage.cpp:1805
bool isBottomOrLevel(const NormalizedPoint &pt) const
Returns true if the point pt is located under the top of the rectangle.
Definition: area.h:249
QStringRef midRef(int position, int n) const
static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second)
Definition: textpage.cpp:1117
const Key key(const T &value) const
QString text(const RegularAreaRect *rect=0) const
Text extraction function.
Definition: textpage.cpp:1068
void setRight(int x)
static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second)
Definition: textpage.cpp:1109
RegularAreaRect * textArea(TextSelection *selection) const
Returns the rectangular area of the given selection.
Definition: textpage.cpp:331
RegularAreaRect * wordAt(const NormalizedPoint &p, QString *word=0) const
Returns the area and text of the word at the given point. Note that ownership of the returned area bel...
Definition: textpage.cpp:1957
bool isTop(const NormalizedPoint &pt) const
Returns true if the point pt is located on the top of the rectangle.
Definition: area.h:240
int width() const
static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd)
Definition: textpage.cpp:779
A character is included in the text() result if any pixel of its bounding box is in the given area...
Definition: textpage.h:104
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to text() result.
Definition: textpage.h:102
void insert(int i, const T &value)
void setHeight(int height)
const QChar at(int position) const
void simplify()
Simplifies the regular area by merging its intersecting subareas.
Definition: area.h:628
typedef ConstIterator
Definition: textpage_p.h:40
int bottom() const
RegularAreaRect * findText(int id, const QString &text, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect)
Returns the bounding rect of the text which matches the following criteria or 0 if the search is not ...
Definition: textpage.cpp:715
int length() const
static RegionTextList XYCutForBoundingBoxes(const QList< WordWithCharacters > &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
Implements the XY Cut algorithm for textpage segmentation. The resulting RegionTextList will contain R...
Definition: textpage.cpp:1542
iterator insert(const Key &key, const T &value)
bool isLeft(const NormalizedPoint &pt) const
Returns true if the point pt is located to the right of the left arm of rectangle.
Definition: area.h:267
const_iterator constEnd() const
const_iterator constBegin() const
TextEntity(const QString &text, NormalizedRect *area)
Creates a new text entity with the given text and the given area.
Definition: textpage.cpp:188
void setLeft(int x)
iterator find(const Key &key)
Wrapper around the information needed to generate the selection area. There are two assumptions inside...
Definition: misc.h:36
iterator begin()
void append(const QString &text, NormalizedRect *area)
Appends the given text with the given area as new TextEntity to the page.
Definition: textpage.cpp:251
bool hasNext() const
int remove(const Key &key)
void replace(int i, const T &value)
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:19:25 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:19:25 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.