Okular

textpage.h
1 /*
2  SPDX-FileCopyrightText: 2005 Piotr Szymanski <niedakh@gmail.com>
3 
4  SPDX-License-Identifier: GPL-2.0-or-later
5 */
6 
7 #ifndef _OKULAR_TEXTPAGE_H_
8 #define _OKULAR_TEXTPAGE_H_
9 
10 #include <QList>
11 #include <QString>
12 
13 #include "area.h"
14 #include "global.h"
15 #include "okularcore_export.h"
16 
17 class QTransform;
18 
19 namespace Okular
20 {
21 class NormalizedPoint;
22 class NormalizedRect;
23 class Page;
24 class PagePrivate;
25 class TextPagePrivate;
26 class TextSelection;
27 class RegularAreaRect;
28 
29 /*! @class TextEntity
30  * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
31  *
32  * To enable searching and text selection, a generator can give information about the textual
33  * content of a Page using a TextPage.
34  * A TextPage is created using TextEntity objects.
35  * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page.
36  *
37  * Ideally, every single glyph is represented by its own TextEntity.
38  * If the textual representation of a graphical glyph contains more than one character,
39  * the TextEntity must contain the whole string which represents the glyph.
40  *
41  * When the Generator has created the TextPage, and it is added to a Page,
42  * the text entities are reordered to words, lines, and paragraphs, to optimize search and text selection.
43  * This way, the Generator does not need to care about the logical order of lines or paragraphs.
44  *
45  * @par Text Selection/Highlighting
46  * A TextEntity is the smallest piece of text that the user can select or that can be highlighted.
47  * That is, if the TextEntity represents a word, only the whole word can be selected.
48  * It would not be possible to select a single glyph of the word, because its bounding box is not known.
49  *
50  * @see TextPage, Generator
51  */
52 class OKULARCORE_EXPORT TextEntity
53 {
54 public:
55  typedef QList<TextEntity> List; ///< A list of text entities.
56 
57  /**
58  * Creates a new text entity holding the given @p text and its
59  * bounding @p area.
60  */
61  TextEntity(const QString &text, const NormalizedRect &area);
62 
63  /**
64  * Destroys the text entity.
65  */
66  ~TextEntity();
67 
68  /**
69  * Returns the textual representation of the text entity.
70  */
71  QString text() const;
72 
73  /**
74  * Returns the bounding area of the text entity.
75  */
76  NormalizedRect area() const;
77 
78  /**
79  * Returns the area of the text entity after applying the transformation @p matrix.
80  */
81  NormalizedRect transformedArea(const QTransform &matrix) const;
82 
83 private:
84  QString m_text; // NOTE(review): presumably the string returned by text() - confirm in the implementation
85  NormalizedRect m_area; // NOTE(review): presumably the rect returned by area() - confirm in the implementation
86 };
87 
88 /**
89  * @short Represents the textual information of a Page. Makes search and text selection possible.
90  *
91  * A Generator with text support should add a TextPage to every Page.
92  * For every piece of text, a TextEntity is added, holding the string representation and the bounding box.
93  *
94  * Ideally, every TextEntity describes only one glyph.
95  * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers).
96  *
97  * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection.
98  *
99  * @see TextEntity
100  */
101 class OKULARCORE_EXPORT TextPage
102 {
103  /// @cond PRIVATE
104  friend class Page;
105  friend class PagePrivate;
106  /// @endcond
107 
108 public:
109  /**
110  * Defines which characters are added to the result of text() and words().
111  * @since 0.10 (KDE 4.4)
112  */
113  enum TextAreaInclusionBehaviour {
114  AnyPixelTextAreaInclusionBehaviour, ///< A character is included into text() result if any pixel of its bounding box is in the given area
115  CentralPixelTextAreaInclusionBehaviour ///< A character is included into text() result if the central pixel of its bounding box is in the given area
116  };
117 
118  /**
119  * Creates a new, empty text page.
120  */
121  TextPage();
122 
123  /**
124  * Creates a new text page with the given @p words.
125  */
126  explicit TextPage(const TextEntity::List &words);
127 
128  /**
129  * Destroys the text page.
130  */
131  ~TextPage();
132 
133  /**
134  * Appends the given @p text with the given @p area as new
135  * @ref TextEntity to the page.
136  */
137  void append(const QString &text, const NormalizedRect &area);
138 
139  /**
140  * Returns the bounding rect of the text which matches the following criteria
141  * or nullptr if the search is not successful.
142  *
143  * @param searchID A unique id for this search.
144  * @param query The search text.
145  * @param direction The direction of the search (@ref SearchDirection)
146  * @param caseSensitivity If Qt::CaseSensitive, the search is case sensitive; otherwise
147  * the search is case insensitive.
148  * @param area If null the search starts at the beginning of the page, otherwise
149  * right/below the coordinates of the given rect.
150  */
151  RegularAreaRect *findText(int searchID, const QString &query, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area);
152 
153  /**
154  * Text extraction function. Looks for text in the given @p area.
155  *
156  * @return
157  * - If @p area points to a valid null area, a null string.
158  * - If @p area is nullptr, the whole page text as a single string.
159  * - Otherwise, the text which is included by @p area, as a single string.
160  * Uses AnyPixelTextAreaInclusionBehaviour.
161  */
162  QString text(const RegularAreaRect *area = nullptr) const;
163 
164  /**
165  * Text extraction function. Looks for text in the given @p area, using the inclusion behaviour @p b.
166  *
167  * @return
168  * - If @p area points to a valid null area, a null string.
169  * - If @p area is nullptr, the whole page text as a single string.
170  * - Otherwise, the text which is included by @p area, as a single string.
171  * @since 0.10 (KDE 4.4)
172  */
173  QString text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
174 
175  /**
176  * Text entity extraction function. Similar to text() but returns
177  * the words including their bounding rectangles. Note that
178  * ownership of the contents of the returned list belongs to the
179  * caller.
180  * @since 0.14 (KDE 4.8)
181  */
182  TextEntity::List words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const;
183 
184  /**
185  * Returns the area and text of the word at the given point @p p; @p word is optional and defaults to nullptr.
186  * Note that ownership of the returned area belongs to the caller.
187  * @since 0.15 (KDE 4.9)
188  */
189  RegularAreaRect *wordAt(const NormalizedPoint &p, QString *word = nullptr) const;
190 
191  /**
192  * Returns the rectangular area of the given @p selection.
193  */
194  RegularAreaRect *textArea(TextSelection *selection) const;
195 
196 private:
197  TextPagePrivate *const d; // Pointer to the private data of this text page (TextPagePrivate)
198 
199  Q_DISABLE_COPY(TextPage) // TextPage instances cannot be copied
200 };
201 
202 }
203 
204 #endif
Collector for all the data belonging to a page.
Definition: page.h:47
CaseSensitivity
The documentation to the global Okular namespace.
Definition: action.h:16
SearchDirection
Describes the direction of searching.
Definition: global.h:36
NormalizedPoint is a helper class which stores the coordinates of a normalized point.
Definition: area.h:116
Represents the textual information of a Page.
Definition: textpage.h:101
TextAreaInclusionBehaviour
Defines the behaviour of adding characters to text() result.
Definition: textpage.h:113
Wrapper around the information needed to generate the selection area. There are two assumptions inside...
Definition: misc.h:33
This is a list of NormalizedRect, to describe an area consisting of multiple rectangles using normali...
Definition: area.h:932
A NormalizedRect is a rectangle which can be defined by two NormalizedPoints.
Definition: area.h:188
Represents a piece of text on a TextPage, containing its textual representation and its bounding box.
Definition: textpage.h:52
@ AnyPixelTextAreaInclusionBehaviour
A character is included into text() result if any pixel of its bounding box is in the given area.
Definition: textpage.h:114
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Thu Feb 15 2024 03:55:52 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.