MauiKit Image Tools

ocs.cpp
1#include "ocs.h"
2#include <QImage>
3#include <QDebug>
4#include <QtConcurrent>
5#include <QFutureWatcher>
6
7#include <tesseract/baseapi.h>
8#include <leptonica/allheaders.h>
9#include "OCRLanguageModel.h"
10#if TESSERACT_MAJOR_VERSION < 5
11#include <tesseract/strngs.h>
12#include <tesseract/genericvector.h>
13#endif
14
15// #include "preprocessimage.hpp"
16#include <preprocessimage.hpp>
17#include <convertimage.hpp>
18
20
21OCS::OCS(QObject *parent) : QObject(parent)
22 ,m_tesseract(new tesseract::TessBaseAPI())
23 ,m_languages(new OCRLanguageModel(this))
24 ,m_boxesTypes(BoxType::Word | BoxType::Line | BoxType::Paragraph)
25 ,m_confidenceThreshold(50)
26// ,m_whiteList("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz")
27{
28 std::vector<std::string> availableLanguages;
29#if TESSERACT_MAJOR_VERSION < 5
30 GenericVector<STRING> languageVector;
31 m_tesseract->GetAvailableLanguagesAsVector(&languageVector);
32 for (int i = 0; i < languageVector.size(); i++) {
33 availableLanguages.push_back(languageVector[i].c_str());
34 }
35#else
36 m_tesseract->GetAvailableLanguagesAsVector(&availableLanguages);
37#endif
38
39 m_languages->setLanguages(availableLanguages);
40}
41
42OCS::~OCS()
43{
44 m_tesseract->End();
45}
46
47QString OCS::filePath() const
48{
49 return m_filePath;
50}
51
52QRect OCS::area() const
53{
54 return m_area;
55}
56
57bool OCS::autoRead() const
58{
59 return m_autoRead;
60}
61
62void OCS::setAutoRead(bool value)
63{
64 if(m_autoRead == value)
65 return;
66
67 m_autoRead = value;
68 Q_EMIT autoReadChanged();
69}
70
71void OCS::setBoxesType(OCS::BoxesType types)
72{
73 if(m_boxesTypes == types)
74 return;
75
76
77 m_boxesTypes = types;
78 qDebug() << "Setting the boxes types" << m_boxesTypes << types;
79
80 Q_EMIT boxesTypeChanged();
81}
82
83void OCS::setConfidenceThreshold(float value)
84{
85 if(m_confidenceThreshold == value)
86 return;
87
88 m_confidenceThreshold = value;
89 Q_EMIT confidenceThresholdChanged();
90}
91
92int OCS::wordBoxAt(const QPoint point)
93{
94 int i = 0;
95 for(const auto &box : m_wordBoxes)
96 {
97 QRect rect = box["rect"].toRect();
98
99 qDebug() << "Rect: " << rect << "Point: " << point << rect.contains(point, true);
100
101 if(rect.contains(point))
102 return i;
103
104 i++;
105 }
106
107 return i;
108}
109
110QVector<int> OCS::wordBoxesAt(const QRect &rect)
111{
112 QVector<int> res;
113 int i = 0;
114 for(const auto &box : m_wordBoxes)
115 {
116 QRect rect_o = box["rect"].toRect();
117
118 if(rect.intersects(rect_o))
119 res << i;
120
121 i++;
122 }
123
124 return res;
125}
126
127void OCS::setWhiteList(const QString &value)
128{
129 if(value == m_whiteList)
130 return;
131
132 m_whiteList = value;
133 Q_EMIT whiteListChanged();
134}
135
136void OCS::setBlackList(const QString &value)
137{
138 if(value == m_blackList)
139 return;
140
141 m_blackList = value;
142 Q_EMIT blackListChanged();
143}
144
145void OCS::setPreprocessImage(bool value)
146{
147 if(m_preprocessImage == value)
148 return;
149
150 m_preprocessImage = value;
151
152 Q_EMIT preprocessImageChanged();
153}
154
155void OCS::setPageSegMode(PageSegMode value)
156{
157 if(m_segMode == value)
158 return;
159
160 m_segMode = value;
161 Q_EMIT pageSegModeChanged();
162}
163
164QString OCS::versionString()
165{
166 return QString::fromStdString(tesseract::TessBaseAPI::Version());
167}
168
169void OCS::do_preprocessImage(const QImage &image)
170{
171
172
173}
174
175static tesseract::PageSegMode mapPageSegValue(OCS::PageSegMode value)
176{
177 switch(value)
178 {
179 default:
180 case OCS::PageSegMode::Auto: return tesseract::PageSegMode::PSM_AUTO;
181 case OCS::PageSegMode::Auto_OSD: return tesseract::PageSegMode::PSM_AUTO_OSD;
182 case OCS::PageSegMode::SingleColumn: return tesseract::PageSegMode::PSM_SINGLE_COLUMN;
183 case OCS::PageSegMode::SingleLine: return tesseract::PageSegMode::PSM_SINGLE_LINE;
184 case OCS::PageSegMode::SingleBlock: return tesseract::PageSegMode::PSM_SINGLE_BLOCK;
185 case OCS::PageSegMode::SingleWord: return tesseract::PageSegMode::PSM_SINGLE_WORD;
186 }
187}
188
189void OCS::getTextAsync()
190{
191 m_ready = false;
192 Q_EMIT readyChanged();
193 if(!QUrl::fromUserInput(m_filePath).isLocalFile())
194 {
195 qDebug() << "URL is not local :: OCR";
196 return;
197 }
198
199 typedef QMap<BoxType, TextBoxes> Res;
200 auto func = [ocs = this](QUrl url, BoxesType levels) -> Res
201 {
202 tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
203 api->Init(NULL, "eng");
204
205 api->SetVariable("tessedit_char_whitelist",
206 ocs->m_whiteList.toStdString().c_str());
207 api->SetVariable("tessedit_char_blacklist",
208 ocs->m_blackList.toStdString().c_str());
209
210 api->SetPageSegMode(mapPageSegValue(ocs->m_segMode));
211
212 if(ocs->m_preprocessImage)
213 {
214 auto var = new QImage(url.toLocalFile());
215 auto m_imgMat = ConvertImage::qimageToMatRef(*var, CV_8UC4);
216
217 // PreprocessImage::toGray(m_imgMat,1);
218 PreprocessImage::adaptThreshold(m_imgMat, false, 3, 1);
219
220 auto m_ocrImg = ConvertImage::matToQimageRef(m_imgMat, QImage::Format_RGBA8888); //remember to delete
221
222 api->SetImage(m_ocrImg.bits(), m_ocrImg.width(), m_ocrImg.height(), 4, m_ocrImg.bytesPerLine());
223 }else
224 {
225 // Pix *image = pixRead(url.toLocalFile().toStdString().c_str());
226 // api->SetImage(image);
227
228 ocs->m_ocrImg = new QImage(url.toLocalFile());
229 api->SetImage(ocs->m_ocrImg->bits(), ocs->m_ocrImg->width(), ocs->m_ocrImg->height(), 4,
230 ocs->m_ocrImg->bytesPerLine());
231 }
232
233 api->SetSourceResolution(200);
234
235 api->Recognize(0);
236
237 TextBoxes wordBoxes, lineBoxes, paragraphBoxes;
238
239 auto levelFunc = [ocs](tesseract::TessBaseAPI *api, tesseract::PageIteratorLevel level) -> TextBoxes
240 {
241 TextBoxes res;
242 tesseract::ResultIterator* ri = api->GetIterator();
243 if (ri != 0)
244 {
245 qDebug() << "Getting text for level" << level;
246 do
247 {
248 const char* word = ri->GetUTF8Text(level);
249 float conf = ri->Confidence(level);
250 int x1, y1, x2, y2;
251 ri->BoundingBox(level, &x1, &y1, &x2, &y2);
252
253 printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
254 word, conf, x1, y1, x2, y2);
255
256 if(conf > ocs->m_confidenceThreshold && !isspace(*word))
257 res << QVariantMap{{"text", QString::fromStdString(word)}, {"rect", QRect{x1, y1, x2-x1, y2-y1}}};
258 delete[] word;
259 } while (ri->Next(level));
260 }
261
262 return res;
263 };
264
265 if(levels.testFlag(Word))
266 wordBoxes = levelFunc(api, tesseract::RIL_WORD);
267
268 if(levels.testFlag(Line))
269 lineBoxes = levelFunc(api, tesseract::RIL_TEXTLINE);
270
271 if(levels.testFlag(Paragraph))
272 paragraphBoxes = levelFunc(api, tesseract::RIL_PARA);
273
274 api->End();
275
276 delete api;
277 return Res{{Word, wordBoxes}, {Line, lineBoxes}, {Paragraph, paragraphBoxes}};
278 };
279
280 qDebug() << "GEtting text for boxes " << m_boxesTypes << m_boxesTypes.testFlag(Word);
281
282 if(OCRCache.contains(m_filePath))
283 {
284 qDebug() << "OCR retrieved from cached";
285 auto res = OCRCache[m_filePath];
286 m_wordBoxes = res[Word];
287 m_lineBoxes = res[Line];
288 m_paragraphBoxes = res[Paragraph];
289 Q_EMIT wordBoxesChanged();
290 Q_EMIT lineBoxesChanged();
291 Q_EMIT paragraphBoxesChanged();
292 m_ready = true;
293 Q_EMIT readyChanged();
294 }else
295 {
296 auto watcher = new QFutureWatcher<Res>;
297 connect(watcher, &QFutureWatcher<Res>::finished, [this, watcher]()
298 {
299 // Q_EMIT textReady(watcher.future().result());
300 auto res = watcher->result();
301 m_wordBoxes = res[Word];
302 m_lineBoxes = res[Line];
303 m_paragraphBoxes = res[Paragraph];
304 Q_EMIT wordBoxesChanged();
305 Q_EMIT lineBoxesChanged();
306 Q_EMIT paragraphBoxesChanged();
307 m_ready = true;
308 Q_EMIT readyChanged();
309
310 OCRCache.insert(m_filePath, res);
311
312 watcher->deleteLater();
313 });
314
315 QFuture<Res> future = QtConcurrent::run(func, QUrl::fromUserInput(m_filePath), m_boxesTypes);
316 watcher->setFuture(future);
317 }
318}
319
320QString OCS::getText()
321{
322 QUrl url(QUrl::fromUserInput(m_filePath));
323 if(!url.isLocalFile())
324 {
325 qDebug() << "URL is not local :: OCR";
326 return "Error!";
327 }
328
329 if (m_tesseract->Init(nullptr, m_languages->getLanguagesString().c_str()))
330 {
331 qDebug() << "Failed tesseract OCR init";
332 return "Error!";
333 }
334
335 m_tesseract->SetPageSegMode(tesseract::PSM_AUTO);
336
337 QString outText;
338
339 if(!m_area.isEmpty())
340 {
341 QImage img(url.toLocalFile());
342 img = img.copy(m_area);
343 // img = img.convertToFormat(QImage::Format_Grayscale8);
344
345 m_tesseract->SetImage(img.bits(), img.width(), img.height(), 4, img.bytesPerLine());
346
347 }else
348 {
349 Pix* im = pixRead(url.toLocalFile().toStdString().c_str());
350 m_tesseract->SetImage(im);
351 }
352
353 outText = QString::fromStdString(m_tesseract->GetUTF8Text());
354
355 return outText;
356}
357
358void OCS::setFilePath(QString filePath)
359{
360 if (m_filePath == filePath)
361 return;
362
363 m_filePath = filePath;
364 Q_EMIT filePathChanged(m_filePath);
365}
366
367void OCS::setArea(QRect area)
368{
369 if (m_area == area)
370 return;
371
372 m_area = area;
373 Q_EMIT areaChanged(m_area);
374}
375
376
377TextBoxes OCS::wordBoxes() const
378{
379 return m_wordBoxes;
380}
381
382TextBoxes OCS::paragraphBoxes() const
383{
384 return m_paragraphBoxes;
385}
386
387TextBoxes OCS::lineBoxes() const
388{
389 return m_lineBoxes;
390}
391
392OCS::BoxesType OCS::boxesType()
393{
394 return m_boxesTypes;
395}
396
397float OCS::confidenceThreshold()
398{
399 return m_confidenceThreshold;
400}
401
402QString OCS::whiteList() const
403{
404 return m_whiteList;
405}
406
407QString OCS::blackList() const
408{
409 return m_blackList;
410}
411
412OCS::PageSegMode OCS::pageSegMode() const
413{
414 return m_segMode;
415}
416
417bool OCS::preprocessImage() const
418{
419 return m_preprocessImage;
420}
421
422void OCS::classBegin()
423{
424}
425
426void OCS::componentComplete()
427{
428 qDebug() << "OCS CALSS COMPLETED IN QML";
429 connect(this, &OCS::filePathChanged, [this](QString)
430 {
431 if(m_autoRead)
432 {
433 getTextAsync();
434 }
435 });
436 getTextAsync();
437}
438
439bool OCS::ready() const
440{
441 return m_ready;
442}
QStringView level(QStringView ifopt)
Q_EMIT
QMetaObject::Connection connect(const QObject *sender, PointerToMemberFunction signal, Functor functor)
bool contains(const QPoint &point, bool proper) const
bool intersects(const QRect &rectangle) const
QString fromStdString(const std::string &str)
QFuture< T > run(Function function,...)
QUrl fromUserInput(const QString &userInput, const QString &workingDirectory, UserInputResolutionOptions options)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri May 2 2025 11:56:08 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.