KTextAddons

voskspeechtotextdevice.cpp
1/*
2 SPDX-FileCopyrightText: 2023-2024 Laurent Montel <montel.org>
3
4 SPDX-License-Identifier: GPL-2.0-or-later
5 based on VoiceAssistant plugin code
6*/
7
8#include "voskspeechtotextdevice.h"
9#include "libvoskspeechtotext_debug.h"
10#if HAVE_VOSK_API_SUPPORT
11#include "vosk_api.h"
12#endif
13#include <QJsonDocument>
14
15VoskSpeechToTextDevice::VoskSpeechToTextDevice(QObject *parent)
16 : QIODevice{parent}
17{
19 qCWarning(LIBVOSKSPEECHTOTEXT_LOG) << "Impossible to open VoskSpeechToTextDevice";
20#if HAVE_VOSK_API_SUPPORT
21 vosk_set_log_level(-1);
22#endif
23 }
24}
25
26VoskSpeechToTextDevice::~VoskSpeechToTextDevice()
27{
28#if HAVE_VOSK_API_SUPPORT
29 vosk_recognizer_free(mRecognizer);
30 vosk_model_free(mModel);
31#endif
32}
33
34bool VoskSpeechToTextDevice::available() const
35{
36#if HAVE_VOSK_API_SUPPORT
37 return true;
38#else
39 return false;
40#endif
41}
42
43bool VoskSpeechToTextDevice::isAsking() const
44{
45 return mIsAsking;
46}
47
48void VoskSpeechToTextDevice::setAsking(bool asking)
49{
50 if (mIsAsking != asking) {
51 mIsAsking = asking;
52 Q_EMIT askingChanged();
53 }
54}
55
56bool VoskSpeechToTextDevice::initialize(VoskSpeechToTextDeviceInfo &&info)
57{
58#if HAVE_VOSK_API_SUPPORT
59 mModel = vosk_model_new(QString(info.modelDir + info.formattedLang).toUtf8().constData());
60 if (mModel) {
61 mRecognizer = vosk_recognizer_new(mModel, info.sampleRate);
62 }
63
64 if (!mModel || !mRecognizer) {
65 return false;
66 }
67#endif
68 return true;
69}
70
71void VoskSpeechToTextDevice::clear()
72{
73#if HAVE_VOSK_API_SUPPORT
74 if (mRecognizer) {
75 vosk_recognizer_reset(mRecognizer);
76 }
77#endif
78}
79
80qint64 VoskSpeechToTextDevice::readData(char *data, qint64 maxlen)
81{
82 Q_UNUSED(data);
83 return maxlen;
84}
85
86qint64 VoskSpeechToTextDevice::writeData(const char *data, qint64 len)
87{
88#if HAVE_VOSK_API_SUPPORT
89 if (vosk_recognizer_accept_waveform(mRecognizer, data, (int)len)) {
90 parseText(vosk_recognizer_result(mRecognizer));
91 } else {
92 parsePartial(vosk_recognizer_partial_result(mRecognizer));
93 }
94#else
95 Q_UNUSED(data);
96#endif
97 return len;
98}
99
100void VoskSpeechToTextDevice::parseText(const char *json)
101{
102 const QJsonDocument obj = QJsonDocument::fromJson(json);
103 QString text = obj[QStringLiteral("text")].toString();
104
105 if (text.isEmpty())
106 return;
107 else if (mIsAsking) {
108 Q_EMIT result(text);
109 return;
110 }
111
112 text.append(u' ');
113
114 if (!text.contains(mWakeWord)) {
115 if (!mIsListiningBecauseOfWakeWord)
116 return;
117
118 Q_EMIT falsePositiveWakeWord();
119 mIsListiningBecauseOfWakeWord = false;
120 return;
121 }
122
123 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
124 text = text.trimmed();
125
126 Q_EMIT result(text);
127 qDebug() << "[debug] Text:" << text;
128 Q_EMIT doneListening();
129}
130
131void VoskSpeechToTextDevice::parsePartial(const char *json)
132{
133 const QJsonDocument obj = QJsonDocument::fromJson(json);
134 QString text = obj[QStringLiteral("partial")].toString();
135 if (text.isEmpty())
136 return;
137 text.append(u' ');
138
139 if (text.contains(mWakeWord)) {
140 Q_EMIT wakeWordDetected();
141 text = text.mid(text.indexOf(mWakeWord) + mWakeWord.size());
142 mIsListiningBecauseOfWakeWord = true;
143 } else if (mIsListiningBecauseOfWakeWord) {
144 Q_EMIT falsePositiveWakeWord();
145 mIsListiningBecauseOfWakeWord = false;
146 return;
147 } else if (!mIsAsking)
148 return;
149
150 Q_EMIT result(text);
151}
152
153QDebug operator<<(QDebug d, const VoskSpeechToTextDevice::VoskSpeechToTextDeviceInfo &t)
154{
155 d.space() << "sampleRate" << t.sampleRate;
156 d.space() << "modelDir" << t.modelDir;
157 d.space() << "formattedLang" << t.formattedLang;
158 return d;
159}
160
161#include "moc_voskspeechtotextdevice.cpp"
const QList< QKeySequence > & open()
QDebug operator<<(QDebug dbg, const PerceptualColor::MultiSpinBoxSection &value)
const char * constData() const
QDebug & space()
QJsonDocument fromJson(const QByteArray &json, QJsonParseError *error)
Q_EMIT
QString & append(QChar ch)
bool contains(QChar ch, Qt::CaseSensitivity cs) const
qsizetype indexOf(QChar ch, qsizetype from, Qt::CaseSensitivity cs) const
bool isEmpty() const
QString mid(qsizetype position, qsizetype n) const
qsizetype size() const
QByteArray toUtf8() const
QString trimmed() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:19:49 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.