KItinerary

extractorfilter.cpp
1/*
2 SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "extractorfilter.h"
8#include "extractordocumentnode.h"
9#include "extractordocumentprocessor.h"
10#include "extractorresult.h"
11#include "logging.h"
12
13#include <QJsonObject>
14#include <QJSValue>
15#include <QMetaEnum>
16#include <QRegularExpression>
17
18using namespace KItinerary;
19
20namespace KItinerary {
21class ExtractorFilterPrivate : public QSharedData
22{
23public:
24 QString m_mimeType;
25 QString m_fieldName;
28};
29}
30
31ExtractorFilter::ExtractorFilter()
32 : d(new ExtractorFilterPrivate)
33{
34}
35
36ExtractorFilter::ExtractorFilter(const ExtractorFilter&) = default;
37ExtractorFilter::ExtractorFilter(ExtractorFilter&&) noexcept = default;
39ExtractorFilter& ExtractorFilter::operator=(const ExtractorFilter&) = default;
40ExtractorFilter& ExtractorFilter::operator=(ExtractorFilter&&) = default;
41
42QString ExtractorFilter::mimeType() const
43{
44 return d->m_mimeType;
45}
46
47void ExtractorFilter::setMimeType(const QString &mimeType)
48{
49 d.detach();
50 d->m_mimeType = mimeType;
51}
52
54{
55 return d->m_fieldName;
56}
57
58void ExtractorFilter::setFieldName(const QString &fieldName)
59{
60 d.detach();
61 d->m_fieldName = fieldName;
62}
63
64bool ExtractorFilter::matches(const QString &data) const
65{
66 if (!d->m_exp.isValid()) {
67 qCDebug(Log) << d->m_exp.errorString() << d->m_exp.pattern();
68 }
69 return d->m_exp.match(data).hasMatch();
70}
71
72static bool needsFieldName(const QString &mimeType)
73{
74 return mimeType != QLatin1StringView("text/plain") &&
75 mimeType != QLatin1String("application/octet-stream");
76}
77
78template <typename T>
79static T readEnum(const QJsonValue &v, T defaultValue = {})
80{
81 if (!v.isString()) {
82 return defaultValue;
83 }
84
85 const auto me = QMetaEnum::fromType<T>();
86 bool success = false;
87 const auto result = static_cast<T>(me.keyToValue(v.toString().toUtf8().constData(), &success));
88 return success ? result : defaultValue;
89}
90
91bool ExtractorFilter::load(const QJsonObject &obj)
92{
93 d.detach();
94 d->m_mimeType = obj.value(QLatin1StringView("mimeType")).toString();
95 if (d->m_mimeType.isEmpty()) {
96 qCDebug(Log) << "unspecified filter MIME type";
97 }
98 d->m_fieldName = obj.value(QLatin1StringView("field")).toString();
99 d->m_exp.setPattern(obj.value(QLatin1StringView("match")).toString());
100 d->m_scope = readEnum<ExtractorFilter::Scope>(
102 return !d->m_mimeType.isEmpty() && (!d->m_fieldName.isEmpty() || !needsFieldName(d->m_mimeType)) && d->m_exp.isValid();
103}
104
105QJsonObject ExtractorFilter::toJson() const
106{
107 QJsonObject obj;
108 obj.insert(QLatin1StringView("mimeType"), d->m_mimeType);
109 if (needsFieldName(d->m_mimeType)) {
110 obj.insert(QLatin1StringView("field"), d->m_fieldName);
111 }
112 obj.insert(QLatin1StringView("match"), pattern());
113 obj.insert(
114 QLatin1StringView("scope"),
115 QLatin1String(QMetaEnum::fromType<ExtractorFilter::Scope>().valueToKey(
116 d->m_scope)));
117 return obj;
118}
119
121{
122 return d->m_exp.pattern();
123}
124
125void ExtractorFilter::setPattern(const QString &pattern)
126{
127 d.detach();
128 d->m_exp.setPattern(pattern);
129}
130
132{
133 return d->m_scope;
134}
135
136void ExtractorFilter::setScope(Scope scope)
137{
138 d.detach();
139 d->m_scope = scope;
140}
141
142static QString valueForJsonPath(const QJsonObject &obj, const QString &path)
143{
144 const auto pathSections = QStringView(path).split(QLatin1Char('.'));
145 QJsonValue v(obj);
146 for (const auto &pathSection : pathSections) {
147 if (!v.isObject()) {
148 return {};
149 }
150 v = v.toObject().value(pathSection.toString());
151 }
152 return v.toString();
153}
154
/** Result collection mode of filterMachesNode(). */
enum MatchMode {
    Any, ///< return as soon as a match is found, matching nodes are not collected
    All, ///< keep searching and collect the matching nodes
};
156
157static bool filterMachesNode(const ExtractorFilter &filter, ExtractorFilter::Scope scope, const ExtractorDocumentNode &node,
158 std::vector<ExtractorDocumentNode> &matches, MatchMode matchMode)
159{
160 if (node.isNull()) {
161 return false;
162 }
163
164 // filter without field/pattern always match, if the mimetype does
165 if (filter.mimeType() == node.mimeType() && ((filter.fieldName().isEmpty() && filter.pattern().isEmpty()) || node.processor()->matches(filter, node))) {
166 if (matchMode == All) {
167 matches.push_back(node);
168 }
169 return true;
170 }
171
172 if (scope != ExtractorFilter::Ancestors &&
173 filter.mimeType() == QLatin1StringView("application/ld+json") &&
174 !node.result().isEmpty()) {
175 // when collecting all matches for results, we only want the "leaf-most"
176 // ones, not those along the path
177 if (matchMode == All && scope == ExtractorFilter::Descendants) {
178 bool descendantsMatched = false;
179 for (const auto &child : node.childNodes()) {
180 descendantsMatched |= filterMachesNode(
181 filter, ExtractorFilter::Descendants, child, matches, matchMode);
182 }
183 if (descendantsMatched) {
184 return true;
185 }
186 }
187
188 const auto res = node.result().jsonLdResult();
189 for (const auto &elem : res) {
190 const auto property =
191 valueForJsonPath(elem.toObject(), filter.fieldName());
192 if (filter.matches(property)) {
193 if (matchMode == All) {
194 matches.push_back(node);
195 } else {
196 return true;
197 }
198 }
199 }
200 }
201
202 if (scope == ExtractorFilter::Ancestors) {
203 return filterMachesNode(filter, scope, node.parent(), matches, matchMode);
204 }
205 if (scope == ExtractorFilter::Descendants) {
206 for (const auto &child : node.childNodes()) {
207 const auto m = filterMachesNode(filter, ExtractorFilter::Descendants, child, matches, matchMode);
208 if (m && matchMode == Any) {
209 return true;
210 }
211 }
212 }
213
214 return !matches.empty();
215}
216
218{
219 std::vector<ExtractorDocumentNode> matches;
220 switch (d->m_scope) {
222 return filterMachesNode(*this, ExtractorFilter::Current, node, matches, Any);
224 return filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, Any);
226 return filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, Any);
229 for (const auto &child : node.childNodes()) {
230 if (filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child, matches, Any)) {
231 return true;
232 }
233 }
234 }
235 return false;
236}
237
238void ExtractorFilter::allMatches(const ExtractorDocumentNode &node, std::vector<ExtractorDocumentNode>& matches) const
239{
240 switch (d->m_scope) {
242 filterMachesNode(*this, ExtractorFilter::Current, node, matches, All);
243 return;
245 filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, All);
246 return;
248 filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, All);
249 return;
252 for (const auto &child : node.childNodes()) {
253 filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child, matches, All);
254 }
255 return;
256 }
257}
258
259ExtractorFilter ExtractorFilter::fromJSValue(const QJSValue &js)
260{
262 f.setMimeType(js.property(QLatin1StringView("mimeType")).toString());
263 const auto fieldName = js.property(QLatin1StringView("field"));
264 if (fieldName.isString()) {
265 f.setFieldName(fieldName.toString());
266 }
267 const auto match = js.property(QLatin1StringView("match"));
268 if (match.isString()) {
269 f.setPattern(match.toString());
270 }
271 f.setScope(readEnum<ExtractorFilter::Scope>(
272 js.property(QLatin1StringView("scope")).toString(),
274 return f;
275}
276
277#include "moc_extractorfilter.cpp"
A node in the extracted document object tree.
QJsonArray result
Result access for QJSEngine.
QString mimeType
The MIME type of this node.
QVariantList childNodes
Child nodes, for QJSEngine access.
KItinerary::ExtractorDocumentNode parent
The parent node, or a null node if this is the root node.
Determines whether an extractor is applicable to a given email.
QString fieldName() const
The field to filter on.
QString mimeType() const
MIME type of the document part this filter can match.
void allMatches(const ExtractorDocumentNode &node, std::vector< ExtractorDocumentNode > &matches) const
Checks whether this filter applies to node.
bool matches(const QString &data) const
Check if data matches this filter.
Scope
Specifies which document nodes should match this filter, relative to the one being extracted.
@ Current
match the node being extracted
@ Children
match the direct child nodes
@ Descendants
match any direct or indirect child nodes
@ Ancestors
match any direct or indirect parent nodes
@ Parent
match the direct parent node
QString pattern() const
Pattern to match field value against.
Scope scope() const
Evaluation scope of this filter, in relation to the node being extracted.
char * toString(const EngineQuery &query)
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
const char * constData() const
bool isEmpty() const
iterator insert(QLatin1StringView key, const QJsonValue &value)
QJsonValue value(QLatin1StringView key) const
bool isObject() const
bool isString() const
QJsonObject toObject() const
QString toString() const
QJSValue property(const QString &name) const
QByteArray toUtf8() const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Jun 14 2024 11:54:39 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.