KItinerary

extractorfilter.cpp
1/*
2 SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "extractorfilter.h"
8#include "extractordocumentnode.h"
9#include "extractordocumentprocessor.h"
10#include "extractorresult.h"
11#include "logging.h"
12
13#include <QJsonObject>
14#include <QJSValue>
15#include <QMetaEnum>
16#include <QRegularExpression>
17
18using namespace Qt::Literals;
19using namespace KItinerary;
20
21namespace KItinerary {
22class ExtractorFilterPrivate : public QSharedData
23{
24public:
25 QString m_mimeType;
26 QString m_fieldName;
29};
30}
31
32ExtractorFilter::ExtractorFilter()
33 : d(new ExtractorFilterPrivate)
34{
35}
36
37ExtractorFilter::ExtractorFilter(const ExtractorFilter&) = default;
38ExtractorFilter::ExtractorFilter(ExtractorFilter&&) noexcept = default;
40ExtractorFilter& ExtractorFilter::operator=(const ExtractorFilter&) = default;
41ExtractorFilter& ExtractorFilter::operator=(ExtractorFilter&&) = default;
42
43QString ExtractorFilter::mimeType() const
44{
45 return d->m_mimeType;
46}
47
48void ExtractorFilter::setMimeType(const QString &mimeType)
49{
50 d.detach();
51 d->m_mimeType = mimeType;
52}
53
55{
56 return d->m_fieldName;
57}
58
59void ExtractorFilter::setFieldName(const QString &fieldName)
60{
61 d.detach();
62 d->m_fieldName = fieldName;
63}
64
65bool ExtractorFilter::matches(const QString &data) const
66{
67 if (!d->m_exp.isValid()) {
68 qCDebug(Log) << d->m_exp.errorString() << d->m_exp.pattern();
69 }
70 return d->m_exp.match(data).hasMatch();
71}
72
73static bool needsFieldName(const QString &mimeType)
74{
75 return mimeType != QLatin1StringView("text/plain") &&
76 mimeType != QLatin1StringView("application/octet-stream");
77}
78
79template <typename T>
80static T readEnum(const QJsonValue &v, T defaultValue = {})
81{
82 if (!v.isString()) {
83 return defaultValue;
84 }
85
86 const auto me = QMetaEnum::fromType<T>();
87 bool success = false;
88 const auto result = static_cast<T>(me.keyToValue(v.toString().toUtf8().constData(), &success));
89 return success ? result : defaultValue;
90}
91
92bool ExtractorFilter::load(const QJsonObject &obj)
93{
94 d.detach();
95 d->m_mimeType = obj.value(QLatin1StringView("mimeType")).toString();
96 if (d->m_mimeType.isEmpty()) {
97 qCDebug(Log) << "unspecified filter MIME type";
98 }
99 d->m_fieldName = obj.value(QLatin1StringView("field")).toString();
100 d->m_exp.setPattern(obj.value(QLatin1StringView("match")).toString());
101 d->m_scope = readEnum<ExtractorFilter::Scope>(
103 return !d->m_mimeType.isEmpty() && (!d->m_fieldName.isEmpty() || !needsFieldName(d->m_mimeType)) && d->m_exp.isValid();
104}
105
106QJsonObject ExtractorFilter::toJson() const
107{
108 QJsonObject obj;
109 obj.insert(QLatin1StringView("mimeType"), d->m_mimeType);
110 if (needsFieldName(d->m_mimeType)) {
111 obj.insert(QLatin1StringView("field"), d->m_fieldName);
112 }
113 obj.insert(QLatin1StringView("match"), pattern());
114 obj.insert(
115 QLatin1StringView("scope"),
117 d->m_scope)));
118 return obj;
119}
120
122{
123 return d->m_exp.pattern();
124}
125
126void ExtractorFilter::setPattern(const QString &pattern)
127{
128 d.detach();
129 d->m_exp.setPattern(pattern);
130}
131
133{
134 return d->m_scope;
135}
136
137void ExtractorFilter::setScope(Scope scope)
138{
139 d.detach();
140 d->m_scope = scope;
141}
142
143static QString valueForJsonPath(const QJsonObject &obj, const QString &path)
144{
145 const auto pathSections = QStringView(path).split(QLatin1Char('.'));
146 QJsonValue v(obj);
147 for (const auto &pathSection : pathSections) {
148 if (!v.isObject()) {
149 return {};
150 }
151 v = v.toObject().value(pathSection.toString());
152 }
153 return v.toString();
154}
155
/** Selects how filterMachesNode() reports its results. */
enum MatchMode {
    Any, ///< return on the first match, nothing is collected
    All, ///< append matching nodes to the output vector
};
157
158static bool filterMachesNode(const ExtractorFilter &filter, ExtractorFilter::Scope scope, const ExtractorDocumentNode &node,
159 std::vector<ExtractorDocumentNode> &matches, MatchMode matchMode)
160{
161 if (node.isNull()) {
162 return false;
163 }
164
165 // filter without field/pattern always match, if the mimetype does
166 if (filter.mimeType() == node.mimeType() && ((filter.fieldName().isEmpty() && filter.pattern().isEmpty()) || node.processor()->matches(filter, node))) {
167 if (matchMode == All) {
168 matches.push_back(node);
169 }
170 return true;
171 }
172
173 if (scope != ExtractorFilter::Ancestors && filter.mimeType() == "application/ld+json"_L1 && !node.result().isEmpty()) {
174 // when collecting all matches for results, we only want the "leaf-most"
175 // ones, not those along the path
176 if (matchMode == All && scope == ExtractorFilter::Descendants) {
177 bool descendantsMatched = false;
178 for (const auto &child : node.childNodes()) {
179 descendantsMatched |= filterMachesNode(filter, ExtractorFilter::Descendants, child, matches, matchMode);
180 }
181 if (descendantsMatched) {
182 return true;
183 }
184 }
185
186 const auto res = node.result().jsonLdResult();
187 for (const auto &elem : res) {
188 const auto property = valueForJsonPath(elem.toObject(), filter.fieldName());
189 if (filter.matches(property)) {
190 if (matchMode == All) {
191 matches.push_back(node);
192 } else {
193 return true;
194 }
195 }
196 }
197 }
198
199 if (scope == ExtractorFilter::Ancestors) {
200 return filterMachesNode(filter, scope, node.parent(), matches, matchMode);
201 }
202 if (scope == ExtractorFilter::Descendants) {
203 for (const auto &child : node.childNodes()) {
204 const auto m = filterMachesNode(filter, ExtractorFilter::Descendants, child, matches, matchMode);
205 if (m && matchMode == Any) {
206 return true;
207 }
208 }
209 }
210
211 return !matches.empty();
212}
213
215{
216 std::vector<ExtractorDocumentNode> matches;
217 switch (d->m_scope) {
219 return filterMachesNode(*this, ExtractorFilter::Current, node, matches, Any);
221 return filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, Any);
223 return filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, Any);
226 for (const auto &child : node.childNodes()) {
227 if (filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child, matches, Any)) {
228 return true;
229 }
230 }
231 }
232 return false;
233}
234
235void ExtractorFilter::allMatches(const ExtractorDocumentNode &node, std::vector<ExtractorDocumentNode>& matches) const
236{
237 switch (d->m_scope) {
239 filterMachesNode(*this, ExtractorFilter::Current, node, matches, All);
240 return;
242 filterMachesNode(*this, ExtractorFilter::Current, node.parent(), matches, All);
243 return;
245 filterMachesNode(*this, ExtractorFilter::Ancestors, node.parent(), matches, All);
246 return;
249 for (const auto &child : node.childNodes()) {
250 filterMachesNode(*this, d->m_scope == ExtractorFilter::Descendants ? d->m_scope : ExtractorFilter::Current, child, matches, All);
251 }
252 return;
253 }
254}
255
256ExtractorFilter ExtractorFilter::fromJSValue(const QJSValue &js)
257{
259 f.setMimeType(js.property(QLatin1StringView("mimeType")).toString());
260 const auto fieldName = js.property(QLatin1StringView("field"));
261 if (fieldName.isString()) {
262 f.setFieldName(fieldName.toString());
263 }
264 const auto match = js.property(QLatin1StringView("match"));
265 if (match.isString()) {
266 f.setPattern(match.toString());
267 }
268 f.setScope(readEnum<ExtractorFilter::Scope>(
269 js.property(QLatin1StringView("scope")).toString(),
271 return f;
272}
273
274#include "moc_extractorfilter.cpp"
A node in the extracted document object tree.
QJsonArray result
Result access for QJSEngine.
QString mimeType
The MIME type of this node.
QVariantList childNodes
Child nodes, for QJSEngine access.
KItinerary::ExtractorDocumentNode parent
The parent node, or a null node if this is the root node.
Determines whether an extractor is applicable to a given email.
QString fieldName() const
The field to filter on.
QString mimeType() const
MIME type of the document part this filter can match.
void allMatches(const ExtractorDocumentNode &node, std::vector< ExtractorDocumentNode > &matches) const
Checks whether this filter applies to node.
bool matches(const QString &data) const
Check if data matches this filter.
Scope
Specifies which document nodes should match this filter, relative to the one being extracted.
@ Current
match the node being extracted
@ Children
match the direct child nodes
@ Descendants
match any direct or indirect child nodes
@ Ancestors
match any direct or indirect parent nodes
@ Parent
match the direct parent node
QString pattern() const
Pattern to match field value against.
Scope scope() const
Evaluation scope of this filter, in relation to the node being extracted.
char * toString(const EngineQuery &query)
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
const char * constData() const
bool isEmpty() const
iterator insert(QLatin1StringView key, const QJsonValue &value)
QJsonValue value(QLatin1StringView key) const
bool isObject() const
bool isString() const
QJsonObject toObject() const
QString toString() const
QJSValue property(const QString &name) const
QMetaEnum fromType()
QByteArray toUtf8() const
QList< QStringView > split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sat Dec 21 2024 16:56:36 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.