KItinerary

extractorrepository.cpp
1/*
2 SPDX-FileCopyrightText: 2017-2021 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorrepository.h"
9
10#include "logging.h"
11#include "extractors/activitypubextractor.h"
12#include "extractors/genericboardingpassextractor.h"
13
14#include <KItinerary/ExtractorDocumentNode>
15#include <KItinerary/ExtractorDocumentProcessor>
16#include <KItinerary/ExtractorFilter>
17#include <KItinerary/ScriptExtractor>
18
19#include <QDirIterator>
20#include <QJsonArray>
21#include <QJsonDocument>
22#include <QJsonObject>
23#include <QMetaProperty>
24#include <QStandardPaths>
25
26using namespace KItinerary;
27
28static void initResources() // must be outside of a namespace
29{
30 Q_INIT_RESOURCE(extractors);
31 Q_INIT_RESOURCE(vdv_certs);
32 Q_INIT_RESOURCE(rsp6_keys);
33}
34
35namespace KItinerary {
36class ExtractorRepositoryPrivate {
37public:
38 ExtractorRepositoryPrivate();
39 void loadAll();
40 void initBuiltInExtractors();
41 void loadScriptExtractors();
42 void addExtractor(std::unique_ptr<AbstractExtractor> &&e);
43
44 std::vector<std::unique_ptr<AbstractExtractor>> m_extractors;
45 QStringList m_extraSearchPaths;
46};
47}
48
49ExtractorRepositoryPrivate::ExtractorRepositoryPrivate()
50{
51 initResources();
52 loadAll();
53}
54
55void ExtractorRepositoryPrivate::loadAll()
56{
57 initBuiltInExtractors();
58 loadScriptExtractors();
59}
60
61void ExtractorRepositoryPrivate::initBuiltInExtractors()
62{
63 addExtractor(std::make_unique<ActivityPubExtractor>());
64 addExtractor(std::make_unique<GenericBoardingPassExtractor>());
65}
66
67ExtractorRepository::ExtractorRepository()
68{
69 static ExtractorRepositoryPrivate repo;
70 d = &repo;
71}
72
73ExtractorRepository::~ExtractorRepository() = default;
74ExtractorRepository::ExtractorRepository(KItinerary::ExtractorRepository &&) noexcept = default;
75
76void ExtractorRepository::reload()
77{
78 d->m_extractors.clear();
79 d->loadAll();
80}
81
82const std::vector<std::unique_ptr<AbstractExtractor>>& ExtractorRepository::extractors() const
83{
84 return d->m_extractors;
85}
86
87void ExtractorRepository::extractorsForNode(const ExtractorDocumentNode &node, std::vector<const AbstractExtractor*> &extractors) const
88{
89 if (node.isNull()) {
90 return;
91 }
92
93 for (const auto &extractor : d->m_extractors) {
94 if (extractor->canHandle(node)) {
95 // while we only would add each extractor at most once, some of them might already be in the list, so de-duplicate
96 const auto it = std::lower_bound(extractors.begin(), extractors.end(), extractor.get(), [](auto lhs, auto rhs) {
97 return lhs < rhs;
98 });
99 if (it == extractors.end() || (*it) != extractor.get()) {
100 extractors.insert(it, extractor.get());
101 }
102 }
103 }
104}
105
107{
108 auto it = std::lower_bound(d->m_extractors.begin(), d->m_extractors.end(), name, [](const auto &lhs, auto rhs) {
109 return lhs->name() < rhs;
110 });
111 if (it != d->m_extractors.end() && (*it)->name() == name) {
112 return (*it).get();
113 }
114 return {};
115}
116
117void ExtractorRepositoryPrivate::loadScriptExtractors()
118{
119 auto searchDirs = m_extraSearchPaths;
121 for (const auto &p : qsp) {
122 searchDirs.push_back(p + QLatin1StringView("/kitinerary/extractors"));
123 }
124 searchDirs += QStringLiteral(":/org.kde.pim/kitinerary/extractors");
125
126 for (const auto &dir : std::as_const(searchDirs)) {
127 QDirIterator it(dir, QDir::Files);
128 while (it.hasNext()) {
129 const auto fileName = it.next();
130 if (!fileName.endsWith(QLatin1StringView(".json"))) {
131 continue;
132 }
133
134 QFile file(fileName);
135 if (!file.open(QFile::ReadOnly)) {
136 continue;
137 }
138
140 const auto doc = QJsonDocument::fromJson(file.readAll(), &error);
141 if (doc.isNull()) {
142 qCWarning(Log) << "Extractor loading error:" << fileName << error.errorString();
143 continue;
144 }
145
146 QFileInfo fi(fileName);
147 const auto name = fi.fileName().left(fi.fileName().size() - 5);
148
149 if (doc.isObject()) {
150 const auto obj = doc.object();
151 auto ext = std::make_unique<ScriptExtractor>();
152 if (ext->load(obj, fi.canonicalFilePath())) {
153 addExtractor(std::move(ext));
154 } else {
155 qCWarning(Log) << "failed to load extractor:" << fi.canonicalFilePath();
156 }
157 } else if (doc.isArray()) {
158 const auto extractorArray = doc.array();
159 int i = 0;
160 for (const auto &v : extractorArray) {
161 auto ext = std::make_unique<ScriptExtractor>();
162 if (ext->load(v.toObject(), fi.canonicalFilePath(), extractorArray.size() == 1 ? -1 : i)) {
163 addExtractor(std::move(ext));
164 } else {
165 qCWarning(Log) << "failed to load extractor:" << fi.canonicalFilePath();
166 }
167 ++i;
168 }
169 } else {
170 qCWarning(Log) << "Invalid extractor meta-data:" << fileName;
171 continue;
172 }
173 }
174 }
175}
176
177void ExtractorRepositoryPrivate::addExtractor(std::unique_ptr<AbstractExtractor> &&e)
178{
179 auto it = std::lower_bound(m_extractors.begin(), m_extractors.end(), e, [](const auto &lhs, const auto &rhs) {
180 return lhs->name() < rhs->name();
181 });
182 if (it == m_extractors.end() || (*it)->name() != e->name()) {
183 m_extractors.insert(it, std::move(e));
184 }
185}
186
188{
189 return d->m_extraSearchPaths;
190}
191
193{
194 d->m_extraSearchPaths = searchPaths;
195}
196
197QJsonValue ExtractorRepository::extractorToJson(const ScriptExtractor *extractor) const
198{
199 QJsonArray a;
200 bool added = false;
201 for (const auto &ext : d->m_extractors) {
202 auto e = dynamic_cast<ScriptExtractor*>(ext.get());
203 if (!e || e->fileName() != extractor->fileName()) {
204 continue;
205 }
206 if (extractor->name() == e->name()) {
207 a.push_back(extractor->toJson());
208 added = true;
209 } else {
210 a.push_back(e->toJson());
211 }
212 }
213 if (!added) {
214 a.push_back(extractor->toJson());
215 }
216
217 if (a.size() == 1) {
218 return a.at(0);
219 }
220 return a;
221}
Abstract base class for data extractors.
A node in the extracted document object tree.
Collection of all known data extractors.
void setAdditionalSearchPaths(const QStringList &searchPaths)
Sets additional search paths to look for extractors.
const std::vector< std::unique_ptr< AbstractExtractor > > & extractors() const
All known extractors.
const AbstractExtractor * extractorByName(QStringView name) const
Returns the extractor with the given identifier.
void extractorsForNode(const ExtractorDocumentNode &node, std::vector< const AbstractExtractor * > &extractors) const
Finds matching extractors for the given document node.
QStringList additionalSearchPaths() const
Returns the list of additional search paths for extractor scripts.
A single unstructured data extraction rule set.
QString name() const override
Identifier for this extractor.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
QJsonValue at(qsizetype i) const const
void push_back(const QJsonValue &value)
qsizetype size() const const
QJsonDocument fromJson(const QByteArray &json, QJsonParseError *error)
QStringList standardLocations(StandardLocation type)
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:00 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.