KItinerary

main.cpp
1/*
2 SPDX-FileCopyrightText: 2019 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include <config-kitinerary.h>
8#include <kitinerary_version.h>
9
10#include <KItinerary/CalendarHandler>
11#include <KItinerary/ExtractorCapabilities>
12#include <KItinerary/ExtractorEngine>
13#include <KItinerary/ExtractorPostprocessor>
14#include <KItinerary/ExtractorRepository>
15#include <KItinerary/ExtractorValidator>
16#include <KItinerary/JsonLdDocument>
17#include <KItinerary/MergeUtil>
18#include <KItinerary/Reservation>
19#include <KItinerary/ScriptExtractor>
20
21#include <KCalendarCore/Event>
22#include <KCalendarCore/ICalFormat>
23#include <KCalendarCore/MemoryCalendar>
24
25#include <QCommandLineParser>
26#include <QCoreApplication>
27#include <QDateTime>
28#include <QDebug>
29#include <QDir>
30#include <QFile>
31#include <QJsonArray>
32#include <QJsonDocument>
33#include <QJsonObject>
34#include <QObject>
35
36#include <iostream>
37
38using namespace KItinerary;
39
41batchReservations(const QList<QVariant> &reservations) {
42 using namespace KItinerary;
43
44 QList<QList<QVariant>> batches;
45 QList<QVariant> batch;
46
47 for (const auto &res : reservations) {
48 if (batch.isEmpty()) {
49 batch.push_back(res);
50 continue;
51 }
52
55 const auto trip1 = JsonLd::convert<Reservation>(res).reservationFor();
56 const auto trip2 =
57 JsonLd::convert<Reservation>(batch.at(0)).reservationFor();
58 if (KItinerary::MergeUtil::isSame(trip1, trip2)) {
59 batch.push_back(res);
60 continue;
61 }
62 }
63
64 batches.push_back(batch);
65 batch.clear();
66 batch.push_back(res);
67 }
68
69 if (!batch.isEmpty()) {
70 batches.push_back(batch);
71 }
72 return batches;
73}
74
75static void printCapabilities()
76{
77 std::cout << qPrintable(ExtractorCapabilities::capabilitiesString());
78}
79
80static void printExtractors()
81{
83 for (const auto &ext : repo.extractors()) {
84 std::cout << qPrintable(ext->name());
85 if (auto scriptExt = dynamic_cast<const ScriptExtractor*>(ext.get())) {
86 std::cout << " (" << qPrintable(scriptExt->mimeType()) << ", "
87 << qPrintable(scriptExt->scriptFileName()) << ":"
88 << qPrintable(scriptExt->scriptFunction()) << ")";
89 }
90 std::cout << std::endl;
91 }
92}
93
94int main(int argc, char** argv)
95{
96 QCoreApplication::setApplicationName(QStringLiteral("kitinerary-extractor"));
97 QCoreApplication::setApplicationVersion(QStringLiteral(KITINERARY_VERSION_STRING));
98 QCoreApplication::setOrganizationDomain(QStringLiteral("kde.org"));
99 QCoreApplication::setOrganizationName(QStringLiteral("KDE"));
100 QCoreApplication app(argc, argv);
101
102#ifdef KITINERARY_STANDALONE_CLI_EXTRACTOR
103 // set additional data file search path relative to the current binary location
104 // NOTE: QCoreApplication::applicationDirPath is only valid once QCoreApplication has been created
105 auto xdgDataDirs = qgetenv("XDG_DATA_DIRS");
106 if (!xdgDataDirs.isEmpty()) {
107 xdgDataDirs += QDir::listSeparator().toLatin1();
108 }
112 .toUtf8();
113 qputenv("XDG_DATA_DIRS", xdgDataDirs);
114#endif
115
116 QCommandLineParser parser;
117 parser.setApplicationDescription(QStringLiteral("Command line itinerary extractor."));
118 parser.addHelpOption();
119 parser.addVersionOption();
120 QCommandLineOption capOpt({QStringLiteral("capabilities")}, QStringLiteral("Show available extraction capabilities."));
121 parser.addOption(capOpt);
122 QCommandLineOption listExtOpt({QStringLiteral("list-extractors")}, QStringLiteral("List all available extractors."));
123 parser.addOption(listExtOpt);
124
125 QCommandLineOption ctxOpt({QStringLiteral("c"), QStringLiteral("context-date")}, QStringLiteral("ISO date/time for when this data has been received."), QStringLiteral("date"));
126 parser.addOption(ctxOpt);
127 QCommandLineOption typeOpt({QStringLiteral("t"), QStringLiteral("type")}, QStringLiteral("Deprecated, no longer needed and ignored."), QStringLiteral("type"));
128 parser.addOption(typeOpt);
129 QCommandLineOption extOpt({QStringLiteral("e"), QStringLiteral("extractors")}, QStringLiteral("Additional extractors to apply."), QStringLiteral("extractors"));
130 parser.addOption(extOpt);
131 QCommandLineOption pathsOpt({QStringLiteral("additional-search-path")}, QStringLiteral("Additional search path for extractors."), QStringLiteral("search-path"));
132 parser.addOption(pathsOpt);
133 QCommandLineOption formatOpt({QStringLiteral("o"), QStringLiteral("output")}, QStringLiteral("Output format [JsonLd, iCal]. Default: JsonLd"), QStringLiteral("format"));
134 parser.addOption(formatOpt);
135 QCommandLineOption noValidationOpt({QStringLiteral("no-validation")}, QStringLiteral("Disable result validation."));
136 parser.addOption(noValidationOpt);
137
138 parser.addPositionalArgument(QStringLiteral("input"), QStringLiteral("File to extract data from, omit for using stdin."));
139 parser.process(app);
140
142 if (parser.isSet(pathsOpt)) {
143 repo.setAdditionalSearchPaths(parser.values(pathsOpt));
144 repo.reload();
145 }
146
147 if (parser.isSet(capOpt)) {
148 printCapabilities();
149 return 0;
150 }
151 if (parser.isSet(listExtOpt)) {
152 printExtractors();
153 return 0;
154 }
155
156 ExtractorEngine engine;
157 engine.setUseSeparateProcess(false); // we are the external extractor
158 ExtractorPostprocessor postproc;
159
160 auto contextDt = QDateTime::fromString(parser.value(ctxOpt), Qt::ISODate);
161 if (!contextDt.isValid()) {
162 contextDt = QDateTime::currentDateTime();
163 }
164 postproc.setContextDate(contextDt);
165
166 const auto files = parser.positionalArguments().isEmpty() ? QStringList(QString()) : parser.positionalArguments();
167 for (const auto &arg : files) {
168 QFile f;
169 if (!arg.isEmpty()) {
170 f.setFileName(arg);
171 if (!f.open(QFile::ReadOnly)) {
172 std::cerr << qPrintable(f.errorString()) << std::endl;
173 return 1;
174 }
175 } else {
176 f.open(stdin, QFile::ReadOnly);
177 }
178
179 auto fileName = f.fileName();
180
181 engine.clear();
182 engine.setContextDate(contextDt);
183
184 if (!parser.value(extOpt).isEmpty()) {
185 const auto extNames = parser.value(extOpt).split(QLatin1Char(';'),
187 std::vector<const AbstractExtractor*> exts;
188 exts.reserve(extNames.size());
189 for (const auto &name : extNames) {
190 const auto ext = repo.extractorByName(name);
191 exts.push_back(ext);
192 }
193 engine.setAdditionalExtractors(std::move(exts));
194 }
195
196 engine.setData(f.readAll(), fileName);
197 const auto result = JsonLdDocument::fromJson(engine.extract());
198 postproc.process(result);
199 }
200
201 auto result = postproc.result();
202 if (!parser.isSet(noValidationOpt)) {
203 ExtractorValidator validator;
204 result.erase(std::remove_if(result.begin(), result.end(), [&validator](const auto &elem) {
205 return !validator.isValidElement(elem);
206 }), result.end());
207 }
208
209 if (parser.value(formatOpt).compare(QLatin1StringView("ical"),
210 Qt::CaseInsensitive) == 0) {
211 const auto batches = batchReservations(result);
214 for (const auto &batch : batches) {
216 CalendarHandler::fillEvent(batch, event);
217 cal->addEvent(event);
218 }
220 std::cout << qPrintable(format.toString(cal));
221 } else {
222 const auto postProcResult = JsonLdDocument::toJson(result);
223 std::cout << QJsonDocument(postProcResult).toJson().constData()
224 << std::endl;
225 }
226}
QString toString(const Calendar::Ptr &calendar) override
Semantic data extraction engine.
void setAdditionalExtractors(std::vector< const AbstractExtractor * > &&extractors)
Sets additional extractors to run on the given data.
void setData(const QByteArray &data, QStringView fileName={}, QStringView mimeType={})
Set raw data to extract from.
void clear()
Resets the internal state, call before processing new input data.
void setContextDate(const QDateTime &dt)
Set the date the extracted document has been issued at.
QJsonArray extract()
Perform the actual extraction, and return the JSON-LD data that has been found.
void setUseSeparateProcess(bool separateProcess)
Perform extraction of "risky" content such as PDF files in a separate process.
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void process(const QList< QVariant > &data)
This will normalize and augment the given data elements and merge them with already added data elemen...
Collection of all known data extractors.
void setAdditionalSearchPaths(const QStringList &searchPaths)
Sets additional search paths to look for extractors.
const std::vector< std::unique_ptr< AbstractExtractor > > & extractors() const
All known extractors.
void reload()
Reload the extractor repository.
const AbstractExtractor * extractorByName(QStringView name) const
Returns the extractor with the given identifier.
static QJsonArray toJson(const QList< QVariant > &data)
Serialize instantiated data types to JSON-LD.
static QList< QVariant > fromJson(const QJsonArray &array)
Convert JSON-LD array into instantiated data types.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
A single unstructured data extraction rule set.
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
void fillEvent(const QList< QVariant > &reservations, const QSharedPointer< KCalendarCore::Event > &event)
Fills event with details of reservations.
QString capabilitiesString()
Textual representation, mainly useful for bug reports/support.
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
const char * constData() const
char toLatin1() const
QCommandLineOption addHelpOption()
bool addOption(const QCommandLineOption &option)
void addPositionalArgument(const QString &name, const QString &description, const QString &syntax)
QCommandLineOption addVersionOption()
bool isSet(const QCommandLineOption &option) const
QStringList positionalArguments() const
void process(const QCoreApplication &app)
void setApplicationDescription(const QString &description)
QString value(const QCommandLineOption &option) const
QStringList values(const QCommandLineOption &option) const
QString applicationDirPath()
void setApplicationName(const QString &application)
void setApplicationVersion(const QString &version)
void setOrganizationDomain(const QString &orgDomain)
void setOrganizationName(const QString &orgName)
QDateTime currentDateTime()
QDateTime fromString(QStringView string, QStringView format, QCalendar cal)
QChar listSeparator()
QChar separator()
virtual QString fileName() const override
bool open(FILE *fh, OpenMode mode, FileHandleFlags handleFlags)
void setFileName(const QString &name)
QString errorString() const
QByteArray readAll()
QByteArray toJson(JsonFormat format) const
const_reference at(qsizetype i) const
void clear()
iterator end()
bool isEmpty() const
void push_back(parameter_type value)
void reserve(qsizetype size)
int compare(QLatin1StringView s1, const QString &s2, Qt::CaseSensitivity cs)
bool isEmpty() const
QStringList split(QChar sep, Qt::SplitBehavior behavior, Qt::CaseSensitivity cs) const
QByteArray toUtf8() const
CaseInsensitive
SkipEmptyParts
QTimeZone systemTimeZone()
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Oct 4 2024 12:00:24 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.