KItinerary

jsonldimportfilter.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "jsonldimportfilter.h"
8#include "json/jsonld.h"
9#include "json/jsonldfilterengine.h"
10#include "logging.h"
11
12#include <QDate>
13#include <QDebug>
14#include <QJsonArray>
15#include <QJsonDocument>
16#include <QJsonObject>
17#include <QUrl>
18
19#include <cstring>
20
21using namespace Qt::Literals::StringLiterals;
22using namespace KItinerary;
23
24// type normalization from full schema.org type hierarchy to our simplified subset
25// IMPORTANT: keep alphabetically sorted by fromType!
26static constexpr const JsonLdFilterEngine::TypeMapping type_mapping[] = {
27 { "AutoDealer", "LocalBusiness" },
28 { "AutoRepair", "LocalBusiness" },
29 { "AutomotiveBusiness", "LocalBusiness" },
30 { "Bakery", "FoodEstablishment" },
31 { "BarOrPub", "FoodEstablishment" },
32 { "BedAndBreakfast", "LodgingBusiness" },
33 { "Brewery", "FoodEstablishment" },
34 { "BusStop", "BusStation" },
35 { "BusinessEvent", "Event" },
36 { "CafeOrCoffeeShop", "FoodEstablishment" },
37 { "Campground", "LodgingBusiness" },
38 { "ChildrensEvent", "Event" },
39 { "ComedyEvent", "Event" },
40 { "ComputerStore", "LocalBusiness" },
41 { "DanceEvent", "Event" },
42 { "Distillery", "FoodEstablishment" },
43 { "EditAction", "UpdateAction" },
44 { "EducationEvent", "Event" },
45 { "ElectronicsStore", "LocalBusiness" },
46 { "EntertainmentBusiness", "LocalBusiness" },
47 { "ExhibitionEvent", "Event" },
48 { "FastFoodRestaurant", "FoodEstablishment" },
49 { "Festival", "Event" },
50 { "HobbyShop", "LocalBusiness" },
51 { "HomeAndConstructionBusiness", "LocalBusiness" },
52 { "Hostel", "LodgingBusiness" },
53 { "Hotel", "LodgingBusiness" },
54 { "IceCreamShop", "FoodEstablishment" },
55 { "LiteraryEvent", "Event" },
56 { "Motel", "LodgingBusiness" },
57 { "MovieTheater", "LocalBusiness" },
58 { "MusicEvent", "Event" },
59 { "Resort", "LodgingBusiness" },
60 { "Restaurant", "FoodEstablishment" },
61 { "SaleEvent", "Event" },
62 { "ScreeningEvent", "Event" },
63 { "SocialEvent", "Event" },
64 { "SportsEvent", "Event" },
65 { "Store", "LocalBusiness" },
66 { "TheaterEvent", "Event" },
67 { "VisualArtsEvent", "Event" },
68 { "Winery", "FoodEstablishment" },
69};
70
71static void unpackArray(QJsonObject &obj, QLatin1StringView key) {
72 const auto val = obj.value(key);
73 if (!val.isArray()) {
74 return;
75 }
76 const auto arr = val.toArray();
77 if (arr.isEmpty()) {
78 return;
79 }
80 obj.insert(key, arr.at(0));
81}
82
83static void migrateToAction(QJsonObject &obj, const char *propName, const char *typeName, bool remove)
84{
85 const auto value = obj.value(QLatin1StringView(propName));
86 if (value.isNull() || value.isUndefined()) {
87 return;
88 }
89
90 const auto actionsVal = obj.value("potentialAction"_L1);
91 QJsonArray actions;
92 if (actionsVal.isArray()) {
93 actions = actionsVal.toArray();
94 } else if (actionsVal.isObject()) {
95 actions = { actionsVal };
96 }
97
98 for (const auto &act : actions) {
99 if (JsonLd::typeName(act.toObject()) == QLatin1StringView(typeName)) {
100 return;
101 }
102 }
103
104 QJsonObject action;
105 action.insert(QStringLiteral("@type"), QLatin1StringView(typeName));
106 action.insert(QStringLiteral("target"), value);
107 actions.push_back(action);
108 obj.insert(QStringLiteral("potentialAction"), actions);
109
110 if (remove) {
111 obj.remove(QLatin1StringView(propName));
112 }
113}
114
115static void filterPlace(QJsonObject &obj)
116{
117 // convert text address to PostalAddress
118 if (const auto addr = obj.value("address"_L1); addr.isString()) {
119 obj.insert("address"_L1, QJsonObject{
120 {"@type"_L1, "PostalAddress"_L1},
121 {"streetAddress"_L1, addr.toString()},
122 });
123 }
124 // same for geo coordinates
125 const auto lat = obj.value("latitude"_L1);
126 const auto lon = obj.value("longitude"_L1);
127 if (lat.isDouble() && lon.isDouble()) {
128 auto geo = obj.value("geo"_L1).toObject();
129 if (!geo.contains("@type"_L1)) {
130 geo.insert("@type"_L1, "GeoCoordinates"_L1);
131 }
132 if (!geo.contains("latitude"_L1)) {
133 geo.insert("latitude"_L1, lat);
134 }
135 if (!geo.contains("longitude"_L1)) {
136 geo.insert("longitude"_L1, lon);
137 }
138 obj.insert("geo"_L1, geo);
139 }
140}
141
142static void filterFlight(QJsonObject &res)
143{
144 // move incomplete departureTime (ie. just ISO date, no time) to departureDay
145 if (res.value(QLatin1StringView("departureTime")).toString().size() == 10) {
146 JsonLd::renameProperty(res, "departureTime", "departureDay");
147 }
148}
149
150static void filterReservation(QJsonObject &res)
151{
152 // move ticketToken and ticketNumber to Ticket (Google vs. schema.org difference)
153 for (const auto key : {"ticketToken"_L1, "ticketNumber"_L1}) {
154 const auto v = res.value(key).toString();
155 if (!v.isEmpty()) {
156 auto ticket = res.value("reservedTicket"_L1).toObject();
157 if (ticket.isEmpty()) {
158 ticket.insert("@type"_L1, u"Ticket"_s);
159 }
160 if (!ticket.contains(key)) {
161 ticket.insert(key, v);
162 res.insert("reservedTicket"_L1, ticket);
163 res.remove(key);
164 }
165 }
166 }
167
168 // normalize reservationStatus enum
169 auto resStat = res.value("reservationStatus"_L1).toString();
170 if (!resStat.isEmpty()) {
171 if (resStat.startsWith("https:"_L1)) {
172 resStat.remove(4, 1);
173 }
174 if (!resStat.contains("/Reservation"_L1)) {
175 res.insert("reservationStatus"_L1, resStat.replace("http://schema.org/"_L1, "http://schema.org/Reservation"_L1));
176 }
177 }
178
179 // legacy properties
180 JsonLd::renameProperty(res, "programMembership", "programMembershipUsed");
181 JsonLd::renameProperty(res, "price", "totalPrice");
182
183 // legacy potentialAction property
184 JsonLd::renameProperty(res, "action", "potentialAction");
185
186 // move Google xxxUrl properties to Action instances
187 migrateToAction(res, "cancelReservationUrl", "CancelAction", true);
188 migrateToAction(res, "checkinUrl", "CheckInAction", true);
189 migrateToAction(res, "modifyReservationUrl", "UpdateAction", true);
190 migrateToAction(res, "ticketDownloadUrl", "DownloadAction", true);
191 migrateToAction(res, "url", "ViewAction", false);
192
193 // technically the wrong way (reservationId is the current schema.org standard), but hardly used anywhere (yet)
194 JsonLd::renameProperty(res, "reservationId", "reservationNumber");
195
196 // "typos"
197 JsonLd::renameProperty(res, "Url", "url");
198}
199
200static void filterFoodEstablishment(QJsonObject &restaurant)
201{
202 // This can be a bool, "Yes"/"No", or a URL.
203 auto reservationsValue =
204 restaurant.value(QLatin1StringView("acceptsReservations"));
205 if (reservationsValue.isString()) {
206 const QString reservations = reservationsValue.toString();
207 if (reservations == QLatin1StringView("Yes")) {
208 restaurant.insert(QLatin1StringView("acceptsReservations"), true);
209 } else if (reservations == QLatin1StringView("No")) {
210 restaurant.insert(QLatin1StringView("acceptsReservations"), false);
211 } else {
212 migrateToAction(restaurant, "acceptsReservations", "ReserveAction",
213 true);
214 }
215 }
216
217 filterPlace(restaurant);
218}
219
220static void filterActionTarget(QJsonObject &action)
221{
222 QJsonArray targets;
223 QString filteredTargetUrlString;
224
225 const QJsonValue oldTarget = action.value(QLatin1StringView("target"));
226 if (oldTarget.isArray()) {
227 targets = oldTarget.toArray();
228 } else if (oldTarget.isObject()) {
229 targets.push_back(oldTarget);
230 }
231
232 for (auto it = targets.begin(); it != targets.end(); ++it) {
233 auto target = (*it).toObject();
234
235 QJsonArray platforms;
236
237 const QJsonValue actionPlatform =
238 target.value(QLatin1StringView("actionPlatform"));
239 if (actionPlatform.isArray()) {
240 platforms = actionPlatform.toArray();
241 } else {
242 platforms.push_back(actionPlatform);
243 }
244
245 // Always return at least one URL but prefer the current platform if possible
246 if (!filteredTargetUrlString.isEmpty()) {
247 const bool hasPreferredPlatform = std::any_of(platforms.begin(), platforms.end(), [](const QJsonValue &platformValue) {
248 const QString platform = platformValue.toString();
249 // FIXME android
250 return platform == QLatin1StringView(
251 "http://schema.org/DesktopWebPlatform");
252 });
253
254 if (!hasPreferredPlatform) {
255 continue;
256 }
257 }
258
259 const QUrl url(
260 target.value(QLatin1StringView("urlTemplate")).toString());
261 // It could also be a "URL template"
262 if (!url.isValid()) {
263 continue;
264 }
265
266 filteredTargetUrlString = url.toString();
267 }
268
269 if (filteredTargetUrlString.isEmpty()) {
270 JsonLd::renameProperty(action, "url", "target");
271 } else {
272 action.insert(QStringLiteral("target"), filteredTargetUrlString);
273 }
274}
275
276static QJsonArray filterActions(const QJsonValue &v)
277{
278 QJsonArray actions;
279 if (v.isArray()) {
280 actions = v.toArray();
281 } else {
282 actions.push_back(v);
283 }
284
285 for (auto it = actions.begin(); it != actions.end(); ++it) {
286 auto action = (*it).toObject();
287 filterActionTarget(action);
288 *it = action;
289 }
290
291 return actions;
292}
293
294static void filterEvent(QJsonObject &obj)
295{
296 unpackArray(obj, "location"_L1);
297
298 // date only end: set time to end of day
299 if (const auto endDate = obj.value("endDate"_L1).toString(); endDate.size() == 10) {
300 const auto date = QDate::fromString(endDate, Qt::ISODate);
301 if (date.isValid()) {
302 obj.insert("endDate"_L1, date.endOfDay().toString(Qt::ISODate));
303 }
304 }
305}
306
307static void filterPostalAddress(QJsonObject &obj)
308{
309 // unpack country objects
310 auto country = obj.value(QLatin1StringView("addressCountry"));
311 if (country.isObject()) {
312 obj.insert(QLatin1StringView("addressCountry"),
313 country.toObject().value(QLatin1StringView("name")));
314 }
315}
316
317// filter functions applied to objects of the corresponding (already normalized) type
318// IMPORTANT: keep alphabetically sorted by type!
319static constexpr const JsonLdFilterEngine::TypeFilter type_filters[] = {
320 { "Event", filterEvent },
321 { "Flight", filterFlight },
322 { "FoodEstablishment", filterFoodEstablishment },
323 { "LocalBusiness", filterPlace },
324 { "Organization", filterPlace },
325 { "Place", filterPlace },
326 { "PostalAddress", filterPostalAddress },
327};
328
329// property renaming
330// IMPORTANT: keep alphabetically sorted by type!
331static constexpr const JsonLdFilterEngine::PropertyMapping property_mappings[] = {
332 { "BusTrip", "arrivalStation", "arrivalBusStop" },
333 { "BusTrip", "busCompany", "provider" },
334 { "BusTrip", "departureStation", "departureBusStop" },
335
336 // technically the wrong way around, but we still use the much more common old name
337 { "Flight", "provider", "airline" },
338
339 // check[in|out]Date -> check[in|out]Time (legacy Google format)
340 { "LodgingReservation", "checkinDate", "checkinTime" },
341 { "LodgingReservation", "checkoutDate", "checkoutTime" },
342
343 { "ProgramMembership", "program", "programName" },
344 { "ProgramMembership", "memberNumber", "membershipNumber" },
345
346 { "Reservation", "price", "totalPrice" },
347 { "Ticket", "price", "totalPrice" },
348
349 // move TrainTrip::trainCompany to TrainTrip::provider (as defined by schema.org)
350 { "TrainTrip", "trainCompany", "provider" },
351};
352
353static QJsonArray graphExpand(const QJsonObject &obj)
354{
355 QJsonArray result;
356
357 const auto graph = obj.value(QLatin1StringView("@graph")).toArray();
358 for (const auto &o : graph) {
359 const auto a = JsonLdImportFilter::filterObject(o.toObject());
360 std::copy(a.begin(), a.end(), std::back_inserter(result));
361 }
362
363 return result;
364}
365
367{
368 QStringList types;
369 const auto typeVal = obj.value(QLatin1StringView("@type"));
370 if (typeVal.isString()) {
371 types.push_back(typeVal.toString());
372 } else if (typeVal.isArray()) {
373 const auto typeNames = typeVal.toArray();
374 for (const auto &t : typeNames) {
375 if (t.isString()) {
376 types.push_back(t.toString());
377 }
378 }
379 }
380 // TODO consider additionalTypes property
381
382 if (types.isEmpty()) {
383 return graphExpand(obj);
384 }
385
386 QJsonArray results;
387
388 JsonLdFilterEngine filterEngine;
389 filterEngine.setTypeMappings(type_mapping);
390 filterEngine.setTypeFilters(type_filters);
391 filterEngine.setPropertyMappings(property_mappings);
392 for (const auto &type : types) {
393 QJsonObject res(obj);
394 res.insert(QStringLiteral("@type"), type);
395 filterEngine.filterRecursive(res);
396
397 // fold mainEntityOfPage into res
398 if (const auto mainEntityOfPage =
399 res.value(QLatin1StringView("mainEntityOfPage")).toObject();
400 !mainEntityOfPage.isEmpty()) {
401 res.remove(QLatin1StringView("mainEntityOfPage"));
402 for (auto it = mainEntityOfPage.begin(); it != mainEntityOfPage.end();
403 ++it) {
404 if (it.key().startsWith(QLatin1Char('@')) ||
405 res.contains(it.key())) {
406 continue;
407 }
408 res.insert(it.key(), it.value());
409 }
410 }
411
412 if (type.endsWith(QLatin1StringView("Reservation"))) {
413 filterReservation(res);
414 }
415
416 auto actions = res.value(QLatin1StringView("potentialAction"));
417 if (!actions.isUndefined()) {
418 res.insert(QStringLiteral("potentialAction"), filterActions(actions));
419 }
420
421 unpackArray(res, QLatin1StringView("image"));
422 const auto image = res.value(QLatin1StringView("image"));
423 if (image.isObject()) {
424 const auto imageObject = image.toObject();
425 if (JsonLd::typeName(imageObject) ==
426 QLatin1StringView("ImageObject")) {
427 res.insert(QStringLiteral("image"),
428 imageObject.value(QLatin1StringView("url")));
429 }
430 }
431
432 // unpack reservationFor array - multiply the result for each entry in here
433 const auto resFor = res.value(QLatin1StringView("reservationFor"));
434 if (const auto a = resFor.toArray(); !a.isEmpty()) {
435 for (const auto &entry : a) {
436 res.insert(QLatin1StringView("reservationFor"), entry);
437 results.push_back(res);
438 }
439 } else {
440 results.push_back(res);
441 }
442 }
443
444 return results;
445}
JSON-LD filtering for input normalization or type transforms.
void filterRecursive(QJsonObject &obj)
Recursively apply filtering rules to obj.
char * toString(const EngineQuery &query)
QJsonArray filterObject(const QJsonObject &obj)
Filter the top-level object obj for loading with JsonLdDocument.
QString typeName(const QJsonObject &obj)
Normalized type name from object.
void renameProperty(QJsonObject &obj, const char *oldName, const char *newName)
Rename a property, if present and the new name isn't in use already.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
QStringView country(QStringView ifopt)
QDate fromString(QStringView string, QStringView format, QCalendar cal)
iterator begin()
iterator end()
void push_back(const QJsonValue &value)
bool contains(QLatin1StringView key) const
iterator insert(QLatin1StringView key, const QJsonValue &value)
void remove(QLatin1StringView key)
QJsonValue value(QLatin1StringView key) const
bool isArray() const
bool isObject() const
bool isString() const
QJsonArray toArray() const
QJsonObject toObject() const
QString toString() const
bool isEmpty() const
void push_back(parameter_type value)
bool isEmpty() const
qsizetype size() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 4 2024 16:28:48 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.