KItinerary

jsonldimportfilter.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "datatypes/place.h"
8#include "jsonldimportfilter.h"
9#include "locationutil.h"
10#include "json/jsonld.h"
11#include "json/jsonldfilterengine.h"
12#include "logging.h"
13
14#include <QDate>
15#include <QDebug>
16#include <QJsonArray>
17#include <QJsonDocument>
18#include <QJsonObject>
19#include <QUrl>
20
21#include <cstring>
22#include <optional>
23
24using namespace Qt::Literals::StringLiterals;
25using namespace KItinerary;
26
27// type normalization from full schema.org type hierarchy to our simplified subset
28// IMPORTANT: keep alphabetically sorted by fromType!
29static constexpr const JsonLdFilterEngine::TypeMapping type_mapping[] = {
30 { "AutoDealer", "LocalBusiness" },
31 { "AutoRepair", "LocalBusiness" },
32 { "AutomotiveBusiness", "LocalBusiness" },
33 { "Bakery", "FoodEstablishment" },
34 { "BarOrPub", "FoodEstablishment" },
35 { "BedAndBreakfast", "LodgingBusiness" },
36 { "Brewery", "FoodEstablishment" },
37 { "BusStop", "BusStation" },
38 { "BusinessEvent", "Event" },
39 { "CafeOrCoffeeShop", "FoodEstablishment" },
40 { "Campground", "LodgingBusiness" },
41 { "ChildrensEvent", "Event" },
42 { "ComedyEvent", "Event" },
43 { "ComputerStore", "LocalBusiness" },
44 { "DanceEvent", "Event" },
45 { "Distillery", "FoodEstablishment" },
46 { "EditAction", "UpdateAction" },
47 { "EducationEvent", "Event" },
48 { "ElectronicsStore", "LocalBusiness" },
49 { "EntertainmentBusiness", "LocalBusiness" },
50 { "ExhibitionEvent", "Event" },
51 { "FastFoodRestaurant", "FoodEstablishment" },
52 { "Festival", "Event" },
53 { "HobbyShop", "LocalBusiness" },
54 { "HomeAndConstructionBusiness", "LocalBusiness" },
55 { "Hostel", "LodgingBusiness" },
56 { "Hotel", "LodgingBusiness" },
57 { "IceCreamShop", "FoodEstablishment" },
58 { "LiteraryEvent", "Event" },
59 { "Motel", "LodgingBusiness" },
60 { "MovieTheater", "LocalBusiness" },
61 { "MusicEvent", "Event" },
62 { "Resort", "LodgingBusiness" },
63 { "Restaurant", "FoodEstablishment" },
64 { "SaleEvent", "Event" },
65 { "ScreeningEvent", "Event" },
66 { "SocialEvent", "Event" },
67 { "SportsEvent", "Event" },
68 { "Store", "LocalBusiness" },
69 { "TheaterEvent", "Event" },
70 { "VisualArtsEvent", "Event" },
71 { "Winery", "FoodEstablishment" },
72};
73
74static void unpackArray(QJsonObject &obj, QLatin1StringView key) {
75 const auto val = obj.value(key);
76 if (!val.isArray()) {
77 return;
78 }
79 const auto arr = val.toArray();
80 if (arr.isEmpty()) {
81 return;
82 }
83 obj.insert(key, arr.at(0));
84}
85
86static void migrateToAction(QJsonObject &obj, const char *propName, const char *typeName, bool remove)
87{
88 const auto value = obj.value(QLatin1StringView(propName));
89 if (value.isNull() || value.isUndefined()) {
90 return;
91 }
92
93 const auto actionsVal = obj.value("potentialAction"_L1);
94 QJsonArray actions;
95 if (actionsVal.isArray()) {
96 actions = actionsVal.toArray();
97 } else if (actionsVal.isObject()) {
98 actions = { actionsVal };
99 }
100
101 for (const auto &act : actions) {
102 if (JsonLd::typeName(act.toObject()) == QLatin1StringView(typeName)) {
103 return;
104 }
105 }
106
107 QJsonObject action;
108 action.insert(QStringLiteral("@type"), QLatin1StringView(typeName));
109 action.insert(QStringLiteral("target"), value);
110 actions.push_back(action);
111 obj.insert(QStringLiteral("potentialAction"), actions);
112
113 if (remove) {
114 obj.remove(QLatin1StringView(propName));
115 }
116}
117
118static void filterPlace(QJsonObject &obj)
119{
120 // convert text address to PostalAddress
121 if (const auto addr = obj.value("address"_L1); addr.isString()) {
122 obj.insert("address"_L1, QJsonObject{
123 {"@type"_L1, "PostalAddress"_L1},
124 {"streetAddress"_L1, addr.toString()},
125 });
126 }
127 // same for geo coordinates
128 std::optional<double> lat;
129 std::optional<double> lon;
130
131 const auto latValue = obj.value("latitude"_L1);
132 const auto lonValue = obj.value("longitude"_L1);
133 if (latValue.isDouble() && lonValue.isDouble()) {
134 lat = latValue.toDouble();
135 lon = lonValue.toDouble();
136 } else {
137 // Try to convert map links to geo coordinates.
138 for (const auto &key : {"hasMap"_L1, "maps"_L1, "map"_L1}) {
139 const QUrl url = QUrl(obj.value(key).toString());
140 if (!url.isValid()) {
141 continue;
142 }
143
144 const auto geo = LocationUtil::geoFromUrl(url);
145 if (!geo.isValid()) {
146 continue;
147 }
148
149 lat = geo.latitude();
150 lon = geo.longitude();
151 break;
152 }
153 }
154
155 if (lat.has_value() && lon.has_value()) {
156 auto geo = obj.value("geo"_L1).toObject();
157 if (!geo.contains("@type"_L1)) {
158 geo.insert("@type"_L1, "GeoCoordinates"_L1);
159 }
160 if (!geo.contains("latitude"_L1)) {
161 geo.insert("latitude"_L1, *lat);
162 }
163 if (!geo.contains("longitude"_L1)) {
164 geo.insert("longitude"_L1, *lon);
165 }
166 obj.insert("geo"_L1, geo);
167 }
168}
169
170static void filterFlight(QJsonObject &res)
171{
172 // move incomplete departureTime (ie. just ISO date, no time) to departureDay
173 if (res.value(QLatin1StringView("departureTime")).toString().size() == 10) {
174 JsonLd::renameProperty(res, "departureTime", "departureDay");
175 }
176}
177
178static void filterReservation(QJsonObject &res)
179{
180 // move ticketToken and ticketNumber to Ticket (Google vs. schema.org difference)
181 for (const auto key : {"ticketToken"_L1, "ticketNumber"_L1}) {
182 const auto v = res.value(key).toString();
183 if (!v.isEmpty()) {
184 auto ticket = res.value("reservedTicket"_L1).toObject();
185 if (ticket.isEmpty()) {
186 ticket.insert("@type"_L1, u"Ticket"_s);
187 }
188 if (!ticket.contains(key)) {
189 ticket.insert(key, v);
190 res.insert("reservedTicket"_L1, ticket);
191 res.remove(key);
192 }
193 }
194 }
195
196 // normalize reservationStatus enum
197 auto resStat = res.value("reservationStatus"_L1).toString();
198 if (!resStat.isEmpty()) {
199 if (resStat.startsWith("https:"_L1)) {
200 resStat.remove(4, 1);
201 }
202 if (!resStat.contains("/Reservation"_L1)) {
203 res.insert("reservationStatus"_L1, resStat.replace("http://schema.org/"_L1, "http://schema.org/Reservation"_L1));
204 }
205 }
206
207 // legacy properties
208 JsonLd::renameProperty(res, "programMembership", "programMembershipUsed");
209 JsonLd::renameProperty(res, "price", "totalPrice");
210
211 // legacy potentialAction property
212 JsonLd::renameProperty(res, "action", "potentialAction");
213
214 // move Google xxxUrl properties to Action instances
215 migrateToAction(res, "cancelReservationUrl", "CancelAction", true);
216 migrateToAction(res, "checkinUrl", "CheckInAction", true);
217 migrateToAction(res, "modifyReservationUrl", "UpdateAction", true);
218 migrateToAction(res, "ticketDownloadUrl", "DownloadAction", true);
219 migrateToAction(res, "url", "ViewAction", false);
220
221 // technically the wrong way (reservationId is the current schema.org standard), but hardly used anywhere (yet)
222 JsonLd::renameProperty(res, "reservationId", "reservationNumber");
223
224 // "typos"
225 JsonLd::renameProperty(res, "Url", "url");
226}
227
228static void filterFoodEstablishment(QJsonObject &restaurant)
229{
230 // This can be a bool, "Yes"/"No", or a URL.
231 auto reservationsValue =
232 restaurant.value(QLatin1StringView("acceptsReservations"));
233 if (reservationsValue.isString()) {
234 const QString reservations = reservationsValue.toString();
235 if (reservations == QLatin1StringView("Yes")) {
236 restaurant.insert(QLatin1StringView("acceptsReservations"), true);
237 } else if (reservations == QLatin1StringView("No")) {
238 restaurant.insert(QLatin1StringView("acceptsReservations"), false);
239 } else {
240 migrateToAction(restaurant, "acceptsReservations", "ReserveAction",
241 true);
242 }
243 }
244
245 filterPlace(restaurant);
246}
247
248static void filterActionTarget(QJsonObject &action)
249{
250 QJsonArray targets;
251 QString filteredTargetUrlString;
252
253 const QJsonValue oldTarget = action.value(QLatin1StringView("target"));
254 if (oldTarget.isArray()) {
255 targets = oldTarget.toArray();
256 } else if (oldTarget.isObject()) {
257 targets.push_back(oldTarget);
258 }
259
260 for (auto it = targets.begin(); it != targets.end(); ++it) {
261 auto target = (*it).toObject();
262
263 QJsonArray platforms;
264
265 const QJsonValue actionPlatform =
266 target.value(QLatin1StringView("actionPlatform"));
267 if (actionPlatform.isArray()) {
268 platforms = actionPlatform.toArray();
269 } else {
270 platforms.push_back(actionPlatform);
271 }
272
273 // Always return at least one URL but prefer the current platform if possible
274 if (!filteredTargetUrlString.isEmpty()) {
275 const bool hasPreferredPlatform = std::any_of(platforms.begin(), platforms.end(), [](const QJsonValue &platformValue) {
276 const QString platform = platformValue.toString();
277 // FIXME android
278 return platform == QLatin1StringView(
279 "http://schema.org/DesktopWebPlatform");
280 });
281
282 if (!hasPreferredPlatform) {
283 continue;
284 }
285 }
286
287 const QUrl url(
288 target.value(QLatin1StringView("urlTemplate")).toString());
289 // It could also be a "URL template"
290 if (!url.isValid()) {
291 continue;
292 }
293
294 filteredTargetUrlString = url.toString();
295 }
296
297 if (filteredTargetUrlString.isEmpty()) {
298 JsonLd::renameProperty(action, "url", "target");
299 } else {
300 action.insert(QStringLiteral("target"), filteredTargetUrlString);
301 }
302}
303
304static QJsonArray filterActions(const QJsonValue &v)
305{
306 QJsonArray actions;
307 if (v.isArray()) {
308 actions = v.toArray();
309 } else {
310 actions.push_back(v);
311 }
312
313 for (auto it = actions.begin(); it != actions.end(); ++it) {
314 auto action = (*it).toObject();
315 filterActionTarget(action);
316 *it = action;
317 }
318
319 return actions;
320}
321
322static void filterEvent(QJsonObject &obj)
323{
324 unpackArray(obj, "location"_L1);
325
326 // date only end: set time to end of day
327 if (const auto endDate = obj.value("endDate"_L1).toString(); endDate.size() == 10) {
328 const auto date = QDate::fromString(endDate, Qt::ISODate);
329 if (date.isValid()) {
330 obj.insert("endDate"_L1, date.endOfDay().toString(Qt::ISODate));
331 }
332 }
333}
334
335static void filterPostalAddress(QJsonObject &obj)
336{
337 // unpack country objects
338 auto country = obj.value(QLatin1StringView("addressCountry"));
339 if (country.isObject()) {
340 obj.insert(QLatin1StringView("addressCountry"),
341 country.toObject().value(QLatin1StringView("name")));
342 }
343}
344
345// filter functions applied to objects of the corresponding (already normalized) type
346// IMPORTANT: keep alphabetically sorted by type!
347static constexpr const JsonLdFilterEngine::TypeFilter type_filters[] = {
348 { "Event", filterEvent },
349 { "Flight", filterFlight },
350 { "FoodEstablishment", filterFoodEstablishment },
351 { "LocalBusiness", filterPlace },
352 { "LodgingBusiness", filterPlace },
353 { "Organization", filterPlace },
354 { "Place", filterPlace },
355 { "PostalAddress", filterPostalAddress },
356};
357
358// property renaming
359// IMPORTANT: keep alphabetically sorted by type!
360static constexpr const JsonLdFilterEngine::PropertyMapping property_mappings[] = {
361 { "BusTrip", "arrivalStation", "arrivalBusStop" },
362 { "BusTrip", "busCompany", "provider" },
363 { "BusTrip", "departureStation", "departureBusStop" },
364
365 // technically the wrong way around, but we still use the much more common old name
366 { "Flight", "provider", "airline" },
367
368 // check[in|out]Date -> check[in|out]Time (legacy Google format)
369 { "LodgingReservation", "checkinDate", "checkinTime" },
370 { "LodgingReservation", "checkoutDate", "checkoutTime" },
371
372 { "ProgramMembership", "program", "programName" },
373 { "ProgramMembership", "memberNumber", "membershipNumber" },
374
375 { "Reservation", "price", "totalPrice" },
376 { "Ticket", "price", "totalPrice" },
377
378 // move TrainTrip::trainCompany to TrainTrip::provider (as defined by schema.org)
379 { "TrainTrip", "trainCompany", "provider" },
380};
381
382static QJsonArray graphExpand(const QJsonObject &obj)
383{
384 QJsonArray result;
385
386 const auto graph = obj.value(QLatin1StringView("@graph")).toArray();
387 for (const auto &o : graph) {
388 const auto a = JsonLdImportFilter::filterObject(o.toObject());
389 std::copy(a.begin(), a.end(), std::back_inserter(result));
390 }
391
392 return result;
393}
394
396{
397 QStringList types;
398 const auto typeVal = obj.value(QLatin1StringView("@type"));
399 if (typeVal.isString()) {
400 types.push_back(typeVal.toString());
401 } else if (typeVal.isArray()) {
402 const auto typeNames = typeVal.toArray();
403 for (const auto &t : typeNames) {
404 if (t.isString()) {
405 types.push_back(t.toString());
406 }
407 }
408 }
409 // TODO consider additionalTypes property
410
411 if (types.isEmpty()) {
412 return graphExpand(obj);
413 }
414
415 QJsonArray results;
416
417 JsonLdFilterEngine filterEngine;
418 filterEngine.setTypeMappings(type_mapping);
419 filterEngine.setTypeFilters(type_filters);
420 filterEngine.setPropertyMappings(property_mappings);
421 for (const auto &type : types) {
422 QJsonObject res(obj);
423 res.insert(QStringLiteral("@type"), type);
424 filterEngine.filterRecursive(res);
425
426 // fold mainEntityOfPage into res
427 if (const auto mainEntityOfPage =
428 res.value(QLatin1StringView("mainEntityOfPage")).toObject();
429 !mainEntityOfPage.isEmpty()) {
430 res.remove(QLatin1StringView("mainEntityOfPage"));
431 for (auto it = mainEntityOfPage.begin(); it != mainEntityOfPage.end();
432 ++it) {
433 if (it.key().startsWith(QLatin1Char('@')) ||
434 res.contains(it.key())) {
435 continue;
436 }
437 res.insert(it.key(), it.value());
438 }
439 }
440
441 if (type.endsWith(QLatin1StringView("Reservation"))) {
442 filterReservation(res);
443 }
444
445 auto actions = res.value(QLatin1StringView("potentialAction"));
446 if (!actions.isUndefined()) {
447 res.insert(QStringLiteral("potentialAction"), filterActions(actions));
448 }
449
450 unpackArray(res, QLatin1StringView("image"));
451 const auto image = res.value(QLatin1StringView("image"));
452 if (image.isObject()) {
453 const auto imageObject = image.toObject();
454 if (JsonLd::typeName(imageObject) ==
455 QLatin1StringView("ImageObject")) {
456 res.insert(QStringLiteral("image"),
457 imageObject.value(QLatin1StringView("url")));
458 }
459 }
460
461 // unpack reservationFor array - multiply the result for each entry in here
462 const auto resFor = res.value(QLatin1StringView("reservationFor"));
463 if (const auto a = resFor.toArray(); !a.isEmpty()) {
464 for (const auto &entry : a) {
465 res.insert(QLatin1StringView("reservationFor"), entry);
466 results.push_back(res);
467 }
468 } else {
469 results.push_back(res);
470 }
471 }
472
473 return results;
474}
JSON-LD filtering for input normalization or type transforms.
void filterRecursive(QJsonObject &obj)
Recursively apply filtering rules to obj.
char * toString(const EngineQuery &query)
QJsonArray filterObject(const QJsonObject &obj)
Filter the top-level object obj for loading with JsonLdDocument.
QString typeName(const QJsonObject &obj)
Normalized type name from object.
void renameProperty(QJsonObject &obj, const char *oldName, const char *newName)
Rename a property, if present and the new name isn't in use already.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
GeoCoordinates geoFromUrl(const QUrl &url)
Parses geo coordinates from a given mapping service URL, such as a Google Maps link.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
QDate fromString(QStringView string, QStringView format, QCalendar cal)
iterator begin()
iterator end()
void push_back(const QJsonValue &value)
bool contains(QLatin1StringView key) const
iterator insert(QLatin1StringView key, const QJsonValue &value)
void remove(QLatin1StringView key)
QJsonValue value(QLatin1StringView key) const
bool isArray() const
bool isObject() const
bool isString() const
QJsonArray toArray() const
QJsonObject toObject() const
QString toString() const
bool isEmpty() const
void push_back(parameter_type value)
bool isEmpty() const
qsizetype size() const
bool isValid() const
QString toString(FormattingOptions options) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sat Dec 21 2024 16:56:36 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.