KItinerary

jsonldimportfilter.cpp
1/*
2 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "jsonldimportfilter.h"
8#include "json/jsonld.h"
9#include "json/jsonldfilterengine.h"
10#include "logging.h"
11
12#include <QDate>
13#include <QDebug>
14#include <QJsonArray>
15#include <QJsonDocument>
16#include <QJsonObject>
17#include <QUrl>
18
19#include <cstring>
20
21using namespace Qt::Literals::StringLiterals;
22using namespace KItinerary;
23
24// type normalization from full schema.org type hierarchy to our simplified subset
25// IMPORTANT: keep alphabetically sorted by fromType!
26static constexpr const JsonLdFilterEngine::TypeMapping type_mapping[] = {
27 { "AutoDealer", "LocalBusiness" },
28 { "AutoRepair", "LocalBusiness" },
29 { "AutomotiveBusiness", "LocalBusiness" },
30 { "Bakery", "FoodEstablishment" },
31 { "BarOrPub", "FoodEstablishment" },
32 { "BedAndBreakfast", "LodgingBusiness" },
33 { "Brewery", "FoodEstablishment" },
34 { "BusStop", "BusStation" },
35 { "BusinessEvent", "Event" },
36 { "CafeOrCoffeeShop", "FoodEstablishment" },
37 { "Campground", "LodgingBusiness" },
38 { "ChildrensEvent", "Event" },
39 { "ComedyEvent", "Event" },
40 { "ComputerStore", "LocalBusiness" },
41 { "DanceEvent", "Event" },
42 { "Distillery", "FoodEstablishment" },
43 { "EditAction", "UpdateAction" },
44 { "EducationEvent", "Event" },
45 { "ElectronicsStore", "LocalBusiness" },
46 { "EntertainmentBusiness", "LocalBusiness" },
47 { "ExhibitionEvent", "Event" },
48 { "FastFoodRestaurant", "FoodEstablishment" },
49 { "Festival", "Event" },
50 { "HobbyShop", "LocalBusiness" },
51 { "HomeAndConstructionBusiness", "LocalBusiness" },
52 { "Hostel", "LodgingBusiness" },
53 { "Hotel", "LodgingBusiness" },
54 { "IceCreamShop", "FoodEstablishment" },
55 { "LiteraryEvent", "Event" },
56 { "Motel", "LodgingBusiness" },
57 { "MovieTheater", "LocalBusiness" },
58 { "MusicEvent", "Event" },
59 { "Resort", "LodgingBusiness" },
60 { "Restaurant", "FoodEstablishment" },
61 { "SaleEvent", "Event" },
62 { "ScreeningEvent", "Event" },
63 { "SocialEvent", "Event" },
64 { "SportsEvent", "Event" },
65 { "Store", "LocalBusiness" },
66 { "TheaterEvent", "Event" },
67 { "VisualArtsEvent", "Event" },
68 { "Winery", "FoodEstablishment" },
69};
70
71static void unpackArray(QJsonObject &obj, QLatin1StringView key) {
72 const auto val = obj.value(key);
73 if (!val.isArray()) {
74 return;
75 }
76 const auto arr = val.toArray();
77 if (arr.isEmpty()) {
78 return;
79 }
80 obj.insert(key, arr.at(0));
81}
82
83static void migrateToAction(QJsonObject &obj, const char *propName, const char *typeName, bool remove)
84{
85 const auto value = obj.value(QLatin1StringView(propName));
86 if (value.isNull() || value.isUndefined()) {
87 return;
88 }
89
90 const auto actionsVal = obj.value("potentialAction"_L1);
91 QJsonArray actions;
92 if (actionsVal.isArray()) {
93 actions = actionsVal.toArray();
94 } else if (actionsVal.isObject()) {
95 actions = { actionsVal };
96 }
97
98 for (const auto &act : actions) {
99 if (JsonLd::typeName(act.toObject()) == QLatin1StringView(typeName)) {
100 return;
101 }
102 }
103
104 QJsonObject action;
105 action.insert(QStringLiteral("@type"), QLatin1StringView(typeName));
106 action.insert(QStringLiteral("target"), value);
107 actions.push_back(action);
108 obj.insert(QStringLiteral("potentialAction"), actions);
109
110 if (remove) {
111 obj.remove(QLatin1StringView(propName));
112 }
113}
114
115static void filterPlace(QJsonObject &obj)
116{
117 // convert text address to PostalAddress
118 if (const auto addr = obj.value("address"_L1); addr.isString()) {
119 obj.insert("address"_L1, QJsonObject{
120 {"@type"_L1, "PostalAddress"_L1},
121 {"streetAddress"_L1, addr.toString()},
122 });
123 }
124 // same for geo coordinates
125 const auto lat = obj.value("latitude"_L1);
126 const auto lon = obj.value("longitude"_L1);
127 if (lat.isDouble() && lon.isDouble()) {
128 auto geo = obj.value("geo"_L1).toObject();
129 if (!geo.contains("@type"_L1)) {
130 geo.insert("@type"_L1, "GeoCoordinates"_L1);
131 }
132 if (!geo.contains("latitude"_L1)) {
133 geo.insert("latitude"_L1, lat);
134 }
135 if (!geo.contains("longitude"_L1)) {
136 geo.insert("longitude"_L1, lat);
137 }
138 obj.insert("geo"_L1, geo);
139 }
140}
141
142static void filterFlight(QJsonObject &res)
143{
144 // move incomplete departureTime (ie. just ISO date, no time) to departureDay
145 if (res.value(QLatin1StringView("departureTime")).toString().size() == 10) {
146 JsonLd::renameProperty(res, "departureTime", "departureDay");
147 }
148}
149
150static void filterReservation(QJsonObject &res)
151{
152 // move ticketToken and ticketNumber to Ticket (Google vs. schema.org difference)
153 for (const auto key : {"ticketToken"_L1, "ticketNumber"_L1}) {
154 const auto v = res.value(key).toString();
155 if (!v.isEmpty()) {
156 auto ticket = res.value("reservedTicket"_L1).toObject();
157 if (ticket.isEmpty()) {
158 ticket.insert("@type"_L1, u"Ticket"_s);
159 }
160 if (!ticket.contains(key)) {
161 ticket.insert(key, v);
162 res.insert("reservedTicket"_L1, ticket);
163 res.remove(key);
164 }
165 }
166 }
167
168 // normalize reservationStatus enum
169 auto resStat = res.value("reservationStatus"_L1).toString();
170 if (!resStat.isEmpty()) {
171 if (resStat.startsWith("https:"_L1)) {
172 resStat.remove(4, 1);
173 }
174 if (!resStat.contains("/Reservation"_L1)) {
175 res.insert("reservationStatus"_L1, resStat.replace("http://schema.org/"_L1, "http://schema.org/Reservation"_L1));
176 }
177 }
178
179 // legacy properties
180 JsonLd::renameProperty(res, "programMembership", "programMembershipUsed");
181
182 // legacy potentialAction property
183 JsonLd::renameProperty(res, "action", "potentialAction");
184
185 // move Google xxxUrl properties to Action instances
186 migrateToAction(res, "cancelReservationUrl", "CancelAction", true);
187 migrateToAction(res, "checkinUrl", "CheckInAction", true);
188 migrateToAction(res, "modifyReservationUrl", "UpdateAction", true);
189 migrateToAction(res, "ticketDownloadUrl", "DownloadAction", true);
190 migrateToAction(res, "url", "ViewAction", false);
191
192 // technically the wrong way (reservationId is the current schema.org standard), but hardly used anywhere (yet)
193 JsonLd::renameProperty(res, "reservationId", "reservationNumber");
194
195 // "typos"
196 JsonLd::renameProperty(res, "Url", "url");
197}
198
199static void filterFoodEstablishment(QJsonObject &restaurant)
200{
201 // This can be a bool, "Yes"/"No", or a URL.
202 auto reservationsValue =
203 restaurant.value(QLatin1StringView("acceptsReservations"));
204 if (reservationsValue.isString()) {
205 const QString reservations = reservationsValue.toString();
206 if (reservations == QLatin1StringView("Yes")) {
207 restaurant.insert(QLatin1StringView("acceptsReservations"), true);
208 } else if (reservations == QLatin1StringView("No")) {
209 restaurant.insert(QLatin1StringView("acceptsReservations"), false);
210 } else {
211 migrateToAction(restaurant, "acceptsReservations", "ReserveAction",
212 true);
213 }
214 }
215
216 filterPlace(restaurant);
217}
218
219static void filterActionTarget(QJsonObject &action)
220{
221 QJsonArray targets;
222 QString filteredTargetUrlString;
223
224 const QJsonValue oldTarget = action.value(QLatin1StringView("target"));
225 if (oldTarget.isArray()) {
226 targets = oldTarget.toArray();
227 } else if (oldTarget.isObject()) {
228 targets.push_back(oldTarget);
229 }
230
231 for (auto it = targets.begin(); it != targets.end(); ++it) {
232 auto target = (*it).toObject();
233
234 QJsonArray platforms;
235
236 const QJsonValue actionPlatform =
237 target.value(QLatin1StringView("actionPlatform"));
238 if (actionPlatform.isArray()) {
239 platforms = actionPlatform.toArray();
240 } else {
241 platforms.push_back(actionPlatform);
242 }
243
244 // Always return at least one URL but prefer the current platform if possible
245 if (!filteredTargetUrlString.isEmpty()) {
246 const bool hasPreferredPlatform = std::any_of(platforms.begin(), platforms.end(), [](const QJsonValue &platformValue) {
247 const QString platform = platformValue.toString();
248 // FIXME android
249 return platform == QLatin1StringView(
250 "http://schema.org/DesktopWebPlatform");
251 });
252
253 if (!hasPreferredPlatform) {
254 continue;
255 }
256 }
257
258 const QUrl url(
259 target.value(QLatin1StringView("urlTemplate")).toString());
260 // It could also be a "URL template"
261 if (!url.isValid()) {
262 continue;
263 }
264
265 filteredTargetUrlString = url.toString();
266 }
267
268 if (filteredTargetUrlString.isEmpty()) {
269 JsonLd::renameProperty(action, "url", "target");
270 } else {
271 action.insert(QStringLiteral("target"), filteredTargetUrlString);
272 }
273}
274
275static QJsonArray filterActions(const QJsonValue &v)
276{
277 QJsonArray actions;
278 if (v.isArray()) {
279 actions = v.toArray();
280 } else {
281 actions.push_back(v);
282 }
283
284 for (auto it = actions.begin(); it != actions.end(); ++it) {
285 auto action = (*it).toObject();
286 filterActionTarget(action);
287 *it = action;
288 }
289
290 return actions;
291}
292
293static void filterEvent(QJsonObject &obj)
294{
295 unpackArray(obj, "location"_L1);
296
297 // date only end: set time to end of day
298 if (const auto endDate = obj.value("endDate"_L1).toString(); endDate.size() == 10) {
299 const auto date = QDate::fromString(endDate, Qt::ISODate);
300 if (date.isValid()) {
301 obj.insert("endDate"_L1, date.endOfDay().toString(Qt::ISODate));
302 }
303 }
304}
305
306static void filterPostalAddress(QJsonObject &obj)
307{
308 // unpack country objects
309 auto country = obj.value(QLatin1StringView("addressCountry"));
310 if (country.isObject()) {
311 obj.insert(QLatin1StringView("addressCountry"),
312 country.toObject().value(QLatin1String("name")));
313 }
314}
315
316// filter functions applied to objects of the corresponding (already normalized) type
317// IMPORTANT: keep alphabetically sorted by type!
318static constexpr const JsonLdFilterEngine::TypeFilter type_filters[] = {
319 { "Event", filterEvent },
320 { "Flight", filterFlight },
321 { "FoodEstablishment", filterFoodEstablishment },
322 { "LocalBusiness", filterPlace },
323 { "Organization", filterPlace },
324 { "Place", filterPlace },
325 { "PostalAddress", filterPostalAddress },
326};
327
328// property renaming
329// IMPORTANT: keep alphabetically sorted by type!
330static constexpr const JsonLdFilterEngine::PropertyMapping property_mappings[] = {
331 { "BusTrip", "arrivalStation", "arrivalBusStop" },
332 { "BusTrip", "busCompany", "provider" },
333 { "BusTrip", "departureStation", "departureBusStop" },
334
335 // technically the wrong way around, but we still use the much more common old name
336 { "Flight", "provider", "airline" },
337
338 // check[in|out]Date -> check[in|out]Time (legacy Google format)
339 { "LodgingReservation", "checkinDate", "checkinTime" },
340 { "LodgingReservation", "checkoutDate", "checkoutTime" },
341
342 { "ProgramMembership", "program", "programName" },
343 { "ProgramMembership", "memberNumber", "membershipNumber" },
344
345 { "Reservation", "price", "totalPrice" },
346 { "Ticket", "price", "totalPrice" },
347
348 // move TrainTrip::trainCompany to TrainTrip::provider (as defined by schema.org)
349 { "TrainTrip", "trainCompany", "provider" },
350};
351
352static QJsonArray graphExpand(const QJsonObject &obj)
353{
354 QJsonArray result;
355
356 const auto graph = obj.value(QLatin1StringView("@graph")).toArray();
357 for (const auto &o : graph) {
358 const auto a = JsonLdImportFilter::filterObject(o.toObject());
359 std::copy(a.begin(), a.end(), std::back_inserter(result));
360 }
361
362 return result;
363}
364
366{
367 QStringList types;
368 const auto typeVal = obj.value(QLatin1StringView("@type"));
369 if (typeVal.isString()) {
370 types.push_back(typeVal.toString());
371 } else if (typeVal.isArray()) {
372 const auto typeNames = typeVal.toArray();
373 for (const auto &t : typeNames) {
374 if (t.isString()) {
375 types.push_back(t.toString());
376 }
377 }
378 }
379 // TODO consider additionalTypes property
380
381 if (types.isEmpty()) {
382 return graphExpand(obj);
383 }
384
385 QJsonArray results;
386
387 JsonLdFilterEngine filterEngine;
388 filterEngine.setTypeMappings(type_mapping);
389 filterEngine.setTypeFilters(type_filters);
390 filterEngine.setPropertyMappings(property_mappings);
391 for (const auto &type : types) {
392 QJsonObject res(obj);
393 res.insert(QStringLiteral("@type"), type);
394 filterEngine.filterRecursive(res);
395
396 // fold mainEntityOfPage into res
397 if (const auto mainEntityOfPage =
398 res.value(QLatin1StringView("mainEntityOfPage")).toObject();
399 !mainEntityOfPage.isEmpty()) {
400 res.remove(QLatin1StringView("mainEntityOfPage"));
401 for (auto it = mainEntityOfPage.begin(); it != mainEntityOfPage.end();
402 ++it) {
403 if (it.key().startsWith(QLatin1Char('@')) ||
404 res.contains(it.key())) {
405 continue;
406 }
407 res.insert(it.key(), it.value());
408 }
409 }
410
411 if (type.endsWith(QLatin1StringView("Reservation"))) {
412 filterReservation(res);
413 }
414
415 auto actions = res.value(QLatin1StringView("potentialAction"));
416 if (!actions.isUndefined()) {
417 res.insert(QStringLiteral("potentialAction"), filterActions(actions));
418 }
419
420 unpackArray(res, QLatin1StringView("image"));
421 const auto image = res.value(QLatin1StringView("image"));
422 if (image.isObject()) {
423 const auto imageObject = image.toObject();
424 if (JsonLd::typeName(imageObject) ==
425 QLatin1StringView("ImageObject")) {
426 res.insert(QStringLiteral("image"),
427 imageObject.value(QLatin1StringView("url")));
428 }
429 }
430
431 // unpack reservationFor array - multiply the result for each entry in here
432 const auto resFor = res.value(QLatin1StringView("reservationFor"));
433 if (const auto a = resFor.toArray(); !a.isEmpty()) {
434 for (const auto &entry : a) {
435 res.insert(QLatin1StringView("reservationFor"), entry);
436 results.push_back(res);
437 }
438 } else {
439 results.push_back(res);
440 }
441 }
442
443 return results;
444}
JSON-LD filtering for input normalization or type transforms.
void filterRecursive(QJsonObject &obj)
Recursively apply filtering rules to obj.
char * toString(const EngineQuery &query)
QJsonArray filterObject(const QJsonObject &obj)
Filter the top-level object obj for loading with JsonLdDocument.
QString typeName(const QJsonObject &obj)
Normalized type name from object.
void renameProperty(QJsonObject &obj, const char *oldName, const char *newName)
Rename a property, if present and the new name isn't in use already.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
QDate fromString(QStringView string, QStringView format, QCalendar cal)
iterator begin()
iterator end()
void push_back(const QJsonValue &value)
bool contains(QLatin1StringView key) const const
iterator insert(QLatin1StringView key, const QJsonValue &value)
void remove(QLatin1StringView key)
QJsonValue value(QLatin1StringView key) const const
bool isArray() const const
bool isObject() const const
QJsonArray toArray() const const
QJsonObject toObject() const const
QString toString() const const
bool isEmpty() const const
void push_back(parameter_type value)
bool isEmpty() const const
QString & remove(QChar ch, Qt::CaseSensitivity cs)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:49 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.