KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cmath>
55#include <cstring>
56
57using namespace Qt::Literals::StringLiterals;
58using namespace KItinerary;
59
60ExtractorPostprocessor::ExtractorPostprocessor()
61 : d(new ExtractorPostprocessorPrivate)
62{
63}
64
65ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
67
68void ExtractorPostprocessor::process(const QList<QVariant> &data) {
69 d->m_resultFinalized = false;
70 d->m_data.reserve(d->m_data.size() + data.size());
71 for (auto elem : data) {
72 // reservation types
74 elem = d->processFlightReservation(elem.value<FlightReservation>());
75 } else if (JsonLd::isA<TrainReservation>(elem)) {
76 elem = d->processTrainReservation(elem.value<TrainReservation>());
77 } else if (JsonLd::isA<LodgingReservation>(elem)) {
78 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
80 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
81 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
82 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
83 } else if (JsonLd::isA<BusReservation>(elem)) {
84 elem = d->processBusReservation(elem.value<BusReservation>());
85 } else if (JsonLd::isA<BoatReservation>(elem)) {
86 elem = d->processBoatReservation(elem.value<BoatReservation>());
87 } else if (JsonLd::isA<EventReservation>(elem)) {
88 elem = d->processEventReservation(elem.value<EventReservation>());
89 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
90 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
91 } else if (JsonLd::isA<TaxiReservation>(elem)) {
92 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
93 }
94
95 // "reservationFor" types
96 else if (JsonLd::isA<LodgingBusiness>(elem)) {
97 elem = d->processPlace(elem.value<LodgingBusiness>());
98 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
99 elem = d->processPlace(elem.value<FoodEstablishment>());
100 } else if (JsonLd::isA<Event>(elem)) {
101 elem = d->processEvent(elem.value<Event>());
102 } else if (JsonLd::isA<LocalBusiness>(elem)) {
103 elem = d->processPlace(elem.value<LocalBusiness>());
104 }
105
106 // non-reservation types
107 else if (JsonLd::isA<ProgramMembership>(elem)) {
108 elem = d->processProgramMembership(elem.value<ProgramMembership>());
109 } else if (JsonLd::isA<Ticket>(elem)) {
110 elem = d->processTicket(elem.value<Ticket>());
111 }
112
113 d->mergeOrAppend(elem);
114 }
115}
116
117[[nodiscard]] static QVariant mergeTicket(QVariant lhs, const QVariant &rhs)
118{
119 const auto rhsTicket = JsonLdDocument::readProperty(rhs, "reservedTicket");
120 const auto lhsTicket = JsonLdDocument::readProperty(lhs, "reservedTicket");
121 JsonLdDocument::writeProperty(lhs, "reservedTicket", MergeUtil::merge(lhsTicket, rhsTicket));
122 return lhs;
123}
124
126 if (!d->m_resultFinalized) {
127 // fold elements we have reservations for into those reservations
128 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
129 if (JsonLd::isA<Reservation>(*it)) {
130 ++it;
131 continue;
132 }
133
134 bool merged = false;
135 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
136 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
137 if (MergeUtil::isSame(resFor, *it)) {
138 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
139 merged = true;
140 }
141 }
142
143 if (merged) {
144 it = d->m_data.erase(it);
145 } else {
146 ++it;
147 }
148 }
149
150 // search for "triangular" patterns, ie. a location change element that has a matching departure
151 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
152 // we remove those, as the fine-granular results are better
153 if (d->m_data.size() >= 3) {
154 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
155 auto depIt = it;
156 auto arrIt = it;
157 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
158 if (it == it2) {
159 continue;
160 }
161 if (MergeUtil::hasSameDeparture(*it, *it2)) {
162 depIt = it2;
163 }
164 if (MergeUtil::hasSameArrival(*it, *it2)) {
165 arrIt = it2;
166 }
167 }
168
169 if (depIt != it && arrIt != it && depIt != arrIt) {
170 (*depIt) = mergeTicket(*depIt, *it);
171 (*arrIt) = mergeTicket(*arrIt, *it);
172 it = d->m_data.erase(it);
173 } else {
174 ++it;
175 }
176 }
177 }
178
179 d->m_resultFinalized = true;
180 }
181
182 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
183 return d->m_data;
184}
185
187{
188 d->m_contextDate = dt;
189}
190
191void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
192{
193}
194
195void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
196{
197 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
198 return MergeUtil::isSame(elem, other);
199 });
200
201 if (it == m_data.end()) {
202 m_data.push_back(elem);
203 } else {
204 *it = MergeUtil::merge(*it, elem);
205 }
206}
207
208QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
209{
210 // expand ticketToken for IATA BCBP data
211 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
212 if (!bcbp.isEmpty()) {
213 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
214 if (bcbpData.size() == 1) {
215 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
216 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
217 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
218 } else {
219 for (const auto &data : bcbpData) {
220 if (MergeUtil::isSame(res, data)) {
222 break;
223 }
224 }
225 }
226 }
227
228 res.setBoardingGroup(StringUtil::simplifiedNoPlaceholder(res.boardingGroup()));
229 if (res.reservationFor().isValid()) {
230 FlightPostProcessor p;
231 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
232 }
233 return processReservation(res);
234}
235
236TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
237{
238 if (res.reservationFor().isValid()) {
239 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
240 }
241 return processReservation(res);
242}
243
244TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
245{
246 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
247 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
248 trip.setDepartureStation(processStation(trip.departureStation()));
249 trip.setArrivalStation(processStation(trip.arrivalStation()));
250 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
251 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
252 trip.setTrainNumber(trip.trainNumber().simplified());
253 trip.setTrainName(trip.trainName().simplified());
254
255 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
256 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
257 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
258 trip.setArrivalTime(trip.arrivalTime().addDays(1));
259 }
260
261 return trip;
262}
263
264template <typename T>
265static void applyStationData(const KnowledgeDb::TrainStation &record, T &station)
266{
267 if (!station.geo().isValid() && record.coordinate.isValid()) {
269 geo.setLatitude(record.coordinate.latitude);
270 geo.setLongitude(record.coordinate.longitude);
271 station.setGeo(geo);
272 }
273 auto addr = station.address();
274 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
275 addr.setAddressCountry(record.country.toString());
276 station.setAddress(addr);
277 }
278}
279
280template <typename T>
281static void applyStationCountry(const QString &isoCode, T &station)
282{
283 auto addr = station.address();
284 if (addr.addressCountry().isEmpty()) {
285 addr.setAddressCountry(isoCode.toUpper());
286 station.setAddress(addr);
287 }
288}
289
290template<typename T>
291T ExtractorPostprocessorPrivate::processStation(T station)
292{
293 const auto id = station.identifier();
294 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
295 station.setIdentifier(QString());
296 } else if (id.startsWith("sncf:"_L1) && id.size() == 10) {
298 applyStationData(record, station);
299 applyStationCountry(id.mid(5, 2).toUpper(), station);
300 } else if (id.startsWith("ibnr:"_L1) && id.size() == 12) {
301 const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
302 applyStationData(record, station);
303 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) .toString();
304 applyStationCountry(country, station);
305 } else if (id.startsWith("uic:"_L1) && id.size() == 11) {
306 const auto record = KnowledgeDb::stationForUic( KnowledgeDb::UICStation{id.mid(4).toUInt()});
307 applyStationData(record, station);
308 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) .toString();
309 applyStationCountry(country, station);
310 } else if (id.startsWith("ir:"_L1) && id.size() > 4) {
311 const auto record = KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
312 applyStationData(record, station);
313 } else if (id.startsWith("benerail:"_L1) && id.size() == 14) {
315 applyStationData(record, station);
316 applyStationCountry(id.mid(9, 2).toUpper(), station);
317 } else if (id.startsWith("vrfi:"_L1) && id.size() >= 7 && id.size() <= 9) {
319 applyStationData(record, station);
320 } else if (id.startsWith("iata:"_L1) && id.size() == 8) {
321 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
322 const auto record = KnowledgeDb::stationForIataCode(iataCode);
323 applyStationData(record, station);
324 // fall back to the airport with the matching IATA code for the country
325 // information we cannot use the coordinate though, as that points to the
326 // actual airport, not the station
327 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), station);
328 } else if (id.startsWith("amtrak:"_L1) && id.size() == 10) {
329 const auto record = KnowledgeDb::stationForAmtrakStationCode(KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7)));
330 applyStationData(record, station);
331 } else if (id.startsWith("via:"_L1) && id.size() == 8) {
332 const auto record = KnowledgeDb::stationForViaRailStationCode(KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4)));
333 applyStationData(record, station);
334 } else if (id.startsWith("uk:"_L1) && id.size() == 6) {
335 const auto record = KnowledgeDb::stationForUkRailwayStationCode(KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3)));
336 applyStationData(record, station);
337 }
338
339 return processPlace(station);
340}
341
342template <typename T>
343QDateTime ExtractorPostprocessorPrivate::processTripTime(QDateTime dt, QDate departureDay, const T& place) const
344{
345 if (!dt.isValid()) {
346 return dt;
347 }
348
349 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
350 dt.setDate(departureDay);
351 }
352 return processTimeForLocation(dt, place);
353}
354
355BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
356{
357 if (res.reservationFor().isValid()) {
358 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
359 }
360 return processReservation(res);
361}
362
363BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
364{
365 trip.setDepartureBusStop(processStation(trip.departureBusStop()));
366 trip.setArrivalBusStop(processStation(trip.arrivalBusStop()));
367 trip.setDepartureTime(processTripTime(trip.departureTime(), trip.departureDay(), trip.departureBusStop()));
368 trip.setArrivalTime(processTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalBusStop()));
369 trip.setBusNumber(trip.busNumber().simplified());
370 trip.setBusName(trip.busName().simplified());
371 return trip;
372}
373
374BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
375{
376 if (res.reservationFor().isValid()) {
377 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
378 }
379 return processReservation(res);
380}
381
382BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
383{
384 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
385 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
386 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
387 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
388
389 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
390 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
391 if (duration < 0 && duration > -3600*24) {
392 trip.setArrivalTime(trip.arrivalTime().addDays(1));
393 }
394
395 return trip;
396}
397
398LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
399{
400 if (res.reservationFor().isValid()) {
401 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
402 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
403 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
404 }
405 return processReservation(res);
406}
407
408TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
409{
410 res.setPickupLocation(processPlace(res.pickupLocation()));
411 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
412 return processReservation(res);
413}
414
415RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
416{
417 if (res.reservationFor().isValid()) {
418 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
419 }
420 res.setPickupLocation(processPlace(res.pickupLocation()));
421 res.setDropoffLocation(processPlace(res.dropoffLocation()));
422 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
423 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
424 return processReservation(res);
425}
426
427RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
428{
429 car.setName(car.name().trimmed());
430 return car;
431}
432
433FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
434{
435 if (res.reservationFor().isValid()) {
436 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
437 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
438 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
439 }
440 return processReservation(res);
441}
442
443TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
444{
445 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
446 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
447 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
448 return visit;
449}
450
451EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
452{
453 if (res.reservationFor().isValid()) {
454 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
455 }
456 return processReservation(res);
457}
458
459KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
460{
461 event.setName(StringUtil::clean(event.name()));
462
463 // normalize location to be a Place
464 if (JsonLd::isA<PostalAddress>(event.location())) {
465 Place place;
466 place.setAddress(event.location().value<PostalAddress>());
467 event.setLocation(place);
468 }
469
470 if (JsonLd::isA<Place>(event.location())) {
471 event.setLocation(processPlace(event.location().value<Place>()));
472
473 // try to obtain timezones if we have a location
474 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
475 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
476 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
477 }
478
479 return event;
480}
481
482static QString processCurrency(const QString &currency)
483{
484 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
485 return {};
486 }
487 return currency;
488}
489
490Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
491{
492 ticket.setName(StringUtil::clean(ticket.name()));
493 ticket.setTicketNumber(ticket.ticketNumber().simplified());
494 ticket.setUnderName(processPerson(ticket.underName()));
495 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
496 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
497 return ticket;
498}
499
500ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
501{
502 // remove empty dummy entries found eg. in ERA FCB data
503 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
504 program.setProgramName(QString());
505 }
506
507 program.setProgramName(program.programName().simplified());
508 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
509 if (program.programName().isEmpty() && !program.programName().isNull()) {
510 program.setProgramName(QString());
511 }
512 program.setMember(processPerson(program.member()));
513 return program;
514}
515
516Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
517{
518 seat.setSeatSection(seat.seatSection().simplified());
519 seat.setSeatRow(seat.seatRow().simplified());
520 seat.setSeatNumber(seat.seatNumber().simplified());
521 seat.setSeatingType(seat.seatingType().simplified());
522 return seat;
523}
524
525template <typename T>
526T ExtractorPostprocessorPrivate::processReservation(T res) const
527{
528 res.setUnderName(processPerson(res.underName().template value<Person>()));
529 res.setPotentialAction(processActions(res.potentialAction()));
530 res.setReservationNumber(res.reservationNumber().trimmed());
531 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
532 res.setPriceCurrency(processCurrency(res.priceCurrency()));
533
534 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
535 // move information that can exist in Ticket and Reservation up to the latter
536 auto ticket = processTicket(res.reservedTicket().template value<Ticket>());
537 if (res.underName().isNull() && !ticket.name().isEmpty()) {
538 res.setUnderName(ticket.underName());
539 ticket.setUnderName({});
540 } else if (ticket.underName() == res.underName().template value<Person>()) {
541 ticket.setUnderName({});
542 }
543
544 if ((!res.priceCurrency().isEmpty() && res.priceCurrency() == ticket.priceCurrency())
545 && (!std::isnan(res.totalPrice()) && res.totalPrice() == ticket.totalPrice())) {
546 ticket.setPriceCurrency({});
547 ticket.setTotalPrice(NAN);
548 }
549
550 res.setReservedTicket(processTicket(ticket));
551 }
552 return res;
553}
554
// Upper-case honorifics that simplifyNamePart() strips from the start or end of a name.
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
558
559static bool isSeparator(QChar c)
560{
561 return c == QLatin1Char(' ') || c == QLatin1Char('/');
562}
563
564static QString simplifyNamePart(QString n)
565{
566 n = n.simplified();
567
568 for (auto prefix : name_prefixes) {
569 const int prefixLen = std::strlen(prefix);
570 if (n.size() > prefixLen + 2 &&
571 n.startsWith(QLatin1StringView(prefix, prefixLen),
573 isSeparator(n[prefixLen])) {
574 return n.mid(prefixLen + 1);
575 }
576 if (n.size() > prefixLen + 2 &&
577 n.endsWith(QLatin1StringView(prefix, prefixLen),
579 isSeparator(n[n.size() - prefixLen - 1])) {
580 return n.left(n.size() - prefixLen - 1);
581 }
582 }
583
584 return n;
585}
586
587KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
588{
589 person.setName(simplifyNamePart(person.name()));
590 person.setFamilyName(simplifyNamePart(person.familyName()));
591 person.setGivenName(simplifyNamePart(person.givenName()));
592
593 // fill name with name parts, if it's empty
594 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
595 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
596 {
597 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
598 }
599
600 return person;
601}
602
603PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
604{
605 addr.setAddressCountry(addr.addressCountry().simplified());
606
607 // convert to ISO 3166-1 alpha-2 country codes
608 if (addr.addressCountry().size() > 2) {
609 QString alpha2Code;
610
611 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
612 if (addr.addressCountry().size() == 3) {
613 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
614 }
615 if (alpha2Code.isEmpty()) {
616 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
617 }
618 if (!alpha2Code.isEmpty()) {
619 addr.setAddressCountry(alpha2Code);
620 }
621 }
622
623 // upper case country codes
624 if (addr.addressCountry().size() == 2) {
625 addr.setAddressCountry(addr.addressCountry().toUpper());
626 }
627
628 // normalize strings
629 addr.setStreetAddress(addr.streetAddress().simplified());
630 addr.setPostalCode(addr.postalCode().simplified());
631 addr.setAddressLocality(addr.addressLocality().simplified());
632 addr.setAddressRegion(addr.addressRegion().simplified());
633
634#if HAVE_PHONENUMBER
635 // recover country from phone number, if we have that
636 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
637 const auto phoneStr = phoneNumber.toStdString();
638 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
639 i18n::phonenumbers::PhoneNumber number;
640 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
641 std::string isoCode;
642 util->GetRegionCodeForNumber(number, &isoCode);
643 if (!isoCode.empty() && isoCode != "ZZ") {
644 addr.setAddressCountry(QString::fromStdString(isoCode));
645 }
646 }
647 }
648#endif
649
650 if (geo.isValid() && addr.addressCountry().size() != 2) {
651 const auto country = KCountry::fromLocation(geo.latitude(), geo.longitude());
652 if (country.isValid()) {
653 addr.setAddressCountry(country.alpha2());
654 }
655 }
656
657 AddressParser addrParser;
658 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
659 addrParser.parse(addr);
660 addr = addrParser.result();
661 return addr;
662}
663
664QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
665{
666#if HAVE_PHONENUMBER
667 // or complete the phone number if we know the country
668 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
669 auto phoneStr = phoneNumber.toStdString();
670 const auto isoCode = addr.addressCountry().toStdString();
671 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
672 i18n::phonenumbers::PhoneNumber number;
673 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
674 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
675 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
676 return QString::fromStdString(phoneStr);
677 }
678 }
679 }
680#else
681 Q_UNUSED(addr)
682#endif
683 return phoneNumber.simplified();
684}
685
686QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
687{
688 // remove non-actions and actions with invalid URLs
689 QUrl viewUrl;
690 for (auto it = actions.begin(); it != actions.end();) {
691 if (!JsonLd::canConvert<Action>(*it)) {
692 it = actions.erase(it);
693 continue;
694 }
695
696 const auto action = JsonLd::convert<Action>(*it);
697 if (!action.target().isValid()) {
698 it = actions.erase(it);
699 continue;
700 }
701
702 if (JsonLd::isA<ViewAction>(*it)) {
703 viewUrl = action.target();
704 }
705 ++it;
706 }
707
708 // normalize the order, so JSON comparison still yields correct results
709 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
710 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
711 });
712
713 // remove actions that don't actually have their own target, or duplicates
714 QUrl prevUrl;
715 const char* prevType = nullptr;
716 for (auto it = actions.begin(); it != actions.end();) {
717 const auto action = JsonLd::convert<Action>(*it);
718 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
719 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
720 prevUrl = action.target();
721 prevType = (*it).typeName();
722 ++it;
723 } else {
724 it = actions.erase(it);
725 }
726 }
727
728 return actions;
729}
730
731template <typename T>
732QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
733{
734 if (!dt.isValid() ) {
735 return dt;
736 }
737 if ((dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
738 if (KnowledgeDb::isPlausibleTimeZone(dt.timeZone(), place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion())) {
739 return dt;
740 }
741 // drop timezones where we are sure they don't match the location
743 }
744
745 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
746 if (!tz.isValid()) {
747 return dt;
748 }
749
750 // prefer our timezone over externally provided UTC offset, if they match
751 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
752 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
753 return dt;
754 }
755
756 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
757 dt.setTimeZone(tz);
758 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
759 dt = dt.toTimeZone(tz);
760 }
761 return dt;
762}
static KCountry fromLocation(float latitude, float longitude)
bool isValid() const
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
QDate departureDay
The scheduled day of departure.
Definition bustrip.h:40
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and filtering.
void setValidationEnabled(bool validate)
Enable or disable validation.
void process(const QList< QVariant > &data)
This will normalize and augment the given data elements and merge them with already added data elements.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
Base class for places.
Definition place.h:69
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
AlphaId< UnalignedNumber< 3 >, 4 > ViaRailStationCode
Via Rail station code.
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
AlphaId< uint16_t, 3 > UKRailwayStationCode
UK railway station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
AlphaId< uint16_t, 3 > AmtrakStationCode
Amtrak station codes.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:169
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
QString simplifiedNoPlaceholder(const QString &s)
Same as QString::simplified() and dropping everything that just contains punctuation or dash characters...
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
bool isValid(int year, int month, int day)
int year() const const
QDateTime addDays(qint64 ndays) const const
QDate date() const const
bool isValid() const const
int offsetFromUtc() const const
qint64 secsTo(const QDateTime &other) const const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const const
QTimeZone timeZone() const const
QDateTime toTimeZone(const QTimeZone &timeZone) const const
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromStdString(const std::string &str)
bool isEmpty() const const
bool isNull() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString simplified() const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
std::string toStdString() const const
QString toUpper() const const
QString trimmed() const const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const const
T value() const const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Mar 28 2025 11:59:49 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.