KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cstring>
55
56using namespace Qt::Literals::StringLiterals;
57using namespace KItinerary;
58
59ExtractorPostprocessor::ExtractorPostprocessor()
60 : d(new ExtractorPostprocessorPrivate)
61{
62}
63
64ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
66
67void ExtractorPostprocessor::process(const QList<QVariant> &data) {
68 d->m_resultFinalized = false;
69 d->m_data.reserve(d->m_data.size() + data.size());
70 for (auto elem : data) {
71 // reservation types
73 elem = d->processFlightReservation(elem.value<FlightReservation>());
74 } else if (JsonLd::isA<TrainReservation>(elem)) {
75 elem = d->processTrainReservation(elem.value<TrainReservation>());
76 } else if (JsonLd::isA<LodgingReservation>(elem)) {
77 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
79 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
80 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
81 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
82 } else if (JsonLd::isA<BusReservation>(elem)) {
83 elem = d->processBusReservation(elem.value<BusReservation>());
84 } else if (JsonLd::isA<BoatReservation>(elem)) {
85 elem = d->processBoatReservation(elem.value<BoatReservation>());
86 } else if (JsonLd::isA<EventReservation>(elem)) {
87 elem = d->processEventReservation(elem.value<EventReservation>());
88 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
89 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
90 } else if (JsonLd::isA<TaxiReservation>(elem)) {
91 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
92 }
93
94 // "reservationFor" types
95 else if (JsonLd::isA<LodgingBusiness>(elem)) {
96 elem = d->processPlace(elem.value<LodgingBusiness>());
97 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
98 elem = d->processPlace(elem.value<FoodEstablishment>());
99 } else if (JsonLd::isA<Event>(elem)) {
100 elem = d->processEvent(elem.value<Event>());
101 } else if (JsonLd::isA<LocalBusiness>(elem)) {
102 elem = d->processPlace(elem.value<LocalBusiness>());
103 }
104
105 // non-reservation types
106 else if (JsonLd::isA<ProgramMembership>(elem)) {
107 elem = d->processProgramMembership(elem.value<ProgramMembership>());
108 } else if (JsonLd::isA<Ticket>(elem)) {
109 elem = d->processTicket(elem.value<Ticket>());
110 }
111
112 d->mergeOrAppend(elem);
113 }
114}
115
117 if (!d->m_resultFinalized) {
118 // fold elements we have reservations for into those reservations
119 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
120 if (JsonLd::isA<Reservation>(*it)) {
121 ++it;
122 continue;
123 }
124
125 bool merged = false;
126 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
127 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
128 if (MergeUtil::isSame(resFor, *it)) {
129 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
130 merged = true;
131 }
132 }
133
134 if (merged) {
135 it = d->m_data.erase(it);
136 } else {
137 ++it;
138 }
139 }
140
141 // search for "triangular" patterns, ie. a location change element that has a matching departure
142 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
143 // we remove those, as the fine-granular results are better
144 if (d->m_data.size() >= 3) {
145 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
146 auto depIt = it;
147 auto arrIt = it;
148 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
149 if (it == it2) {
150 continue;
151 }
152 if (MergeUtil::hasSameDeparture(*it, *it2)) {
153 depIt = it2;
154 }
155 if (MergeUtil::hasSameArrival(*it, *it2)) {
156 arrIt = it2;
157 }
158 }
159
160 if (depIt != it && arrIt != it && depIt != arrIt) {
161 it = d->m_data.erase(it);
162 } else {
163 ++it;
164 }
165 }
166 }
167
168 d->m_resultFinalized = true;
169 }
170
171 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
172 return d->m_data;
173}
174
176{
177 d->m_contextDate = dt;
178}
179
180void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
181{
182}
183
184void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
185{
186 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
187 return MergeUtil::isSame(elem, other);
188 });
189
190 if (it == m_data.end()) {
191 m_data.push_back(elem);
192 } else {
193 *it = MergeUtil::merge(*it, elem);
194 }
195}
196
197QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
198{
199 // expand ticketToken for IATA BCBP data
200 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
201 if (!bcbp.isEmpty()) {
202 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
203 if (bcbpData.size() == 1) {
204 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
205 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
206 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
207 } else {
208 for (const auto &data : bcbpData) {
209 if (MergeUtil::isSame(res, data)) {
211 break;
212 }
213 }
214 }
215 }
216
217 res.setBoardingGroup(StringUtil::simplifiedNoPlaceholder(res.boardingGroup()));
218 if (res.reservationFor().isValid()) {
219 FlightPostProcessor p;
220 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
221 }
222 return processReservation(res);
223}
224
225TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
226{
227 if (res.reservationFor().isValid()) {
228 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
229 }
230 return processReservation(res);
231}
232
233TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
234{
235 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
236 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
237 trip.setDepartureStation(processTrainStation(trip.departureStation()));
238 trip.setArrivalStation(processTrainStation(trip.arrivalStation()));
239 trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
240 trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
241 trip.setTrainNumber(trip.trainNumber().simplified());
242 trip.setTrainName(trip.trainName().simplified());
243
244 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
245 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
246 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
247 trip.setArrivalTime(trip.arrivalTime().addDays(1));
248 }
249
250 return trip;
251}
252
253static void applyStationData(const KnowledgeDb::TrainStation &record, TrainStation &station)
254{
255 if (!station.geo().isValid() && record.coordinate.isValid()) {
257 geo.setLatitude(record.coordinate.latitude);
258 geo.setLongitude(record.coordinate.longitude);
259 station.setGeo(geo);
260 }
261 auto addr = station.address();
262 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
263 addr.setAddressCountry(record.country.toString());
264 station.setAddress(addr);
265 }
266}
267
268static void applyStationCountry(const QString &isoCode, TrainStation &station)
269{
270 auto addr = station.address();
271 if (addr.addressCountry().isEmpty()) {
272 addr.setAddressCountry(isoCode.toUpper());
273 station.setAddress(addr);
274 }
275}
276
277TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const
278{
279 const auto id = station.identifier();
280 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
281 station.setIdentifier(QString());
282 } else if (id.startsWith("sncf:"_L1) && id.size() == 10) {
284 applyStationData(record, station);
285 applyStationCountry(id.mid(5, 2).toUpper(), station);
286 } else if (id.startsWith("ibnr:"_L1) && id.size() == 12) {
287 const auto record = KnowledgeDb::stationForIbnr(KnowledgeDb::IBNR{id.mid(5).toUInt()});
288 applyStationData(record, station);
289 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort()) .toString();
290 applyStationCountry(country, station);
291 } else if (id.startsWith("uic:"_L1) && id.size() == 11) {
292 const auto record = KnowledgeDb::stationForUic( KnowledgeDb::UICStation{id.mid(4).toUInt()});
293 applyStationData(record, station);
294 const auto country = KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort()) .toString();
295 applyStationCountry(country, station);
296 } else if (id.startsWith("ir:"_L1) && id.size() > 4) {
297 const auto record = KnowledgeDb::stationForIndianRailwaysStationCode(id.mid(3));
298 applyStationData(record, station);
299 } else if (id.startsWith("benerail:"_L1) && id.size() == 14) {
301 applyStationData(record, station);
302 applyStationCountry(id.mid(9, 2).toUpper(), station);
303 } else if (id.startsWith("vrfi:"_L1) && id.size() >= 7 && id.size() <= 9) {
305 applyStationData(record, station);
306 } else if (id.startsWith("iata:"_L1) && id.size() == 8) {
307 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
308 const auto record = KnowledgeDb::stationForIataCode(iataCode);
309 applyStationData(record, station);
310 // fall back to the airport with the matching IATA code for the country
311 // information we cannot use the coordinate though, as that points to the
312 // actual airport, not the station
313 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(), station);
314 } else if (id.startsWith("amtrak:"_L1) && id.size() == 10) {
315 const auto record = KnowledgeDb::stationForAmtrakStationCode(KnowledgeDb::AmtrakStationCode(QStringView(id).mid(7)));
316 applyStationData(record, station);
317 } else if (id.startsWith("via:"_L1) && id.size() == 8) {
318 const auto record = KnowledgeDb::stationForViaRailStationCode(KnowledgeDb::ViaRailStationCode(QStringView(id).mid(4)));
319 applyStationData(record, station);
320 } else if (id.startsWith("uk:"_L1) && id.size() == 6) {
321 const auto record = KnowledgeDb::stationForUkRailwayStationCode(KnowledgeDb::UKRailwayStationCode(QStringView(id).mid(3)));
322 applyStationData(record, station);
323 }
324
325 return processPlace(station);
326}
327
328QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, QDate departureDay, const TrainStation& station) const
329{
330 if (!dt.isValid()) {
331 return dt;
332 }
333
334 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
335 dt.setDate(departureDay);
336 }
337 return processTimeForLocation(dt, station);
338}
339
340BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
341{
342 if (res.reservationFor().isValid()) {
343 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
344 }
345 return processReservation(res);
346}
347
348BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
349{
350 trip.setDepartureBusStop(processPlace(trip.departureBusStop()));
351 trip.setArrivalBusStop(processPlace(trip.arrivalBusStop()));
352 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop()));
353 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop()));
354 trip.setBusNumber(trip.busNumber().simplified());
355 trip.setBusName(trip.busName().simplified());
356 return trip;
357}
358
359BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
360{
361 if (res.reservationFor().isValid()) {
362 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
363 }
364 return processReservation(res);
365}
366
367BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
368{
369 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
370 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
371 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
372 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
373
374 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
375 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
376 if (duration < 0 && duration > -3600*24) {
377 trip.setArrivalTime(trip.arrivalTime().addDays(1));
378 }
379
380 return trip;
381}
382
383LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
384{
385 if (res.reservationFor().isValid()) {
386 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
387 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
388 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
389 }
390 return processReservation(res);
391}
392
393TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
394{
395 res.setPickupLocation(processPlace(res.pickupLocation()));
396 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
397 return processReservation(res);
398}
399
400RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
401{
402 if (res.reservationFor().isValid()) {
403 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
404 }
405 res.setPickupLocation(processPlace(res.pickupLocation()));
406 res.setDropoffLocation(processPlace(res.dropoffLocation()));
407 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
408 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
409 return processReservation(res);
410}
411
412RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
413{
414 car.setName(car.name().trimmed());
415 return car;
416}
417
418FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
419{
420 if (res.reservationFor().isValid()) {
421 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
422 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
423 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
424 }
425 return processReservation(res);
426}
427
428TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
429{
430 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
431 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
432 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
433 return visit;
434}
435
436EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
437{
438 if (res.reservationFor().isValid()) {
439 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
440 }
441 return processReservation(res);
442}
443
444KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
445{
446 event.setName(StringUtil::clean(event.name()));
447
448 // normalize location to be a Place
449 if (JsonLd::isA<PostalAddress>(event.location())) {
450 Place place;
451 place.setAddress(event.location().value<PostalAddress>());
452 event.setLocation(place);
453 }
454
455 if (JsonLd::isA<Place>(event.location())) {
456 event.setLocation(processPlace(event.location().value<Place>()));
457
458 // try to obtain timezones if we have a location
459 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
460 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
461 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
462 }
463
464 return event;
465}
466
467static QString processCurrency(const QString &currency)
468{
469 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
470 return {};
471 }
472 return currency;
473}
474
475Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
476{
477 ticket.setName(StringUtil::clean(ticket.name()));
478 ticket.setTicketNumber(ticket.ticketNumber().simplified());
479 ticket.setUnderName(processPerson(ticket.underName()));
480 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
481 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
482 return ticket;
483}
484
485ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
486{
487 // remove empty dummy entries found eg. in ERA FCB data
488 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
489 program.setProgramName(QString());
490 }
491
492 program.setProgramName(program.programName().simplified());
493 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
494 if (program.programName().isEmpty() && !program.programName().isNull()) {
495 program.setProgramName(QString());
496 }
497 program.setMember(processPerson(program.member()));
498 return program;
499}
500
501Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
502{
503 seat.setSeatSection(seat.seatSection().simplified());
504 seat.setSeatRow(seat.seatRow().simplified());
505 seat.setSeatNumber(seat.seatNumber().simplified());
506 seat.setSeatingType(seat.seatingType().simplified());
507 return seat;
508}
509
510template <typename T>
511T ExtractorPostprocessorPrivate::processReservation(T res) const
512{
513 res.setUnderName(processPerson(res.underName().template value<Person>()));
514 res.setPotentialAction(processActions(res.potentialAction()));
515 res.setReservationNumber(res.reservationNumber().trimmed());
516 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
517 res.setPriceCurrency(processCurrency(res.priceCurrency()));
518
519 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
520 res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>()));
521 }
522 return res;
523}
524
/** Upper case honorifics that simplifyNamePart() strips from the start or end of a name. */
static constexpr const char* name_prefixes[] = {
    "DR",
    "MR",
    "MRS",
    "MS",
};
528
529static bool isSeparator(QChar c)
530{
531 return c == QLatin1Char(' ') || c == QLatin1Char('/');
532}
533
534static QString simplifyNamePart(QString n)
535{
536 n = n.simplified();
537
538 for (auto prefix : name_prefixes) {
539 const int prefixLen = std::strlen(prefix);
540 if (n.size() > prefixLen + 2 &&
541 n.startsWith(QLatin1StringView(prefix, prefixLen),
543 isSeparator(n[prefixLen])) {
544 return n.mid(prefixLen + 1);
545 }
546 if (n.size() > prefixLen + 2 &&
547 n.endsWith(QLatin1StringView(prefix, prefixLen),
549 isSeparator(n[n.size() - prefixLen - 1])) {
550 return n.left(n.size() - prefixLen - 1);
551 }
552 }
553
554 return n;
555}
556
557KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
558{
559 person.setName(simplifyNamePart(person.name()));
560 person.setFamilyName(simplifyNamePart(person.familyName()));
561 person.setGivenName(simplifyNamePart(person.givenName()));
562
563 // fill name with name parts, if it's empty
564 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
565 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
566 {
567 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
568 }
569
570 return person;
571}
572
573PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
574{
575 addr.setAddressCountry(addr.addressCountry().simplified());
576
577 // convert to ISO 3166-1 alpha-2 country codes
578 if (addr.addressCountry().size() > 2) {
579 QString alpha2Code;
580
581 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
582 if (addr.addressCountry().size() == 3) {
583 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
584 }
585 if (alpha2Code.isEmpty()) {
586 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
587 }
588 if (!alpha2Code.isEmpty()) {
589 addr.setAddressCountry(alpha2Code);
590 }
591 }
592
593 // upper case country codes
594 if (addr.addressCountry().size() == 2) {
595 addr.setAddressCountry(addr.addressCountry().toUpper());
596 }
597
598 // normalize strings
599 addr.setStreetAddress(addr.streetAddress().simplified());
600 addr.setPostalCode(addr.postalCode().simplified());
601 addr.setAddressLocality(addr.addressLocality().simplified());
602 addr.setAddressRegion(addr.addressRegion().simplified());
603
604#if HAVE_PHONENUMBER
605 // recover country from phone number, if we have that
606 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
607 const auto phoneStr = phoneNumber.toStdString();
608 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
609 i18n::phonenumbers::PhoneNumber number;
610 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
611 std::string isoCode;
612 util->GetRegionCodeForNumber(number, &isoCode);
613 if (!isoCode.empty() && isoCode != "ZZ") {
614 addr.setAddressCountry(QString::fromStdString(isoCode));
615 }
616 }
617 }
618#endif
619
620 if (geo.isValid() && addr.addressCountry().size() != 2) {
621 const auto country = KCountry::fromLocation(geo.latitude(), geo.longitude());
622 if (country.isValid()) {
623 addr.setAddressCountry(country.alpha2());
624 }
625 }
626
627 AddressParser addrParser;
628 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
629 addrParser.parse(addr);
630 addr = addrParser.result();
631 return addr;
632}
633
634QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
635{
636#if HAVE_PHONENUMBER
637 // or complete the phone number if we know the country
638 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
639 auto phoneStr = phoneNumber.toStdString();
640 const auto isoCode = addr.addressCountry().toStdString();
641 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
642 i18n::phonenumbers::PhoneNumber number;
643 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
644 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
645 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
646 return QString::fromStdString(phoneStr);
647 }
648 }
649 }
650#else
651 Q_UNUSED(addr)
652#endif
653 return phoneNumber.simplified();
654}
655
656QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
657{
658 // remove non-actions and actions with invalid URLs
659 QUrl viewUrl;
660 for (auto it = actions.begin(); it != actions.end();) {
661 if (!JsonLd::canConvert<Action>(*it)) {
662 it = actions.erase(it);
663 continue;
664 }
665
666 const auto action = JsonLd::convert<Action>(*it);
667 if (!action.target().isValid()) {
668 it = actions.erase(it);
669 continue;
670 }
671
672 if (JsonLd::isA<ViewAction>(*it)) {
673 viewUrl = action.target();
674 }
675 ++it;
676 }
677
678 // normalize the order, so JSON comparison still yields correct results
679 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
680 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
681 });
682
683 // remove actions that don't actually have their own target, or duplicates
684 QUrl prevUrl;
685 const char* prevType = nullptr;
686 for (auto it = actions.begin(); it != actions.end();) {
687 const auto action = JsonLd::convert<Action>(*it);
688 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
689 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
690 prevUrl = action.target();
691 prevType = (*it).typeName();
692 ++it;
693 } else {
694 it = actions.erase(it);
695 }
696 }
697
698 return actions;
699}
700
701template <typename T>
702QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
703{
704 if (!dt.isValid() ) {
705 return dt;
706 }
707 if ((dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
708 if (KnowledgeDb::isPlausibleTimeZone(dt.timeZone(), place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion())) {
709 return dt;
710 }
711 // drop timezones where we are sure they don't match the location
713 }
714
715 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
716 if (!tz.isValid()) {
717 return dt;
718 }
719
720 // prefer our timezone over externally provided UTC offset, if they match
721 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
722 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
723 return dt;
724 }
725
726 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
727 dt.setTimeZone(tz);
728 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
729 dt = dt.toTimeZone(tz);
730 }
731 return dt;
732}
static KCountry fromLocation(float latitude, float longitude)
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void setValidationEnabled(bool validate)
Enable or disable validation.
void process(const QList< QVariant > &data)
This will normalize and augment the given data elements and merge them with already added data elemen...
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
Base class for places.
Definition place.h:69
QString identifier
Identifier.
Definition place.h:85
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
Train station.
Definition place.h:126
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool isA(const QVariant &value)
Returns true if value is of type T.
Definition datatypes.h:24
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
T convert(const QVariant &value)
Up-cast value to T.
Definition datatypes.h:47
AlphaId< UnalignedNumber< 3 >, 4 > ViaRailStationCode
Via Rail station code.
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
AlphaId< uint16_t, 3 > UKRailwayStationCode
UK railway station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
AlphaId< uint16_t, 3 > AmtrakStationCode
Amtrak station codes.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:169
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
QString simplifiedNoPlaceholder(const QString &s)
Same as QString::simplified() and dropping everything that just contains punctuation or dash character...
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
QStringView country(QStringView ifopt)
bool isValid(int year, int month, int day)
int year() const const
QDateTime addDays(qint64 ndays) const const
QDate date() const const
bool isValid() const const
int offsetFromUtc() const const
qint64 secsTo(const QDateTime &other) const const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const const
QTimeZone timeZone() const const
QDateTime toTimeZone(const QTimeZone &timeZone) const const
iterator begin()
iterator end()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromStdString(const std::string &str)
bool isEmpty() const const
bool isNull() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString simplified() const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
std::string toStdString() const const
QString toUpper() const const
QString trimmed() const const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const const
T value() const const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 24 2025 11:52:35 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.