KItinerary

extractorpostprocessor.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "config-kitinerary.h"
8#include "extractorpostprocessor.h"
9#include "extractorpostprocessor_p.h"
10#include "extractorvalidator.h"
11#include "flightpostprocessor_p.h"
12#include "stringutil.h"
13
14#include "iata/iatabcbpparser.h"
15#include "jsonlddocument.h"
16#include "logging.h"
17#include "mergeutil.h"
18#include "sortutil.h"
19#include "text/addressparser_p.h"
20
21#include "knowledgedb/airportdb.h"
22#include "knowledgedb/timezonedb_p.h"
23#include "knowledgedb/trainstationdb.h"
24
25#include <KItinerary/Action>
26#include <KItinerary/BoatTrip>
27#include <KItinerary/BusTrip>
28#include <KItinerary/Event>
29#include <KItinerary/Flight>
30#include <KItinerary/Organization>
31#include <KItinerary/Person>
32#include <KItinerary/Place>
33#include <KItinerary/ProgramMembership>
34#include <KItinerary/RentalCar>
35#include <KItinerary/Reservation>
36#include <KItinerary/Taxi>
37#include <KItinerary/Ticket>
38#include <KItinerary/TrainTrip>
39#include <KItinerary/Visit>
40
41#include <KCountry>
42
43#include <QDebug>
44#include <QJsonArray>
45#include <QJsonDocument>
46#include <QTimeZone>
47#include <QUrl>
48
49#if HAVE_PHONENUMBER
50#include <phonenumbers/phonenumberutil.h>
51#endif
52
53#include <algorithm>
54#include <cstring>
55
56using namespace KItinerary;
57
58ExtractorPostprocessor::ExtractorPostprocessor()
59 : d(new ExtractorPostprocessorPrivate)
60{
61}
62
// Move construction uses the compiler-generated implementation and is declared non-throwing.
63ExtractorPostprocessor::ExtractorPostprocessor(ExtractorPostprocessor &&) noexcept = default;
65
66void ExtractorPostprocessor::process(const QList<QVariant> &data) {
67 d->m_resultFinalized = false;
68 d->m_data.reserve(d->m_data.size() + data.size());
69 for (auto elem : data) {
70 // reservation types
72 elem = d->processFlightReservation(elem.value<FlightReservation>());
73 } else if (JsonLd::isA<TrainReservation>(elem)) {
74 elem = d->processTrainReservation(elem.value<TrainReservation>());
75 } else if (JsonLd::isA<LodgingReservation>(elem)) {
76 elem = d->processLodgingReservation(elem.value<LodgingReservation>());
78 elem = d->processFoodEstablishmentReservation(elem.value<FoodEstablishmentReservation>());
79 } else if (JsonLd::isA<TouristAttractionVisit>(elem)) {
80 elem = d->processTouristAttractionVisit(elem.value<TouristAttractionVisit>());
81 } else if (JsonLd::isA<BusReservation>(elem)) {
82 elem = d->processBusReservation(elem.value<BusReservation>());
83 } else if (JsonLd::isA<BoatReservation>(elem)) {
84 elem = d->processBoatReservation(elem.value<BoatReservation>());
85 } else if (JsonLd::isA<EventReservation>(elem)) {
86 elem = d->processEventReservation(elem.value<EventReservation>());
87 } else if (JsonLd::isA<RentalCarReservation>(elem)) {
88 elem = d->processRentalCarReservation(elem.value<RentalCarReservation>());
89 } else if (JsonLd::isA<TaxiReservation>(elem)) {
90 elem = d->processTaxiReservation(elem.value<TaxiReservation>());
91 }
92
93 // "reservationFor" types
94 else if (JsonLd::isA<LodgingBusiness>(elem)) {
95 elem = d->processPlace(elem.value<LodgingBusiness>());
96 } else if (JsonLd::isA<FoodEstablishment>(elem)) {
97 elem = d->processPlace(elem.value<FoodEstablishment>());
98 } else if (JsonLd::isA<Event>(elem)) {
99 elem = d->processEvent(elem.value<Event>());
100 } else if (JsonLd::isA<LocalBusiness>(elem)) {
101 elem = d->processPlace(elem.value<LocalBusiness>());
102 }
103
104 // non-reservation types
105 else if (JsonLd::isA<ProgramMembership>(elem)) {
106 elem = d->processProgramMembership(elem.value<ProgramMembership>());
107 } else if (JsonLd::isA<Ticket>(elem)) {
108 elem = d->processTicket(elem.value<Ticket>());
109 }
110
111 d->mergeOrAppend(elem);
112 }
113}
114
116 if (!d->m_resultFinalized) {
117 // fold elements we have reservations for into those reservations
118 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
119 if (JsonLd::isA<Reservation>(*it)) {
120 ++it;
121 continue;
122 }
123
124 bool merged = false;
125 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
126 const auto resFor = JsonLdDocument::readProperty(*it2, "reservationFor");
127 if (MergeUtil::isSame(resFor, *it)) {
128 JsonLdDocument::writeProperty(*it2, "reservationFor", MergeUtil::merge(resFor, *it));
129 merged = true;
130 }
131 }
132
133 if (merged) {
134 it = d->m_data.erase(it);
135 } else {
136 ++it;
137 }
138 }
139
140 // search for "triangular" patterns, ie. a location change element that has a matching departure
141 // and matching arrival to two different other location change elements (A->C vs A->B + B->C).
142 // we remove those, as the fine-granular results are better
143 if (d->m_data.size() >= 3) {
144 for (auto it = d->m_data.begin(); it != d->m_data.end();) {
145 auto depIt = it;
146 auto arrIt = it;
147 for (auto it2 = d->m_data.begin(); it2 != d->m_data.end(); ++it2) {
148 if (it == it2) {
149 continue;
150 }
151 if (MergeUtil::hasSameDeparture(*it, *it2)) {
152 depIt = it2;
153 }
154 if (MergeUtil::hasSameArrival(*it, *it2)) {
155 arrIt = it2;
156 }
157 }
158
159 if (depIt != it && arrIt != it && depIt != arrIt) {
160 it = d->m_data.erase(it);
161 } else {
162 ++it;
163 }
164 }
165 }
166
167 d->m_resultFinalized = true;
168 }
169
170 std::stable_sort(d->m_data.begin(), d->m_data.end(), SortUtil::isBefore);
171 return d->m_data;
172}
173
175{
176 d->m_contextDate = dt;
177}
178
179void ExtractorPostprocessor::setValidationEnabled([[maybe_unused]] bool validate)
180{
181}
182
183void ExtractorPostprocessorPrivate::mergeOrAppend(const QVariant &elem)
184{
185 const auto it = std::find_if(m_data.begin(), m_data.end(), [elem](const QVariant &other) {
186 return MergeUtil::isSame(elem, other);
187 });
188
189 if (it == m_data.end()) {
190 m_data.push_back(elem);
191 } else {
192 *it = MergeUtil::merge(*it, elem);
193 }
194}
195
196QVariant ExtractorPostprocessorPrivate::processFlightReservation(FlightReservation res) const
197{
198 // expand ticketToken for IATA BCBP data
199 const auto bcbp = res.reservedTicket().value<Ticket>().ticketTokenData().toString();
200 if (!bcbp.isEmpty()) {
201 const auto bcbpData = IataBcbpParser::parse(bcbp, m_contextDate);
202 if (bcbpData.size() == 1) {
203 res = JsonLdDocument::apply(bcbpData.at(0), res).value<FlightReservation>();
204 // standardize on the BCBP booking reference, not some secondary one we might have in structured data for example
205 res.setReservationNumber(bcbpData.at(0).value<FlightReservation>().reservationNumber());
206 } else {
207 for (const auto &data : bcbpData) {
208 if (MergeUtil::isSame(res, data)) {
210 break;
211 }
212 }
213 }
214 }
215
216 if (res.reservationFor().isValid()) {
217 FlightPostProcessor p;
218 res.setReservationFor(p.processFlight(res.reservationFor().value<Flight>()));
219 }
220 return processReservation(res);
221}
222
223TrainReservation ExtractorPostprocessorPrivate::processTrainReservation(TrainReservation res) const
224{
225 if (res.reservationFor().isValid()) {
226 res.setReservationFor(processTrainTrip(res.reservationFor().value<TrainTrip>()));
227 }
228 return processReservation(res);
229}
230
231TrainTrip ExtractorPostprocessorPrivate::processTrainTrip(TrainTrip trip) const
232{
233 trip.setArrivalPlatform(trip.arrivalPlatform().trimmed());
234 trip.setDeparturePlatform(trip.departurePlatform().trimmed());
235 trip.setDepartureStation(processTrainStation(trip.departureStation()));
236 trip.setArrivalStation(processTrainStation(trip.arrivalStation()));
237 trip.setDepartureTime(processTrainTripTime(trip.departureTime(), trip.departureDay(), trip.departureStation()));
238 trip.setArrivalTime(processTrainTripTime(trip.arrivalTime(), trip.departureDay(), trip.arrivalStation()));
239 trip.setTrainNumber(trip.trainNumber().simplified());
240 trip.setTrainName(trip.trainName().simplified());
241
242 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
243 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
244 if (duration < 0 && duration > -3600*24 && trip.departureTime().timeSpec() == trip.arrivalTime().timeSpec()) {
245 trip.setArrivalTime(trip.arrivalTime().addDays(1));
246 }
247
248 return trip;
249}
250
251static void applyStationData(const KnowledgeDb::TrainStation &record, TrainStation &station)
252{
253 if (!station.geo().isValid() && record.coordinate.isValid()) {
255 geo.setLatitude(record.coordinate.latitude);
256 geo.setLongitude(record.coordinate.longitude);
257 station.setGeo(geo);
258 }
259 auto addr = station.address();
260 if (addr.addressCountry().isEmpty() && record.country.isValid()) {
261 addr.setAddressCountry(record.country.toString());
262 station.setAddress(addr);
263 }
264}
265
266static void applyStationCountry(const QString &isoCode, TrainStation &station)
267{
268 auto addr = station.address();
269 if (addr.addressCountry().isEmpty()) {
270 addr.setAddressCountry(isoCode.toUpper());
271 station.setAddress(addr);
272 }
273}
274
// Augments a train station based on its identifier and then applies the
// generic place post-processing.
// The identifier is dispatched on its scheme prefix together with its length;
// matching branches call applyStationData() with a knowledge db record and,
// where the identifier or a related lookup provides a country, applyStationCountry().
// An identifier matching none of the branches leaves the station untouched here.
275TrainStation ExtractorPostprocessorPrivate::processTrainStation(TrainStation station) const
276{
277 const auto id = station.identifier();
278 if (id.isEmpty()) { // empty -> null cleanup, to have more compact json-ld output
279 station.setIdentifier(QString());
280 } else if (id.startsWith(QLatin1StringView("sncf:")) && id.size() == 10) {
// the two characters following the prefix are used as the country code
281 const auto record = KnowledgeDb::stationForSncfStationId(
282 KnowledgeDb::SncfStationId{id.mid(5)});
283 applyStationData(record, station);
284 applyStationCountry(id.mid(5, 2).toUpper(), station);
285 } else if (id.startsWith(QLatin1StringView("ibnr:")) && id.size() == 12) {
// NOTE(review): the initializer of record is not visible here - presumably a KnowledgeDb IBNR lookup, confirm
286 const auto record =
288 applyStationData(record, station);
// the two digits following the prefix are looked up as UIC country code
289 const auto country =
290 KnowledgeDb::countryIdForUicCode(QStringView(id).mid(5, 2).toUShort())
291 .toString();
292 applyStationCountry(country, station);
293 } else if (id.startsWith(QLatin1StringView("uic:")) && id.size() == 11) {
294 const auto record = KnowledgeDb::stationForUic(
295 KnowledgeDb::UICStation{id.mid(4).toUInt()});
296 applyStationData(record, station);
// the two digits following the prefix are looked up as UIC country code
297 const auto country =
298 KnowledgeDb::countryIdForUicCode(QStringView(id).mid(4, 2).toUShort())
299 .toString();
300 applyStationCountry(country, station);
301 } else if (id.startsWith(QLatin1StringView("ir:")) && id.size() > 4) {
// NOTE(review): the initializer of record is not visible here - presumably a KnowledgeDb lookup by station code, confirm
302 const auto record =
304 applyStationData(record, station);
305 } else if (id.startsWith(QLatin1StringView("benerail:")) &&
306 id.size() == 14) {
// NOTE(review): the argument of stationForBenerailId() is not visible here - confirm
307 const auto record = KnowledgeDb::stationForBenerailId(
309 applyStationData(record, station);
// the two characters following the prefix are used as the country code
310 applyStationCountry(id.mid(9, 2).toUpper(), station);
311 } else if (id.startsWith(QLatin1StringView("vrfi:")) && id.size() >= 7 &&
312 id.size() <= 9) {
313 const auto record = KnowledgeDb::stationForVRStationCode(
314 KnowledgeDb::VRStationCode(id.mid(5)));
315 applyStationData(record, station);
316 } else if (id.startsWith(QLatin1StringView("iata:")) && id.size() == 8) {
317 const auto iataCode = KnowledgeDb::IataCode(QStringView(id).mid(5));
318 const auto record = KnowledgeDb::stationForIataCode(iataCode);
319 applyStationData(record, station);
320 // fall back to the airport with the matching IATA code for the country
321 // information we cannot use the coordinate though, as that points to the
322 // actual airport, not the station
323 applyStationCountry(KnowledgeDb::countryForAirport(iataCode).toString(),
324 station);
325 } else if (id.startsWith(QLatin1StringView("amtrak:")) && id.size() == 10) {
// NOTE(review): the definition of record is not visible in this branch - confirm
328 applyStationData(record, station);
329 } else if (id.startsWith(QLatin1StringView("via:")) && id.size() == 8) {
// NOTE(review): the definition of record is not visible in this branch - confirm
332 applyStationData(record, station);
333 } else if (id.startsWith(QLatin1StringView("uk:")) && id.size() == 6) {
// NOTE(review): the definition of record is not visible in this branch - confirm
336 applyStationData(record, station);
337 }
338
339 return processPlace(station);
340}
341
342QDateTime ExtractorPostprocessorPrivate::processTrainTripTime(QDateTime dt, QDate departureDay, const TrainStation& station) const
343{
344 if (!dt.isValid()) {
345 return dt;
346 }
347
348 if (dt.date().year() <= 1970 && departureDay.isValid()) { // we just have the time, but not the day
349 dt.setDate(departureDay);
350 }
351 return processTimeForLocation(dt, station);
352}
353
354BusReservation ExtractorPostprocessorPrivate::processBusReservation(BusReservation res) const
355{
356 if (res.reservationFor().isValid()) {
357 res.setReservationFor(processBusTrip(res.reservationFor().value<BusTrip>()));
358 }
359 return processReservation(res);
360}
361
362BusTrip ExtractorPostprocessorPrivate::processBusTrip(BusTrip trip) const
363{
364 trip.setDepartureBusStop(processPlace(trip.departureBusStop()));
365 trip.setArrivalBusStop(processPlace(trip.arrivalBusStop()));
366 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBusStop()));
367 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBusStop()));
368 trip.setBusNumber(trip.busNumber().simplified());
369 trip.setBusName(trip.busName().simplified());
370 return trip;
371}
372
373BoatReservation ExtractorPostprocessorPrivate::processBoatReservation(BoatReservation res) const
374{
375 if (res.reservationFor().isValid()) {
376 res.setReservationFor(processBoatTrip(res.reservationFor().value<BoatTrip>()));
377 }
378 return processReservation(res);
379}
380
381BoatTrip ExtractorPostprocessorPrivate::processBoatTrip(BoatTrip trip) const
382{
383 trip.setDepartureBoatTerminal(processPlace(trip.departureBoatTerminal()));
384 trip.setArrivalBoatTerminal(processPlace(trip.arrivalBoatTerminal()));
385 trip.setDepartureTime(processTimeForLocation(trip.departureTime(), trip.departureBoatTerminal()));
386 trip.setArrivalTime(processTimeForLocation(trip.arrivalTime(), trip.arrivalBoatTerminal()));
387
388 // arrival less than a day before departure is an indication of the extractor failing to detect day rollover
389 const auto duration = trip.departureTime().secsTo(trip.arrivalTime());
390 if (duration < 0 && duration > -3600*24) {
391 trip.setArrivalTime(trip.arrivalTime().addDays(1));
392 }
393
394 return trip;
395}
396
397LodgingReservation ExtractorPostprocessorPrivate::processLodgingReservation(LodgingReservation res) const
398{
399 if (res.reservationFor().isValid()) {
400 res.setReservationFor(processPlace(res.reservationFor().value<LodgingBusiness>()));
401 res.setCheckinTime(processTimeForLocation(res.checkinTime(), res.reservationFor().value<LodgingBusiness>()));
402 res.setCheckoutTime(processTimeForLocation(res.checkoutTime(), res.reservationFor().value<LodgingBusiness>()));
403 }
404 return processReservation(res);
405}
406
407TaxiReservation ExtractorPostprocessorPrivate::processTaxiReservation(TaxiReservation res) const
408{
409 res.setPickupLocation(processPlace(res.pickupLocation()));
410 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
411 return processReservation(res);
412}
413
414RentalCarReservation ExtractorPostprocessorPrivate::processRentalCarReservation(RentalCarReservation res) const
415{
416 if (res.reservationFor().isValid()) {
417 res.setReservationFor(processRentalCar(res.reservationFor().value<RentalCar>()));
418 }
419 res.setPickupLocation(processPlace(res.pickupLocation()));
420 res.setDropoffLocation(processPlace(res.dropoffLocation()));
421 res.setPickupTime(processTimeForLocation(res.pickupTime(), res.pickupLocation()));
422 res.setDropoffTime(processTimeForLocation(res.dropoffTime(), res.dropoffLocation()));
423 return processReservation(res);
424}
425
426RentalCar ExtractorPostprocessorPrivate::processRentalCar(RentalCar car) const
427{
428 car.setName(car.name().trimmed());
429 return car;
430}
431
432FoodEstablishmentReservation ExtractorPostprocessorPrivate::processFoodEstablishmentReservation(FoodEstablishmentReservation res) const
433{
434 if (res.reservationFor().isValid()) {
435 res.setReservationFor(processPlace(res.reservationFor().value<FoodEstablishment>()));
436 res.setStartTime(processTimeForLocation(res.startTime(), res.reservationFor().value<FoodEstablishment>()));
437 res.setEndTime(processTimeForLocation(res.endTime(), res.reservationFor().value<FoodEstablishment>()));
438 }
439 return processReservation(res);
440}
441
442TouristAttractionVisit ExtractorPostprocessorPrivate::processTouristAttractionVisit(TouristAttractionVisit visit) const
443{
444 visit.setTouristAttraction(processPlace(visit.touristAttraction()));
445 visit.setArrivalTime(processTimeForLocation(visit.arrivalTime(), visit.touristAttraction()));
446 visit.setDepartureTime(processTimeForLocation(visit.departureTime(), visit.touristAttraction()));
447 return visit;
448}
449
450EventReservation ExtractorPostprocessorPrivate::processEventReservation(EventReservation res) const
451{
452 if (res.reservationFor().isValid()) {
453 res.setReservationFor(processEvent(res.reservationFor().value<Event>()));
454 }
455 return processReservation(res);
456}
457
458KItinerary::Event ExtractorPostprocessorPrivate::processEvent(KItinerary::Event event) const
459{
460 event.setName(StringUtil::clean(event.name()));
461
462 // normalize location to be a Place
463 if (JsonLd::isA<PostalAddress>(event.location())) {
464 Place place;
465 place.setAddress(event.location().value<PostalAddress>());
466 event.setLocation(place);
467 }
468
469 if (JsonLd::isA<Place>(event.location())) {
470 event.setLocation(processPlace(event.location().value<Place>()));
471
472 // try to obtain timezones if we have a location
473 event.setStartDate(processTimeForLocation(event.startDate(), event.location().value<Place>()));
474 event.setEndDate(processTimeForLocation(event.endDate(), event.location().value<Place>()));
475 event.setDoorTime(processTimeForLocation(event.doorTime(), event.location().value<Place>()));
476 }
477
478 return event;
479}
480
481static QString processCurrency(const QString &currency)
482{
483 if (currency.size() != 3 || !std::all_of(currency.begin(), currency.end(), [](QChar c) { return c.isUpper(); })) {
484 return {};
485 }
486 return currency;
487}
488
489Ticket ExtractorPostprocessorPrivate::processTicket(Ticket ticket) const
490{
491 ticket.setName(StringUtil::clean(ticket.name()));
492 ticket.setTicketNumber(ticket.ticketNumber().simplified());
493 ticket.setUnderName(processPerson(ticket.underName()));
494 ticket.setTicketedSeat(processSeat(ticket.ticketedSeat()));
495 ticket.setPriceCurrency(processCurrency(ticket.priceCurrency()));
496 return ticket;
497}
498
499ProgramMembership ExtractorPostprocessorPrivate::processProgramMembership(ProgramMembership program) const
500{
501 // remove empty dummy entries found eg. in ERA FCB data
502 if (const auto name = program.programName(); std::none_of(name.begin(), name.end(), [](QChar c) { return c.isLetter(); })) {
503 program.setProgramName(QString());
504 }
505
506 program.setProgramName(program.programName().simplified());
507 // avoid emitting spurious empty ProgramMembership objects caused by empty elements in JSON-LD/Microdata input
508 if (program.programName().isEmpty() && !program.programName().isNull()) {
509 program.setProgramName(QString());
510 }
511 program.setMember(processPerson(program.member()));
512 return program;
513}
514
515Seat ExtractorPostprocessorPrivate::processSeat(Seat seat) const
516{
517 seat.setSeatSection(seat.seatSection().simplified());
518 seat.setSeatRow(seat.seatRow().simplified());
519 seat.setSeatNumber(seat.seatNumber().simplified());
520 seat.setSeatingType(seat.seatingType().simplified());
521 return seat;
522}
523
524template <typename T>
525T ExtractorPostprocessorPrivate::processReservation(T res) const
526{
527 res.setUnderName(processPerson(res.underName().template value<Person>()));
528 res.setPotentialAction(processActions(res.potentialAction()));
529 res.setReservationNumber(res.reservationNumber().trimmed());
530 res.setProgramMembershipUsed(processProgramMembership(res.programMembershipUsed()));
531 res.setPriceCurrency(processCurrency(res.priceCurrency()));
532
533 if (JsonLd::isA<Ticket>(res.reservedTicket())) {
534 res.setReservedTicket(processTicket(res.reservedTicket().template value<Ticket>()));
535 }
536 return res;
537}
538
// Upper case prefixes that simplifyNamePart() strips from the start or the end of a name part.
539static constexpr const char* name_prefixes[] = {
540 "DR", "MR", "MRS", "MS"
541};
542
543static bool isSeparator(QChar c)
544{
545 return c == QLatin1Char(' ') || c == QLatin1Char('/');
546}
547
548static QString simplifyNamePart(QString n)
549{
550 n = n.simplified();
551
552 for (auto prefix : name_prefixes) {
553 const int prefixLen = std::strlen(prefix);
554 if (n.size() > prefixLen + 2 &&
555 n.startsWith(QLatin1StringView(prefix, prefixLen),
557 isSeparator(n[prefixLen])) {
558 return n.mid(prefixLen + 1);
559 }
560 if (n.size() > prefixLen + 2 &&
561 n.endsWith(QLatin1StringView(prefix, prefixLen),
563 isSeparator(n[n.size() - prefixLen - 1])) {
564 return n.left(n.size() - prefixLen - 1);
565 }
566 }
567
568 return n;
569}
570
571KItinerary::Person ExtractorPostprocessorPrivate::processPerson(KItinerary::Person person) const
572{
573 person.setName(simplifyNamePart(person.name()));
574 person.setFamilyName(simplifyNamePart(person.familyName()));
575 person.setGivenName(simplifyNamePart(person.givenName()));
576
577 // fill name with name parts, if it's empty
578 if ((person.name().isEmpty() || person.name() == person.familyName() || person.name() == person.givenName())
579 && !person.familyName().isEmpty() && !person.givenName().isEmpty())
580 {
581 person.setName(person.givenName() + QLatin1Char(' ') + person.familyName());
582 }
583
584 return person;
585}
586
587PostalAddress ExtractorPostprocessorPrivate::processAddress(PostalAddress addr, const QString &phoneNumber, const GeoCoordinates &geo)
588{
589 // convert to ISO 3166-1 alpha-2 country codes
590 if (addr.addressCountry().size() > 2) {
591 QString alpha2Code;
592
593 // try ISO 3166-1 alpha-3, we get that e.g. from Flixbus
594 if (addr.addressCountry().size() == 3) {
595 alpha2Code = KCountry::fromAlpha3(addr.addressCountry()).alpha2();
596 }
597 if (alpha2Code.isEmpty()) {
598 alpha2Code = KCountry::fromName(addr.addressCountry()).alpha2();
599 }
600 if (!alpha2Code.isEmpty()) {
601 addr.setAddressCountry(alpha2Code);
602 }
603 }
604
605 // upper case country codes
606 if (addr.addressCountry().size() == 2) {
607 addr.setAddressCountry(addr.addressCountry().toUpper());
608 }
609
610 // normalize strings
611 addr.setStreetAddress(addr.streetAddress().simplified());
612 addr.setPostalCode(addr.postalCode().simplified());
613 addr.setAddressLocality(addr.addressLocality().simplified());
614 addr.setAddressRegion(addr.addressRegion().simplified());
615
616#if HAVE_PHONENUMBER
617 // recover country from phone number, if we have that
618 if (!phoneNumber.isEmpty() && addr.addressCountry().size() != 2) {
619 const auto phoneStr = phoneNumber.toStdString();
620 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
621 i18n::phonenumbers::PhoneNumber number;
622 if (util->ParseAndKeepRawInput(phoneStr, "ZZ", &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
623 std::string isoCode;
624 util->GetRegionCodeForNumber(number, &isoCode);
625 if (!isoCode.empty() && isoCode != "ZZ") {
626 addr.setAddressCountry(QString::fromStdString(isoCode));
627 }
628 }
629 }
630#endif
631
632 if (geo.isValid() && addr.addressCountry().isEmpty()) {
633 addr.setAddressCountry(KCountry::fromLocation(geo.latitude(), geo.longitude()).alpha2());
634 }
635
636 AddressParser addrParser;
637 addrParser.setFallbackCountry(KCountry::fromQLocale(QLocale().territory()).alpha2());
638 addrParser.parse(addr);
639 addr = addrParser.result();
640 return addr;
641}
642
643QString ExtractorPostprocessorPrivate::processPhoneNumber(const QString &phoneNumber, const PostalAddress &addr)
644{
645#if HAVE_PHONENUMBER
646 // or complete the phone number if we know the country
647 if (!phoneNumber.isEmpty() && addr.addressCountry().size() == 2) {
648 auto phoneStr = phoneNumber.toStdString();
649 const auto isoCode = addr.addressCountry().toStdString();
650 const auto util = i18n::phonenumbers::PhoneNumberUtil::GetInstance();
651 i18n::phonenumbers::PhoneNumber number;
652 if (util->ParseAndKeepRawInput(phoneStr, isoCode, &number) == i18n::phonenumbers::PhoneNumberUtil::NO_PARSING_ERROR) {
653 if (number.country_code_source() == i18n::phonenumbers::PhoneNumber_CountryCodeSource_FROM_DEFAULT_COUNTRY) {
654 util->Format(number, i18n::phonenumbers::PhoneNumberUtil::INTERNATIONAL, &phoneStr);
655 return QString::fromStdString(phoneStr);
656 }
657 }
658 }
659#else
660 Q_UNUSED(addr)
661#endif
662 return phoneNumber.simplified();
663}
664
665QVariantList ExtractorPostprocessorPrivate::processActions(QVariantList actions) const
666{
667 // remove non-actions and actions with invalid URLs
668 QUrl viewUrl;
669 for (auto it = actions.begin(); it != actions.end();) {
670 if (!JsonLd::canConvert<Action>(*it)) {
671 it = actions.erase(it);
672 continue;
673 }
674
675 const auto action = JsonLd::convert<Action>(*it);
676 if (!action.target().isValid()) {
677 it = actions.erase(it);
678 continue;
679 }
680
681 if (JsonLd::isA<ViewAction>(*it)) {
682 viewUrl = action.target();
683 }
684 ++it;
685 }
686
687 // normalize the order, so JSON comparison still yields correct results
688 std::sort(actions.begin(), actions.end(), [](const QVariant &lhs, const QVariant &rhs) {
689 return strcmp(lhs.typeName(), rhs.typeName()) < 0;
690 });
691
692 // remove actions that don't actually have their own target, or duplicates
693 QUrl prevUrl;
694 const char* prevType = nullptr;
695 for (auto it = actions.begin(); it != actions.end();) {
696 const auto action = JsonLd::convert<Action>(*it);
697 const auto isDuplicate = action.target() == prevUrl && (prevType ? strcmp(prevType, (*it).typeName()) == 0 : false);
698 if ((JsonLd::isA<ViewAction>(*it) || action.target() != viewUrl) && !isDuplicate) {
699 prevUrl = action.target();
700 prevType = (*it).typeName();
701 ++it;
702 } else {
703 it = actions.erase(it);
704 }
705 }
706
707 return actions;
708}
709
710template <typename T>
711QDateTime ExtractorPostprocessorPrivate::processTimeForLocation(QDateTime dt, const T &place) const
712{
713 if (!dt.isValid() || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() != QTimeZone::utc())) {
714 return dt;
715 }
716
717 const auto tz = KnowledgeDb::timezoneForLocation(place.geo().latitude(), place.geo().longitude(), place.address().addressCountry(), place.address().addressRegion());
718 if (!tz.isValid()) {
719 return dt;
720 }
721
722 // prefer our timezone over externally provided UTC offset, if they match
723 if (dt.timeSpec() == Qt::OffsetFromUTC && tz.offsetFromUtc(dt) != dt.offsetFromUtc()) {
724 qCDebug(Log) << "UTC offset clashes with expected timezone!" << dt << dt.offsetFromUtc() << tz.id() << tz.offsetFromUtc(dt);
725 return dt;
726 }
727
728 if (dt.timeSpec() == Qt::OffsetFromUTC || dt.timeSpec() == Qt::LocalTime) {
729 dt.setTimeZone(tz);
730 } else if (dt.timeSpec() == Qt::UTC || (dt.timeSpec() == Qt::TimeZone && dt.timeZone() == QTimeZone::utc())) {
731 dt = dt.toTimeZone(tz);
732 }
733 return dt;
734}
static KCountry fromLocation(float latitude, float longitude)
static KCountry fromName(QStringView name)
static KCountry fromQLocale(QLocale::Country country)
QString alpha2() const
static KCountry fromAlpha3(const char *alpha3Code)
A boat or ferry reservation.
A boat or ferry trip.
Definition boattrip.h:23
A bus reservation.
A bus trip.
Definition bustrip.h:22
An event reservation.
An event.
Definition event.h:21
Post-process extracted data to filter out garbage and augment data from other sources.
void setContextDate(const QDateTime &dt)
The date the reservation(s) processed here have been made, if known.
QList< QVariant > result() const
This returns the final result of all previously executed processing steps followed by sorting and fil...
void setValidationEnabled(bool validate)
Enable or disable validation.
A flight reservation.
Definition reservation.h:90
A flight.
Definition flight.h:25
Food-related business (such as a restaurant, or a bakery).
Geographic coordinates.
Definition place.h:23
static void writeProperty(QVariant &obj, const char *name, const QVariant &value)
Set property name on object obj to value value.
static QVariant apply(const QVariant &lhs, const QVariant &rhs)
Apply all properties of rhs on to lhs.
static QVariant readProperty(const QVariant &obj, const char *name)
Read property name on object obj.
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QString toString() const
Returns a string representation of this identifier.
Definition alphaid.h:75
A hotel reservation.
Definition reservation.h:77
static QVariant merge(const QVariant &lhs, const QVariant &rhs)
Merge the two given objects.
static bool hasSameArrival(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same arrival.
static bool isSame(const QVariant &lhs, const QVariant &rhs)
Checks if two Reservation or Trip values refer to the same booking element.
static bool hasSameDeparture(const QVariant &lhs, const QVariant &rhs)
Checks whether two transport reservation elements refer to the same departure.
A person.
Definition person.h:20
Base class for places.
Definition place.h:69
QString identifier
Identifier.
Definition place.h:85
Postal address.
Definition place.h:46
QString addressCountry
The country this address is in, as ISO 3166-1 alpha 2 code.
Definition place.h:53
A frequent traveler, bonus points or discount scheme program membership.
A Rental Car reservation.
A car rental.
Definition rentalcar.h:22
A reserved seat.
Definition ticket.h:23
A Taxi reservation.
A booked ticket.
Definition ticket.h:41
A train reservation.
Train station.
Definition place.h:126
A train trip.
Definition traintrip.h:24
QDate departureDay
The scheduled day of departure.
Definition traintrip.h:42
AKONADI_CALENDAR_EXPORT KCalendarCore::Event::Ptr event(const Akonadi::Item &item)
char * toString(const EngineQuery &query)
KIOCORE_EXPORT QString number(KIO::filesize_t size)
QList< QVariant > parse(const QString &message, const QDateTime &externalIssueDateTime=QDateTime())
Parses the bar coded boarding pass message message into a list of FlightReservation instances.
bool canConvert(const QVariant &value)
Checks if the given value can be up-cast to T.
Definition datatypes.h:31
TrainStation stationForViaRailStationCode(ViaRailStationCode code)
Lookup train station data by Via Rail station code.
CountryId countryIdForUicCode(uint16_t uicCountryCode)
Look up country ISO code from a UIC country code.
Definition countrydb.cpp:82
TrainStation stationForUkRailwayStationCode(UKRailwayStationCode code)
Lookup train station data by UK railway station code.
AlphaId< uint16_t, 3 > IataCode
IATA airport code.
Definition iatacode.h:17
TrainStation stationForIbnr(IBNR ibnr)
Lookup train station data by IBNR.
TrainStation stationForAmtrakStationCode(AmtrakStationCode code)
Lookup train station data by Amtrak station code.
TrainStation stationForUic(UICStation uic)
Lookup train station data by UIC station id.
TrainStation stationForIndianRailwaysStationCode(const QString &code)
Lookup train station data by Indian Railways station code.
TrainStation stationForSncfStationId(SncfStationId sncfId)
Lookup train station data by SNCF station id.
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
TrainStation stationForVRStationCode(VRStationCode vrStation)
Lookup train station data by VR (Finland) station code.
TrainStation stationForIataCode(IataCode iataCode)
Lookup train station data by IATA location code.
TrainStation stationForBenerailId(BenerailStationId id)
Lookup train station data by Benerail station identifier.
GeoCoordinates geo(const QVariant &location)
Returns the geo coordinates of a given location.
bool isBefore(const QVariant &lhs, const QVariant &rhs)
Sorting function for top-level reservation/visit/event elements.
Definition sortutil.cpp:151
QString clean(const QString &s)
Cleans up extra white spaces and XML entities from s.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
KI18NLOCALEDATA_EXPORT KCountry country(const char *ianaId)
bool isValid(int year, int month, int day)
int year() const const
QDateTime addDays(qint64 ndays) const const
QDate date() const const
bool isValid() const const
int offsetFromUtc() const const
qint64 secsTo(const QDateTime &other) const const
void setDate(QDate date)
void setTimeZone(const QTimeZone &toZone)
Qt::TimeSpec timeSpec() const const
QTimeZone timeZone() const const
QDateTime toTimeZone(const QTimeZone &timeZone) const const
iterator begin()
iterator end()
bool endsWith(QChar c, Qt::CaseSensitivity cs) const const
QString fromStdString(const std::string &str)
bool isEmpty() const const
bool isNull() const const
QString left(qsizetype n) const const
QString mid(qsizetype position, qsizetype n) const const
QString simplified() const const
qsizetype size() const const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const const
std::string toStdString() const const
QString toUpper() const const
QString trimmed() const const
CaseInsensitive
TimeZone
QTimeZone utc()
bool isValid() const const
T value() const const
Train station entry in the station table.
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:49 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.