KItinerary

airportdb.cpp
1/*
2 SPDX-FileCopyrightText: 2017 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#include "airportdb.h"
8#include "airportdb_p.h"
9#include "airportdb_data.cpp"
10#include "airportnametokenizer_p.h"
11#include "stringutil.h"
12#include "timezonedb_p.h"
13
14#include <QDebug>
15#include <QTimeZone>
16
17#include <algorithm>
18#include <cstring>
19
20namespace KItinerary {
21namespace KnowledgeDb {
22
// Compile-time sanity check: the alignment requirement of Airport must not exceed its size.
// NOTE(review): presumably this protects the layout assumptions of the generated airport_table — confirm.
23static_assert(alignof(Airport) <= sizeof(Airport), "Airport struct alignment too big!");
24
25static bool operator<(const Airport &lhs, IataCode rhs)
26{
27 return lhs.iataCode < rhs;
28}
29
31{
// NOTE(review): the signature line of this function is missing from this listing; the
// cross-reference index at the end of the listing attributes line 30 to
// "Coordinate coordinateForAirport(IataCode iataCode)".
// Locate the first airport_table entry that does not compare less than iataCode
// (presumably the table is sorted by IATA code — confirm against the generated data).
32 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
// lower_bound only yields a candidate position: without an exact code match, return a
// default-constructed result.
33 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
34 return {};
35 }
36
// Exact match: hand back the coordinate stored in the table entry.
37 return (*it).coordinate;
38}
39
41{
// NOTE(review): the signature line of this function is missing from this listing; the
// cross-reference index at the end of the listing attributes line 40 to
// "QTimeZone timezoneForAirport(IataCode iataCode)".
// Locate the first airport_table entry that does not compare less than iataCode
// (presumably the table is sorted by IATA code — confirm against the generated data).
42 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
// lower_bound only yields a candidate position: without an exact code match, return a
// default-constructed result.
43 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
44 return {};
45 }
46
// The timezone is not read from the table entry itself; it is resolved from the entry's
// coordinate and country via timezoneForLocation(). The last argument is passed as a
// default-constructed value.
47 return KnowledgeDb::timezoneForLocation((*it).coordinate.latitude, (*it).coordinate.longitude, (*it).country.toString(), {});
48}
49
51{
// NOTE(review): the signature line of this function is missing from this listing; the
// cross-reference index at the end of the listing attributes line 50 to
// "KnowledgeDb::CountryId countryForAirport(IataCode iataCode)".
// Locate the first airport_table entry that does not compare less than iataCode
// (presumably the table is sorted by IATA code — confirm against the generated data).
52 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
// lower_bound only yields a candidate position: without an exact code match, return a
// default-constructed result.
53 if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
54 return {};
55 }
56
// Exact match: hand back the country stored in the table entry.
57 return (*it).country;
58}
59
60static QString normalizeFragment(const QString &s)
61{
62 auto res = StringUtil::normalize(s);
63 // resolve abbreviations
64 if (res == QLatin1StringView("intl")) {
65 return QStringLiteral("international");
66 }
67
68 return res;
69}
70
71static void applyTransliterations(QStringList &fragments)
72{
73 // note that the output has the corresponding diacritic markers already stripped,
74 // as StringUtil::normalize has already been applied to fragments
75 // similarly, the input is already case-folded
76 for (auto &fragment : fragments) {
77 fragment.replace(QLatin1StringView("ae"), QLatin1StringView("a"));
78 fragment.replace(QLatin1StringView("oe"), QLatin1StringView("o"));
79 fragment.replace(QLatin1StringView("ue"), QLatin1StringView("u"));
80 }
81}
82
83// HACK to work around MSVC string length limit
84static const char* name1_string_table(uint32_t offset)
85{
86 if (offset < sizeof(name1_string_table_0)) {
87 return name1_string_table_0 + offset;
88 }
89 return name1_string_table_1 + (offset - sizeof(name1_string_table_0));
90}
91
92static IataCode iataCodeForUniqueFragment(const QString &s)
93{
94 const auto it = std::lower_bound(std::begin(name1_string_index), std::end(name1_string_index), s.toUtf8(), [](const Name1Index &lhs, const QByteArray &rhs) {
95 const auto cmp = strncmp(name1_string_table(lhs.offset()), rhs.constData(), std::min<int>(lhs.length, rhs.size()));
96 if (cmp == 0) {
97 return lhs.length < rhs.size();
98 }
99 return cmp < 0;
100 });
101 if (it == std::end(name1_string_index) || it->length != s.toUtf8().size() || strncmp(name1_string_table(it->offset()), s.toUtf8().constData(), it->length) != 0) {
102 return {};
103 }
104 return airport_table[it->iataIndex].iataCode;
105}
106
107static void iataCodeForUniqueFragment(const QStringList &fragments, std::vector<IataCode> &codes)
108{
109 for (const auto &s : fragments) {
110 const auto foundCode = iataCodeForUniqueFragment(s);
111 if (!foundCode.isValid()) {
112 continue;
113 }
114
115 auto it = std::lower_bound(codes.begin(), codes.end(), foundCode);
116 if (it == codes.end() || (*it) != foundCode) {
117 codes.insert(it, foundCode);
118 }
119 }
120}
121
122static void iataCodeForNonUniqueFragments(const QStringList &fragments, std::vector<IataCode> &codes)
123{
124 // we didn't find a unique name fragment, try the non-unique index
125 QSet<uint16_t> iataIdxs;
126 for (const auto &s : fragments) {
127 const auto it = std::lower_bound(std::begin(nameN_string_index), std::end(nameN_string_index), s.toUtf8(), [](const NameNIndex &lhs, const QByteArray &rhs) {
128 const auto cmp = strncmp(nameN_string_table + lhs.strOffset, rhs.constData(), std::min<int>(lhs.strLength, rhs.size()));
129 if (cmp == 0) {
130 return lhs.strLength < rhs.size();
131 }
132 return cmp < 0;
133 });
134 if (it == std::end(nameN_string_index) || it->strLength != s.toUtf8().size() || strncmp(nameN_string_table + it->strOffset, s.toUtf8().constData(), it->strLength) != 0) {
135 continue;
136 }
137
138 // TODO we can do this in-place in codes
139 QSet<uint16_t> candidates;
140 candidates.reserve(it->iataCount);
141 for (auto i = 0; i < it->iataCount; ++i) {
142 candidates.insert(nameN_iata_table[it->iataOffset + i]);
143 }
144 if (iataIdxs.isEmpty()) { // first round
145 iataIdxs = candidates;
146 continue;
147 }
148
149 // ignore the imprecisely used "international" if it results in an empty set here
150 if (s == QLatin1StringView("international") &&
151 !iataIdxs.intersects(candidates)) {
152 continue;
153 }
154
155 iataIdxs &= candidates;
156 if (iataIdxs.isEmpty()) {
157 break;
158 }
159 }
160
161 std::transform(iataIdxs.begin(), iataIdxs.end(), std::back_inserter(codes), [](const auto idx) { return airport_table[idx].iataCode; });
162 std::sort(codes.begin(), codes.end());
163}
164
165static IataCode iataCodeForIataCodeFragment(const QStringList &fragments)
166{
167 IataCode code;
168 for (const auto &s : fragments) {
169 if (s.size() != 3) {
170 continue;
171 }
172 if (!std::all_of(s.begin(), s.end(), [](const auto c) { return c.isUpper(); })) {
173 continue;
174 }
175 const IataCode searchCode{s};
176 if (code.isValid() && searchCode != code) {
177 return {};
178 }
179 const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), searchCode);
180 if (it != std::end(airport_table) && (*it).iataCode == searchCode) {
181 code = searchCode;
182 }
183 // check that this is only a IATA code, not also a (conflicting) name fragment
184 const auto uniqueFragmentCode = iataCodeForUniqueFragment(normalizeFragment(s));
185 if (uniqueFragmentCode.isValid() && code.isValid() && uniqueFragmentCode != code) {
186 return {};
187 }
188 }
189 return code;
190}
191
192static void iataCodeForNameFragments(const QStringList &fragments, std::vector<IataCode> &codes)
193{
194 iataCodeForUniqueFragment(fragments, codes);
195 if (!codes.empty()) {
196 return;
197 }
198 iataCodeForNonUniqueFragments(fragments, codes);
199}
200
201static QStringList splitToFragments(QStringView name)
202{
203 AirportNameTokenizer tokenizer(name);
204 return tokenizer.toStringList();
205}
206
207}
208
209std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(QStringView name)
210{
211 const auto fragments = splitToFragments(name);
212 QStringList normalizedFragments;
213 normalizedFragments.reserve(fragments.size());
214 std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
215
216 std::vector<IataCode> codes;
217 std::vector<IataCode> candidates;
218 iataCodeForNameFragments(normalizedFragments, codes);
219
220 // try again, with alternative translitarations of e.g. umlauts replaced
221 applyTransliterations(normalizedFragments);
222 iataCodeForNameFragments(normalizedFragments, candidates);
223 if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
224 codes = std::move(candidates);
225 }
226
227 // check if the name contained the IATA code as disambiguation already
228 const auto code = iataCodeForIataCodeFragment(fragments);
229 if (code.isValid() && std::find(codes.begin(), codes.end(), code) != codes.end()) {
230 return {code};
231 }
232
233 // attempt to cut off possibly confusing fancy terminal names
234 auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
235 if (it != normalizedFragments.end()) {
236 normalizedFragments.erase(it, normalizedFragments.end());
237 candidates.clear();
238 iataCodeForNameFragments(normalizedFragments, candidates);
239 if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
240 codes = std::move(candidates);
241 }
242 }
243 return codes;
244}
245
246}
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition alphaid.h:56
QTimeZone timezoneForAirport(IataCode iataCode)
Returns the timezone the airport with IATA code iataCode is in.
Definition airportdb.cpp:40
Coordinate coordinateForAirport(IataCode iataCode)
Returns the geographical coordinates the airport with IATA code iataCode is in.
Definition airportdb.cpp:30
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition airportdb.cpp:50
QString normalize(QStringView str)
Strips out diacritics and converts to case-folded form.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
const char * constData() const const
qsizetype size() const const
iterator begin()
iterator end()
iterator erase(const_iterator begin, const_iterator end)
void reserve(qsizetype size)
qsizetype size() const const
iterator begin()
iterator end()
iterator insert(const T &value)
bool intersects(const QSet< T > &other) const const
bool isEmpty() const const
void reserve(qsizetype size)
iterator begin()
iterator end()
qsizetype size() const const
QByteArray toUtf8() const const
Airport information structure as used in the database.
Definition airportdb.h:23
Geographical coordinate.
Definition knowledgedb.h:27
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Mon Nov 18 2024 12:09:58 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.