KItinerary

airportdb.cpp
1 /*
2  SPDX-FileCopyrightText: 2017 Volker Krause <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6 
7 #include "airportdb.h"
8 #include "airportdb_p.h"
9 #include "airportdb_data.cpp"
10 #include "stringutil.h"
11 #include "timezonedb.h"
12 
13 #include <QDebug>
14 #include <QRegularExpression>
15 #include <QTimeZone>
16 
17 #include <algorithm>
18 #include <cstring>
19 
20 namespace KItinerary {
21 namespace KnowledgeDb {
22 
23 static_assert(alignof(Airport) <= sizeof(Airport), "Airport struct alignment too big!");
24 
25 static bool operator<(const Airport &lhs, IataCode rhs)
26 {
27  return lhs.iataCode < rhs;
28 }
29 
31 {
32  const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
33  if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
34  return {};
35  }
36 
37  return (*it).coordinate;
38 }
39 
41 {
42  const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
43  if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
44  return {};
45  }
46 
47  return KnowledgeDb::toQTimeZone(KnowledgeDb::timezoneForLocation((*it).coordinate.latitude, (*it).coordinate.longitude, (*it).country));
48 }
49 
51 {
52  const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), iataCode);
53  if (it == std::end(airport_table) || (*it).iataCode != iataCode) {
54  return {};
55  }
56 
57  return (*it).country;
58 }
59 
60 static QString normalizeFragment(const QString &s)
61 {
62  auto res = StringUtil::normalize(s);
63  // resolve abbreviations
64  if (res == QLatin1String("intl")) return QStringLiteral("international");
65 
66  return res;
67 }
68 
69 static void applyTransliterations(QStringList &fragments)
70 {
71  // note that the output has the corresponding diacritic markers already stripped,
72  // as StringUtil::normalize has already been applied to fragments
73  // similarly, the input is already case-folded
74  for (auto &fragment : fragments) {
75  fragment.replace(QLatin1String("ae"), QLatin1String("a"));
76  fragment.replace(QLatin1String("oe"), QLatin1String("o"));
77  fragment.replace(QLatin1String("ue"), QLatin1String("u"));
78  }
79 }
80 
81 static IataCode iataCodeForUniqueFragment(const QString &s)
82 {
83  const auto it = std::lower_bound(std::begin(name1_string_index), std::end(name1_string_index), s.toUtf8(), [](const Name1Index &lhs, const QByteArray &rhs) {
84  const auto cmp = strncmp(name1_string_table + lhs.offset(), rhs.constData(), std::min<int>(lhs.length, rhs.size()));
85  if (cmp == 0) {
86  return lhs.length < rhs.size();
87  }
88  return cmp < 0;
89  });
90  if (it == std::end(name1_string_index) || it->length != s.toUtf8().size() || strncmp(name1_string_table + it->offset(), s.toUtf8().constData(), it->length) != 0) {
91  return {};
92  }
93  return airport_table[it->iataIndex].iataCode;
94 }
95 
96 static void iataCodeForUniqueFragment(const QStringList &fragments, std::vector<IataCode> &codes)
97 {
98  for (const auto &s : fragments) {
99  const auto foundCode = iataCodeForUniqueFragment(s);
100  if (!foundCode.isValid()) {
101  continue;
102  }
103 
104  auto it = std::lower_bound(codes.begin(), codes.end(), foundCode);
105  if (it == codes.end() || (*it) != foundCode) {
106  codes.insert(it, foundCode);
107  }
108  }
109 }
110 
111 static void iataCodeForNonUniqueFragments(const QStringList &fragments, std::vector<IataCode> &codes)
112 {
113  // we didn't find a unique name fragment, try the non-unique index
114  QSet<uint16_t> iataIdxs;
115  for (const auto &s : fragments) {
116  const auto it = std::lower_bound(std::begin(nameN_string_index), std::end(nameN_string_index), s.toUtf8(), [](const NameNIndex &lhs, const QByteArray &rhs) {
117  const auto cmp = strncmp(nameN_string_table + lhs.strOffset, rhs.constData(), std::min<int>(lhs.strLength, rhs.size()));
118  if (cmp == 0) {
119  return lhs.strLength < rhs.size();
120  }
121  return cmp < 0;
122  });
123  if (it == std::end(nameN_string_index) || it->strLength != s.toUtf8().size() || strncmp(nameN_string_table + it->strOffset, s.toUtf8().constData(), it->strLength) != 0) {
124  continue;
125  }
126 
127  // TODO we can do this in-place in codes
128  QSet<uint16_t> candidates;
129  candidates.reserve(it->iataCount);
130  for (auto i = 0; i < it->iataCount; ++i) {
131  candidates.insert(nameN_iata_table[it->iataOffset + i]);
132  }
133  if (iataIdxs.isEmpty()) { // first round
134  iataIdxs = candidates;
135  continue;
136  }
137 
138  // ignore the imprecisely used "international" if it results in an empty set here
139  if (s == QLatin1String("international") && !iataIdxs.intersects(candidates)) {
140  continue;
141  }
142 
143  iataIdxs &= candidates;
144  if (iataIdxs.isEmpty()) {
145  break;
146  }
147  }
148 
149  std::transform(iataIdxs.begin(), iataIdxs.end(), std::back_inserter(codes), [](const auto idx) { return airport_table[idx].iataCode; });
150  std::sort(codes.begin(), codes.end());
151 }
152 
153 static IataCode iataCodeForIataCodeFragment(const QStringList &fragments)
154 {
155  IataCode code;
156  for (const auto &s : fragments) {
157  if (s.size() != 3) {
158  continue;
159  }
160  if (!std::all_of(s.begin(), s.end(), [](const auto c) { return c.isUpper(); })) {
161  continue;
162  }
163  const IataCode searchCode{s};
164  if (code.isValid() && searchCode != code) {
165  return {};
166  }
167  const auto it = std::lower_bound(std::begin(airport_table), std::end(airport_table), searchCode);
168  if (it != std::end(airport_table) && (*it).iataCode == searchCode) {
169  code = searchCode;
170  }
171  // check that this is only a IATA code, not also a (conflicting) name fragment
172  const auto uniqueFragmentCode = iataCodeForUniqueFragment(normalizeFragment(s));
173  if (uniqueFragmentCode.isValid() && code.isValid() && uniqueFragmentCode != code) {
174  return {};
175  }
176  }
177  return code;
178 }
179 
180 static void iataCodeForNameFragments(const QStringList &fragments, std::vector<IataCode> &codes)
181 {
182  iataCodeForUniqueFragment(fragments, codes);
183  if (!codes.empty()) {
184  return;
185  }
186  iataCodeForNonUniqueFragments(fragments, codes);
187 }
188 
189 static QStringList splitToFragments(const QString &name)
190 {
191  return name.split(QRegularExpression(QStringLiteral("[ 0-9/'\"\\(\\)&\\,.–„-]")),
192 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
194 #else
196 #endif
197 }
198 
199 }
200 
201 std::vector<KnowledgeDb::IataCode> KnowledgeDb::iataCodesFromName(const QString &name)
202 {
203  const auto fragments = splitToFragments(name);
204  QStringList normalizedFragments;
205  normalizedFragments.reserve(fragments.size());
206  std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
207 
208  std::vector<IataCode> codes, candidates;
209  iataCodeForNameFragments(normalizedFragments, codes);
210 
211  // try again, with alternative translitarations of e.g. umlauts replaced
212  applyTransliterations(normalizedFragments);
213  iataCodeForNameFragments(normalizedFragments, candidates);
214  if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
215  codes = std::move(candidates);
216  }
217 
218  // check if the name contained the IATA code as disambiguation already
219  const auto code = iataCodeForIataCodeFragment(fragments);
220  if (code.isValid()) {
221  return {code};
222  }
223 
224  // attempt to cut off possibly confusing fancy terminal names
225  auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
226  if (it != normalizedFragments.end()) {
227  normalizedFragments.erase(it, normalizedFragments.end());
228  candidates.clear();
229  iataCodeForNameFragments(normalizedFragments, candidates);
230  if (!candidates.empty() && (codes.empty() || candidates.size() < codes.size())) {
231  codes = std::move(candidates);
232  }
233  }
234  return codes;
235 }
236 
238 {
239  const auto fragments = splitToFragments(name);
240  QStringList normalizedFragments;
241  normalizedFragments.reserve(fragments.size());
242  std::transform(fragments.begin(), fragments.end(), std::back_inserter(normalizedFragments), [](const auto &s) { return normalizeFragment(s); });
243 
244  std::vector<IataCode> codes;
245  iataCodeForNameFragments(normalizedFragments, codes);
246  if (codes.size() == 1) {
247  return codes[0];
248  }
249  codes.clear();
250 
251  // try again, with alternative translitarations of e.g. umlauts replaced
252  applyTransliterations(normalizedFragments);
253  iataCodeForNameFragments(normalizedFragments, codes);
254  if (codes.size() == 1) {
255  return codes[0];
256  }
257  codes.clear();
258 
259  // check if the name contained the IATA code as disambiguation already
260  const auto code = iataCodeForIataCodeFragment(fragments);
261  if (code.isValid()) {
262  return {code};
263  }
264 
265  // attempt to cut off possibly confusing fancy terminal names
266  auto it = std::find(normalizedFragments.begin(), normalizedFragments.end(), QStringLiteral("terminal"));
267  if (it != normalizedFragments.end()) {
268  normalizedFragments.erase(it, normalizedFragments.end());
269  iataCodeForNameFragments(normalizedFragments, codes);
270  }
271  if (codes.size() == 1) {
272  return codes[0];
273  }
274  return {};
275 }
276 
277 }
QChar normalize(QChar c)
Convert c to case-folded form and remove diacritic marks.
Definition: stringutil.cpp:14
Geographical coordinate.
Definition: knowledgedb.h:28
constexpr bool isValid() const
Returns true if this is a valid identifier.
Definition: alphaid.h:56
Classes for reservation/travel data models, data extraction and data augmentation.
QString::iterator end()
void reserve(int alloc)
int size() const const
Airport information structure as used in the database.
Definition: airportdb.h:25
QList::iterator erase(QList::iterator pos)
KnowledgeDb::CountryId countryForAirport(IataCode iataCode)
Returns the country the airport with IATA code iataCode is in.
Definition: airportdb.cpp:50
QSet::iterator insert(const T &value)
Tz timezoneForLocation(float lat, float lon, CountryId country)
Returns the timezone for the given location consisting of coordinates and country.
Definition: timezonedb.cpp:99
QTimeZone timezoneForAirport(IataCode iataCode)
Returns the timezone the airport with IATA code iataCode is in.
Definition: airportdb.cpp:40
const char * constData() const const
QStringList split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
QList::iterator end()
QSet::iterator begin()
SkipEmptyParts
QSet::iterator end()
void reserve(int size)
bool intersects(const QSet< T > &other) const const
Coordinate coordinateForAirport(IataCode iataCode)
Returns the geographical coordinates the airport with IATA code iataCode is in.
Definition: airportdb.cpp:30
bool isEmpty() const const
IataCode iataCodeFromName(const QString &name)
Attempts to find the unique IATA code for the given airport name.
Definition: airportdb.cpp:237
QString::iterator begin()
int size() const const
QList::iterator begin()
std::vector< IataCode > iataCodesFromName(const QString &name)
Returns all possible IATA code candidates for the given airport name.
Definition: airportdb.cpp:201
QTimeZone toQTimeZone(Tz tz)
Returns the corresponding QTimeZone.
Definition: timezonedb.cpp:21
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Wed Jul 8 2020 23:12:30 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.