KContacts

vcardparser.cpp
1 /*
2  This file is part of the KContacts framework.
3  SPDX-FileCopyrightText: 2003 Tobias Koenig <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
#include "vcardparser.h"
#include "kcontacts_debug.h"
#include <KCodecs>
#include <QHash>
#include <QTextCodec>
#include <functional>
13 
14 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
15 class StringCache
16 {
17 public:
18  QString fromLatin1(const QByteArray &value)
19  {
20  if (value.isEmpty()) {
21  return QString();
22  }
23 
25  if (it != m_values.constEnd()) {
26  return it.value();
27  }
28 
29  QString string = QString::fromLatin1(value);
30  m_values.insert(value, string);
31  return string;
32  }
33 
34 private:
36 };
37 
38 using namespace KContacts;
39 
40 static void addEscapes(QByteArray &str, bool excludeEscapedComma)
41 {
42  str.replace('\\', "\\\\");
43  if (!excludeEscapedComma) {
44  str.replace(',', "\\,");
45  }
46  str.replace('\r', "\\r");
47  str.replace('\n', "\\n");
48 }
49 
50 static void removeEscapes(QByteArray &str)
51 {
52  // It's more likely that no escape is present, so add fast path
53  if (!str.contains('\\')) {
54  return;
55  }
56  str.replace("\\n", "\n");
57  str.replace("\\N", "\n");
58  str.replace("\\r", "\r");
59  str.replace("\\,", ",");
60  str.replace("\\\\", "\\");
61 }
62 
63 class VCardLineParser
64 {
65 public:
66  VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
67  : m_cache(cache)
68  , m_fetchAnotherLine(fetchAnotherLine)
69  {
70  }
71 
72  void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
73 
74 private:
75  void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
76 
77 private:
78  StringCache &m_cache;
79  std::function<QByteArray()> m_fetchAnotherLine;
80 
81  VCardLine *m_vCardLine = nullptr;
82  QByteArray m_encoding;
83  QByteArray m_charset;
84 };
85 
86 void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
87 {
88  if (paramKey == "encoding") {
89  m_encoding = paramValue.toLower();
90  } else if (paramKey == "charset") {
91  m_charset = paramValue.toLower();
92  }
93  // qDebug() << " add parameter" << paramKey << " = " << paramValue;
94  m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
95 }
96 
97 void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
98 {
99  // qDebug() << currentLine;
100  m_vCardLine = vCardLine;
101  // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
102  // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
103  // Therefore we need a small state machine, just the way I like it.
104  enum State {
105  StateInitial,
106  StateParamKey,
107  StateParamValue,
108  StateQuotedValue,
109  StateAfterParamValue,
110  StateValue,
111  };
112  State state = StateInitial;
113  const int lineLength = currentLine.length();
114  const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
115  QByteArray paramKey;
116  QByteArray paramValue;
117  int start = 0;
118  int pos = 0;
119  for (; pos < lineLength; ++pos) {
120  const char ch = lineData[pos];
121  const bool colonOrSemicolon = (ch == ';' || ch == ':');
122  switch (state) {
123  case StateInitial:
124  if (colonOrSemicolon) {
125  const QByteArray identifier = currentLine.mid(start, pos - start);
126  // qDebug() << " identifier" << identifier;
127  vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
128  start = pos + 1;
129  }
130  if (ch == ';') {
131  state = StateParamKey;
132  } else if (ch == ':') {
133  state = StateValue;
134  } else if (ch == '.') {
135  vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
136  start = pos + 1;
137  }
138  break;
139  case StateParamKey:
140  if (colonOrSemicolon || ch == '=') {
141  paramKey = currentLine.mid(start, pos - start);
142  start = pos + 1;
143  }
144  if (colonOrSemicolon) {
145  // correct the so-called 2.1 'standard'
146  paramValue = paramKey;
147  const QByteArray lowerKey = paramKey.toLower();
148  if (lowerKey == "quoted-printable" || lowerKey == "base64") {
149  paramKey = "encoding";
150  } else {
151  paramKey = "type";
152  }
153  addParameter(paramKey, paramValue);
154  }
155  if (ch == ';') {
156  state = StateParamKey;
157  } else if (ch == ':') {
158  state = StateValue;
159  } else if (ch == '=') {
160  state = StateParamValue;
161  }
162  break;
163  case StateQuotedValue:
164  if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
165  // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
166  paramValue = currentLine.mid(start, pos - start);
167  addParameter(paramKey.toLower(), paramValue);
168  start = pos + 1;
169  if (ch == '"') {
170  state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
171  }
172  }
173  break;
174  case StateParamValue:
175  if (colonOrSemicolon || ch == ',') {
176  paramValue = currentLine.mid(start, pos - start);
177  addParameter(paramKey.toLower(), paramValue);
178  start = pos + 1;
179  }
180  // fall-through intended
181  Q_FALLTHROUGH();
182  case StateAfterParamValue:
183  if (ch == ';') {
184  state = StateParamKey;
185  start = pos + 1;
186  } else if (ch == ':') {
187  state = StateValue;
188  } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
189  state = StateQuotedValue;
190  start = pos + 1;
191  }
192  break;
193  case StateValue:
194  Q_UNREACHABLE();
195  break;
196  }
197 
198  if (state == StateValue) {
199  break;
200  }
201  }
202 
203  if (state != StateValue) { // invalid line, no ':'
204  return;
205  }
206 
207  QByteArray value = currentLine.mid(pos + 1);
208  removeEscapes(value);
209 
210  QByteArray output;
211  bool wasBase64Encoded = false;
212 
213  if (!m_encoding.isEmpty()) {
214  // have to decode the data
215  if (m_encoding == "b" || m_encoding == "base64") {
216  output = QByteArray::fromBase64(value);
217  wasBase64Encoded = true;
218  } else if (m_encoding == "quoted-printable") {
219  // join any qp-folded lines
220  while (value.endsWith('=')) {
221  value.chop(1); // remove the '='
222  value.append(m_fetchAnotherLine());
223  }
224  KCodecs::quotedPrintableDecode(value, output);
225  } else if (m_encoding == "8bit") {
226  output = value;
227  } else {
228  qDebug("Unknown vcard encoding type!");
229  }
230  } else {
231  output = value;
232  }
233 
234  if (!m_charset.isEmpty()) {
235  // have to convert the data
236  QTextCodec *codec = QTextCodec::codecForName(m_charset);
237  if (codec) {
238  vCardLine->setValue(codec->toUnicode(output));
239  } else {
240  vCardLine->setValue(QString::fromUtf8(output));
241  }
242  } else if (wasBase64Encoded) {
243  vCardLine->setValue(output);
244  } else {
245  vCardLine->setValue(QString::fromUtf8(output));
246  }
247 }
248 
249 ////
250 
251 VCardParser::VCardParser()
252 {
253 }
254 
255 VCardParser::~VCardParser()
256 {
257 }
258 
259 VCard::List VCardParser::parseVCards(const QByteArray &text)
260 {
261  VCard currentVCard;
262  VCard::List vCardList;
263  QByteArray currentLine;
264 
265  int lineStart = 0;
266  int lineEnd = text.indexOf('\n');
267 
268  bool inVCard = false;
269 
270  StringCache cache;
271  for (; lineStart != text.size() + 1;
272  lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
273  QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
274  // remove the trailing \r, left from \r\n
275  if (cur.endsWith('\r')) {
276  cur.chop(1);
277  }
278 
279  if (cur.startsWith(' ') //
280  || cur.startsWith('\t')) { // folded line => append to previous
281  currentLine.append(cur.mid(1));
282  continue;
283  } else {
284  if (cur.trimmed().isEmpty()) { // empty line
285  continue;
286  }
287  if (inVCard && !currentLine.isEmpty()) { // now parse the line
288  VCardLine vCardLine;
289 
290  // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
291  auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
292  const QByteArray ret = cur;
293  lineStart = lineEnd + 1;
294  lineEnd = text.indexOf('\n', lineStart);
295  if (lineEnd != -1) {
296  cur = text.mid(lineStart, lineEnd - lineStart);
297  // remove the trailing \r, left from \r\n
298  if (cur.endsWith('\r')) {
299  cur.chop(1);
300  }
301  }
302  return ret;
303  };
304 
305  VCardLineParser lineParser(cache, fetchAnotherLine);
306 
307  lineParser.parseLine(currentLine, &vCardLine);
308 
309  currentVCard.addLine(vCardLine);
310  }
311 
312  // we do not save the start and end tag as vcardline
313  if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
314  inVCard = true;
315  currentLine.clear();
316  currentVCard.clear(); // flush vcard
317  continue;
318  }
319 
320  if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
321  inVCard = false;
322  vCardList.append(currentVCard);
323  currentLine.clear();
324  currentVCard.clear(); // flush vcard
325  continue;
326  }
327 
328  currentLine = cur;
329  }
330  }
331 
332  return vCardList;
333 }
334 
335 static const int FOLD_WIDTH = 75;
336 
337 QByteArray VCardParser::createVCards(const VCard::List &list)
338 {
339  QByteArray text;
340  QByteArray textLine;
341  QString encodingType;
342  QStringList idents;
343  QStringList params;
346  QStringList::Iterator paramIt;
348 
349  VCardLine::List lines;
350  VCardLine::List::ConstIterator lineIt;
351  VCard::List::ConstIterator cardIt;
352 
353  bool hasEncoding;
354 
355  text.reserve(list.size() * 300); // reserve memory to be more efficient
356 
357  // iterate over the cards
358  const VCard::List::ConstIterator listEnd(list.end());
359  for (cardIt = list.begin(); cardIt != listEnd; ++cardIt) {
360  text.append("BEGIN:VCARD\r\n");
361 
362  idents = (*cardIt).identifiers();
363  // VERSION must be first
364  if (idents.contains(QLatin1String("VERSION"))) {
365  const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
366  idents.prepend(str);
367  }
368 
369  for (identIt = idents.constBegin(); identIt != idents.constEnd(); ++identIt) {
370  lines = (*cardIt).lines((*identIt));
371 
372  // iterate over the lines
373  for (lineIt = lines.constBegin(); lineIt != lines.constEnd(); ++lineIt) {
374  QVariant val = (*lineIt).value();
375  if (val.isValid()) {
376  if ((*lineIt).hasGroup()) {
377  textLine = (*lineIt).group().toLatin1() + '.' + (*lineIt).identifier().toLatin1();
378  } else {
379  textLine = (*lineIt).identifier().toLatin1();
380  }
381 
382  params = (*lineIt).parameterList();
383  hasEncoding = false;
384  if (!params.isEmpty()) { // we have parameters
385  for (paramIt = params.begin(); paramIt != params.end(); ++paramIt) {
386  if ((*paramIt) == QLatin1String("encoding")) {
387  hasEncoding = true;
388  encodingType = (*lineIt).parameter(QStringLiteral("encoding")).toLower();
389  }
390 
391  values = (*lineIt).parameters(*paramIt);
392  for (valueIt = values.constBegin(); valueIt != values.constEnd(); ++valueIt) {
393  textLine.append(';' + (*paramIt).toLatin1().toUpper());
394  if (!(*valueIt).isEmpty()) {
395  textLine.append('=' + (*valueIt).toLatin1());
396  }
397  }
398  }
399  }
400 
401  QByteArray input, output;
402  bool checkMultibyte = false; // avoid splitting a multibyte character
403 
404  // handle charset
405  const QString charset = (*lineIt).parameter(QStringLiteral("charset"));
406  if (!charset.isEmpty()) {
407  // have to convert the data
408  const QString value = (*lineIt).value().toString();
409  QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
410  if (codec) {
411  input = codec->fromUnicode(value);
412  } else {
413  checkMultibyte = true;
414  input = value.toUtf8();
415  }
416  } else if ((*lineIt).value().type() == QVariant::ByteArray) {
417  input = (*lineIt).value().toByteArray();
418  } else {
419  checkMultibyte = true;
420  input = (*lineIt).value().toString().toUtf8();
421  }
422 
423  // handle encoding
424  if (hasEncoding) { // have to encode the data
425  if (encodingType == QLatin1Char('b')) {
426  checkMultibyte = false;
427  output = input.toBase64();
428  } else if (encodingType == QLatin1String("quoted-printable")) {
429  checkMultibyte = false;
430  KCodecs::quotedPrintableEncode(input, output, false);
431  }
432  } else {
433  output = input;
434  }
435  addEscapes(output, ((*lineIt).identifier() == QLatin1String("CATEGORIES") || (*lineIt).identifier() == QLatin1String("GEO")));
436 
437  if (!output.isEmpty()) {
438  textLine.append(':' + output);
439 
440  if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
441  if (checkMultibyte) {
442  // RFC 6350: Multi-octet characters MUST remain contiguous.
443  // we know that textLine contains UTF-8 encoded characters
444  int lineLength = 0;
445  for (int i = 0; i < textLine.length(); ++i) {
446  if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
447  int sequenceLength = 2;
448  if ((textLine[i] & 0xE0) == 0xE0) {
449  sequenceLength = 3;
450  } else if ((textLine[i] & 0xF0) == 0xF0) {
451  sequenceLength = 4;
452  }
453  if ((lineLength + sequenceLength) > FOLD_WIDTH) {
454  // the current line would be too long. fold it
455  text += "\r\n " + textLine.mid(i, sequenceLength);
456  lineLength = 1 + sequenceLength; // incl. leading space
457  } else {
458  text += textLine.mid(i, sequenceLength);
459  lineLength += sequenceLength;
460  }
461  i += sequenceLength - 1;
462  } else {
463  text += textLine[i];
464  ++lineLength;
465  }
466  if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
467  text += "\r\n ";
468  lineLength = 1; // leading space
469  }
470  }
471  text += "\r\n";
472  } else {
473  for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
474  text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
475  }
476  }
477  } else {
478  text.append(textLine + "\r\n");
479  }
480  }
481  }
482  }
483  }
484 
485  text.append("END:VCARD\r\n");
486  text.append("\r\n");
487  }
488 
489  return text;
490 }
QByteArray fromUnicode(const QString &str) const const
void clear()
QByteArray toLower() const const
void append(const T &value)
QByteArray trimmed() const const
void reserve(int size)
QVector::iterator begin()
QVector::const_iterator constEnd() const const
void chop(int n)
bool contains(const QString &str, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
bool startsWith(const QByteArray &ba) const const
T value() const const
QHash::const_iterator constFind(const Key &key) const const
T takeAt(int i)
int length() const const
QVector< V > values(const QMultiHash< K, V > &c)
int indexOf(char ch, int from) const const
QString fromUtf8(const char *str, int size)
QHash::const_iterator constEnd() const const
bool isEmpty() const const
bool isEmpty() const const
const char * constData() const const
QByteArray & replace(int pos, int len, const char *after)
typedef Iterator
KCODECS_EXPORT QByteArray quotedPrintableEncode(const QByteArray &in, bool useCRLF=true)
QByteArray mid(int pos, int len) const const
int indexOf(QStringView str, int from) const const
QByteArray & append(char ch)
QList::iterator end()
QString toLower() const const
const T value(const Key &key) const const
QVector::const_iterator constBegin() const const
QByteArray toLatin1() const const
QByteArray fromBase64(const QByteArray &base64, QByteArray::Base64Options options)
QTextCodec * codecForName(const QByteArray &name)
typedef ConstIterator
bool contains(char ch) const const
QString fromLatin1(const char *str, int size)
bool isValid() const const
void prepend(const T &value)
QList::const_iterator constEnd() const const
QList::const_iterator constBegin() const const
int size() const const
int size() const const
State
QVector::iterator end()
QList::iterator begin()
bool endsWith(const QByteArray &ba) const const
QByteArray toBase64(QByteArray::Base64Options options) const const
QString toUnicode(const QByteArray &a) const const
KCODECS_EXPORT QByteArray quotedPrintableDecode(const QByteArray &in)
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sat Jun 19 2021 22:55:34 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.