KContacts

vcardparser.cpp
1 /*
2  This file is part of the KContacts framework.
3  SPDX-FileCopyrightText: 2003 Tobias Koenig <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.0-or-later
6 */
7 
8 #include "vcardparser.h"
9 #include <KCodecs>
10 #include "kcontacts_debug.h"
11 #include <QTextCodec>
12 #include <functional>
13 
14 // This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
15 class StringCache
16 {
17 public:
18  QString fromLatin1(const QByteArray &value)
19  {
20  if (value.isEmpty()) {
21  return QString();
22  }
23 
25  if (it != m_values.constEnd()) {
26  return it.value();
27  }
28 
29  QString string = QString::fromLatin1(value);
30  m_values.insert(value, string);
31  return string;
32  }
33 
34 private:
36 };
37 
38 using namespace KContacts;
39 
40 static void addEscapes(QByteArray &str, bool excludeEscapedComma)
41 {
42  str.replace('\\', "\\\\");
43  if (!excludeEscapedComma) {
44  str.replace(',', "\\,");
45  }
46  str.replace('\r', "\\r");
47  str.replace('\n', "\\n");
48 }
49 
50 static void removeEscapes(QByteArray &str)
51 {
52  // It's more likely that no escape is present, so add fast path
53  if (!str.contains('\\')) {
54  return;
55  }
56  str.replace("\\n", "\n");
57  str.replace("\\N", "\n");
58  str.replace("\\r", "\r");
59  str.replace("\\,", ",");
60  str.replace("\\\\", "\\");
61 }
62 
63 class VCardLineParser
64 {
65 public:
66  VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
67  : m_cache(cache)
68  , m_fetchAnotherLine(fetchAnotherLine)
69  {
70  }
71 
72  void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
73 
74 private:
75  void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
76 
77 private:
78  StringCache &m_cache;
79  std::function<QByteArray()> m_fetchAnotherLine;
80 
81  VCardLine *m_vCardLine = nullptr;
82  QByteArray m_encoding;
83  QByteArray m_charset;
84 };
85 
86 void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
87 {
88  if (paramKey == "encoding") {
89  m_encoding = paramValue.toLower();
90  } else if (paramKey == "charset") {
91  m_charset = paramValue.toLower();
92  }
93  //qDebug() << " add parameter" << paramKey << " = " << paramValue;
94  m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
95 }
96 
97 void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
98 {
99  //qDebug() << currentLine;
100  m_vCardLine = vCardLine;
101  // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
102  // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
103  // Therefore we need a small state machine, just the way I like it.
104  enum State {
105  StateInitial, StateParamKey, StateParamValue, StateQuotedValue, StateAfterParamValue, StateValue
106  };
107  State state = StateInitial;
108  const int lineLength = currentLine.length();
109  const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
110  QByteArray paramKey;
111  QByteArray paramValue;
112  int start = 0;
113  int pos = 0;
114  for (; pos < lineLength; ++pos) {
115  const char ch = lineData[pos];
116  const bool colonOrSemicolon = (ch == ';' || ch == ':');
117  switch (state) {
118  case StateInitial:
119  if (colonOrSemicolon) {
120  const QByteArray identifier = currentLine.mid(start, pos - start);
121  //qDebug() << " identifier" << identifier;
122  vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
123  start = pos + 1;
124  }
125  if (ch == ';') {
126  state = StateParamKey;
127  } else if (ch == ':') {
128  state = StateValue;
129  } else if (ch == '.') {
130  vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
131  start = pos + 1;
132  }
133  break;
134  case StateParamKey:
135  if (colonOrSemicolon || ch == '=') {
136  paramKey = currentLine.mid(start, pos - start);
137  start = pos + 1;
138  }
139  if (colonOrSemicolon) {
140  // correct the so-called 2.1 'standard'
141  paramValue = paramKey;
142  const QByteArray lowerKey = paramKey.toLower();
143  if (lowerKey == "quoted-printable" || lowerKey == "base64") {
144  paramKey = "encoding";
145  } else {
146  paramKey = "type";
147  }
148  addParameter(paramKey, paramValue);
149  }
150  if (ch == ';') {
151  state = StateParamKey;
152  } else if (ch == ':') {
153  state = StateValue;
154  } else if (ch == '=') {
155  state = StateParamValue;
156  }
157  break;
158  case StateQuotedValue:
159  if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
160  // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
161  paramValue = currentLine.mid(start, pos - start);
162  addParameter(paramKey.toLower(), paramValue);
163  start = pos + 1;
164  if (ch == '"') {
165  state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
166  }
167  }
168  break;
169  case StateParamValue:
170  if (colonOrSemicolon || ch == ',') {
171  paramValue = currentLine.mid(start, pos - start);
172  addParameter(paramKey.toLower(), paramValue);
173  start = pos + 1;
174  }
175  // fall-through intended
176  Q_FALLTHROUGH();
177  case StateAfterParamValue:
178  if (ch == ';') {
179  state = StateParamKey;
180  start = pos + 1;
181  } else if (ch == ':') {
182  state = StateValue;
183  } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
184  state = StateQuotedValue;
185  start = pos + 1;
186  }
187  break;
188  case StateValue:
189  Q_UNREACHABLE();
190  break;
191  }
192 
193  if (state == StateValue) {
194  break;
195  }
196  }
197 
198  if (state != StateValue) { // invalid line, no ':'
199  return;
200  }
201 
202  QByteArray value = currentLine.mid(pos + 1);
203  removeEscapes(value);
204 
205  QByteArray output;
206  bool wasBase64Encoded = false;
207 
208  if (!m_encoding.isEmpty()) {
209  // have to decode the data
210  if (m_encoding == "b" || m_encoding == "base64") {
211  output = QByteArray::fromBase64(value);
212  wasBase64Encoded = true;
213  } else if (m_encoding == "quoted-printable") {
214  // join any qp-folded lines
215  while (value.endsWith('=')) {
216  value.chop(1); // remove the '='
217  value.append(m_fetchAnotherLine());
218  }
219  KCodecs::quotedPrintableDecode(value, output);
220  } else if (m_encoding == "8bit") {
221  output = value;
222  } else {
223  qDebug("Unknown vcard encoding type!");
224  }
225  } else {
226  output = value;
227  }
228 
229  if (!m_charset.isEmpty()) {
230  // have to convert the data
231  QTextCodec *codec = QTextCodec::codecForName(m_charset);
232  if (codec) {
233  vCardLine->setValue(codec->toUnicode(output));
234  } else {
235  vCardLine->setValue(QString::fromUtf8(output));
236  }
237  } else if (wasBase64Encoded) {
238  vCardLine->setValue(output);
239  } else {
240  vCardLine->setValue(QString::fromUtf8(output));
241  }
242 }
243 
245 
246 VCardParser::VCardParser()
247 {
248 }
249 
250 VCardParser::~VCardParser()
251 {
252 }
253 
254 VCard::List VCardParser::parseVCards(const QByteArray &text)
255 {
256  VCard currentVCard;
257  VCard::List vCardList;
258  QByteArray currentLine;
259 
260  int lineStart = 0;
261  int lineEnd = text.indexOf('\n');
262 
263  bool inVCard = false;
264 
265  StringCache cache;
266  for (; lineStart != text.size() + 1; lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
267  QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
268  // remove the trailing \r, left from \r\n
269  if (cur.endsWith('\r')) {
270  cur.chop(1);
271  }
272 
273  if (cur.startsWith(' ')
274  || cur.startsWith('\t')) { //folded line => append to previous
275  currentLine.append(cur.mid(1));
276  continue;
277  } else {
278  if (cur.trimmed().isEmpty()) { // empty line
279  continue;
280  }
281  if (inVCard && !currentLine.isEmpty()) { // now parse the line
282  VCardLine vCardLine;
283 
284  // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
285  auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
286  const QByteArray ret = cur;
287  lineStart = lineEnd + 1;
288  lineEnd = text.indexOf('\n', lineStart);
289  if (lineEnd != -1) {
290  cur = text.mid(lineStart, lineEnd - lineStart);
291  // remove the trailing \r, left from \r\n
292  if (cur.endsWith('\r')) {
293  cur.chop(1);
294  }
295  }
296  return ret;
297  };
298 
299  VCardLineParser lineParser(cache, fetchAnotherLine);
300 
301  lineParser.parseLine(currentLine, &vCardLine);
302 
303  currentVCard.addLine(vCardLine);
304  }
305 
306  // we do not save the start and end tag as vcardline
307  if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
308  inVCard = true;
309  currentLine.clear();
310  currentVCard.clear(); // flush vcard
311  continue;
312  }
313 
314  if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
315  inVCard = false;
316  vCardList.append(currentVCard);
317  currentLine.clear();
318  currentVCard.clear(); // flush vcard
319  continue;
320  }
321 
322  currentLine = cur;
323  }
324  }
325 
326  return vCardList;
327 }
328 
329 static const int FOLD_WIDTH = 75;
330 
331 QByteArray VCardParser::createVCards(const VCard::List &list)
332 {
333  QByteArray text;
334  QByteArray textLine;
335  QString encodingType;
336  QStringList idents;
337  QStringList params;
340  QStringList::Iterator paramIt;
342 
343  VCardLine::List lines;
344  VCardLine::List::ConstIterator lineIt;
345  VCard::List::ConstIterator cardIt;
346 
347  bool hasEncoding;
348 
349  text.reserve(list.size() * 300); // reserve memory to be more efficient
350 
351  // iterate over the cards
352  const VCard::List::ConstIterator listEnd(list.end());
353  for (cardIt = list.begin(); cardIt != listEnd; ++cardIt) {
354  text.append("BEGIN:VCARD\r\n");
355 
356  idents = (*cardIt).identifiers();
357  //VERSION must be first
358  if (idents.contains(QLatin1String("VERSION"))) {
359  const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
360  idents.prepend(str);
361  }
362 
363  for (identIt = idents.constBegin(); identIt != idents.constEnd(); ++identIt) {
364  lines = (*cardIt).lines((*identIt));
365 
366  // iterate over the lines
367  for (lineIt = lines.constBegin(); lineIt != lines.constEnd(); ++lineIt) {
368  QVariant val = (*lineIt).value();
369  if (val.isValid()) {
370  if ((*lineIt).hasGroup()) {
371  textLine = (*lineIt).group().toLatin1() + '.' + (*lineIt).identifier().toLatin1();
372  } else {
373  textLine = (*lineIt).identifier().toLatin1();
374  }
375 
376  params = (*lineIt).parameterList();
377  hasEncoding = false;
378  if (!params.isEmpty()) { // we have parameters
379  for (paramIt = params.begin(); paramIt != params.end(); ++paramIt) {
380  if ((*paramIt) == QLatin1String("encoding")) {
381  hasEncoding = true;
382  encodingType = (*lineIt).parameter(QStringLiteral("encoding")).toLower();
383  }
384 
385  values = (*lineIt).parameters(*paramIt);
386  for (valueIt = values.constBegin(); valueIt != values.constEnd(); ++valueIt) {
387  textLine.append(';' + (*paramIt).toLatin1().toUpper());
388  if (!(*valueIt).isEmpty()) {
389  textLine.append('=' + (*valueIt).toLatin1());
390  }
391  }
392  }
393  }
394 
395  QByteArray input, output;
396  bool checkMultibyte = false; // avoid splitting a multibyte character
397 
398  // handle charset
399  const QString charset = (*lineIt).parameter(QStringLiteral("charset"));
400  if (!charset.isEmpty()) {
401  // have to convert the data
402  const QString value = (*lineIt).value().toString();
403  QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
404  if (codec) {
405  input = codec->fromUnicode(value);
406  } else {
407  checkMultibyte = true;
408  input = value.toUtf8();
409  }
410  } else if ((*lineIt).value().type() == QVariant::ByteArray) {
411  input = (*lineIt).value().toByteArray();
412  } else {
413  checkMultibyte = true;
414  input = (*lineIt).value().toString().toUtf8();
415  }
416 
417  // handle encoding
418  if (hasEncoding) { // have to encode the data
419  if (encodingType == QLatin1Char('b')) {
420  checkMultibyte = false;
421  output = input.toBase64();
422  } else if (encodingType == QLatin1String("quoted-printable")) {
423  checkMultibyte = false;
424  KCodecs::quotedPrintableEncode(input, output, false);
425  }
426  } else {
427  output = input;
428  }
429  addEscapes(output, ((*lineIt).identifier() == QLatin1String("CATEGORIES") || (*lineIt).identifier() == QLatin1String("GEO")));
430 
431  if (!output.isEmpty()) {
432  textLine.append(':' + output);
433 
434  if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
435  if (checkMultibyte) {
436  // RFC 6350: Multi-octet characters MUST remain contiguous.
437  // we know that textLine contains UTF-8 encoded characters
438  int lineLength = 0;
439  for (int i = 0; i < textLine.length(); ++i) {
440  if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
441  int sequenceLength = 2;
442  if ((textLine[i] & 0xE0) == 0xE0) {
443  sequenceLength = 3;
444  } else if ((textLine[i] & 0xF0) == 0xF0) {
445  sequenceLength = 4;
446  }
447  if ((lineLength + sequenceLength) > FOLD_WIDTH) {
448  // the current line would be too long. fold it
449  text += "\r\n " + textLine.mid(i, sequenceLength);
450  lineLength = 1 + sequenceLength; // incl. leading space
451  } else {
452  text += textLine.mid(i, sequenceLength);
453  lineLength += sequenceLength;
454  }
455  i += sequenceLength - 1;
456  } else {
457  text += textLine[i];
458  ++lineLength;
459  }
460  if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
461  text += "\r\n ";
462  lineLength = 1; // leading space
463  }
464  }
465  text += "\r\n";
466  } else {
467  for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
468  text.append(
469  (i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
470  }
471  }
472  } else {
473  text.append(textLine + "\r\n");
474  }
475  }
476  }
477  }
478  }
479 
480  text.append("END:VCARD\r\n");
481  text.append("\r\n");
482  }
483 
484  return text;
485 }
QByteArray fromUnicode(const QString &str) const const
void clear()
QByteArray toLower() const const
void append(const T &value)
QByteArray trimmed() const const
void reserve(int size)
QVector::iterator begin()
QVector::const_iterator constEnd() const const
void chop(int n)
bool contains(const QString &str, Qt::CaseSensitivity cs) const const
bool isEmpty() const const
bool startsWith(const QByteArray &ba) const const
T value() const const
QHash::const_iterator constFind(const Key &key) const const
T takeAt(int i)
int length() const const
QVector< V > values(const QMultiHash< K, V > &c)
int indexOf(char ch, int from) const const
QString fromUtf8(const char *str, int size)
QHash::const_iterator constEnd() const const
bool isEmpty() const const
bool isEmpty() const const
const char * constData() const const
QByteArray & replace(int pos, int len, const char *after)
typedef Iterator
KCODECS_EXPORT QByteArray quotedPrintableEncode(const QByteArray &in, bool useCRLF=true)
QByteArray mid(int pos, int len) const const
int indexOf(QStringView str, int from) const const
QByteArray & append(char ch)
QList::iterator end()
QString toLower() const const
const T value(const Key &key) const const
QVector::const_iterator constBegin() const const
QByteArray toLatin1() const const
QByteArray fromBase64(const QByteArray &base64, QByteArray::Base64Options options)
QTextCodec * codecForName(const QByteArray &name)
typedef ConstIterator
bool contains(char ch) const const
QString fromLatin1(const char *str, int size)
bool isValid() const const
void prepend(const T &value)
QList::const_iterator constEnd() const const
QList::const_iterator constBegin() const const
int size() const const
int size() const const
State
QVector::iterator end()
QList::iterator begin()
bool endsWith(const QByteArray &ba) const const
QByteArray toBase64(QByteArray::Base64Options options) const const
QString toUnicode(const QByteArray &a) const const
KCODECS_EXPORT QByteArray quotedPrintableDecode(const QByteArray &in)
QByteArray toUtf8() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sat Jul 11 2020 22:54:19 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.