KContacts

vcardparser.cpp
/*
    This file is part of the KContacts framework.
    SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org>

    SPDX-License-Identifier: LGPL-2.0-or-later
*/
7
#include "kcontacts_debug.h"
#include "vcardparser_p.h"
#include <KCodecs>
#include <QHash>
#include <QStringDecoder>
#include <QStringEncoder>
#include <functional>
14
15// This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
16class StringCache
17{
18public:
19 QString fromLatin1(const QByteArray &value)
20 {
21 if (value.isEmpty()) {
22 return QString();
23 }
24
25 auto it = m_values.constFind(value);
26 if (it != m_values.constEnd()) {
27 return it.value();
28 }
29
30 QString string = QString::fromLatin1(value);
31 m_values.insert(value, string);
32 return string;
33 }
34
35private:
37};
38
39using namespace KContacts;
40
41static void addEscapes(QByteArray &str, bool excludeEscapedComma)
42{
43 str.replace('\\', "\\\\");
44 if (!excludeEscapedComma) {
45 str.replace(',', "\\,");
46 }
47 str.replace('\r', "\\r");
48 str.replace('\n', "\\n");
49}
50
51static void removeEscapes(QByteArray &str)
52{
53 // It's more likely that no escape is present, so add fast path
54 if (!str.contains('\\')) {
55 return;
56 }
57 str.replace("\\n", "\n");
58 str.replace("\\N", "\n");
59 str.replace("\\r", "\r");
60 str.replace("\\,", ",");
61 str.replace("\\\\", "\\");
62}
63
64class VCardLineParser
65{
66public:
67 VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
68 : m_cache(cache)
69 , m_fetchAnotherLine(fetchAnotherLine)
70 {
71 }
72
73 void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
74
75private:
76 void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
77
78private:
79 StringCache &m_cache;
80 std::function<QByteArray()> m_fetchAnotherLine;
81
82 VCardLine *m_vCardLine = nullptr;
83 QByteArray m_encoding;
84 QByteArray m_charset;
85};
86
87void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
88{
89 if (paramKey == "encoding") {
90 m_encoding = paramValue.toLower();
91 } else if (paramKey == "charset") {
92 m_charset = paramValue.toLower();
93 }
94 // qDebug() << " add parameter" << paramKey << " = " << paramValue;
95 m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
96}
97
98void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
99{
100 // qDebug() << currentLine;
101 m_vCardLine = vCardLine;
102 // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
103 // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
104 // Therefore we need a small state machine, just the way I like it.
105 enum State {
106 StateInitial,
107 StateParamKey,
108 StateParamValue,
109 StateQuotedValue,
110 StateAfterParamValue,
111 StateValue,
112 };
113 State state = StateInitial;
114 const int lineLength = currentLine.length();
115 const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
116 QByteArray paramKey;
117 QByteArray paramValue;
118 int start = 0;
119 int pos = 0;
120 for (; pos < lineLength; ++pos) {
121 const char ch = lineData[pos];
122 const bool colonOrSemicolon = (ch == ';' || ch == ':');
123 switch (state) {
124 case StateInitial:
125 if (colonOrSemicolon) {
126 const QByteArray identifier = currentLine.mid(start, pos - start);
127 // qDebug() << " identifier" << identifier;
128 vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
129 start = pos + 1;
130 }
131 if (ch == ';') {
132 state = StateParamKey;
133 } else if (ch == ':') {
134 state = StateValue;
135 } else if (ch == '.') {
136 vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
137 start = pos + 1;
138 }
139 break;
140 case StateParamKey:
141 if (colonOrSemicolon || ch == '=') {
142 paramKey = currentLine.mid(start, pos - start);
143 start = pos + 1;
144 }
145 if (colonOrSemicolon) {
146 // correct the so-called 2.1 'standard'
147 paramValue = paramKey;
148 const QByteArray lowerKey = paramKey.toLower();
149 if (lowerKey == "quoted-printable" || lowerKey == "base64") {
150 paramKey = "encoding";
151 } else {
152 paramKey = "type";
153 }
154 addParameter(paramKey, paramValue);
155 }
156 if (ch == ';') {
157 state = StateParamKey;
158 } else if (ch == ':') {
159 state = StateValue;
160 } else if (ch == '=') {
161 state = StateParamValue;
162 }
163 break;
164 case StateQuotedValue:
165 if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
166 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
167 paramValue = currentLine.mid(start, pos - start);
168 addParameter(paramKey.toLower(), paramValue);
169 start = pos + 1;
170 if (ch == '"') {
171 state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
172 }
173 }
174 break;
175 case StateParamValue:
176 if (colonOrSemicolon || ch == ',') {
177 paramValue = currentLine.mid(start, pos - start);
178 addParameter(paramKey.toLower(), paramValue);
179 start = pos + 1;
180 }
181 // fall-through intended
182 Q_FALLTHROUGH();
183 case StateAfterParamValue:
184 if (ch == ';') {
185 state = StateParamKey;
186 start = pos + 1;
187 } else if (ch == ':') {
188 state = StateValue;
189 } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
190 state = StateQuotedValue;
191 start = pos + 1;
192 }
193 break;
194 case StateValue:
195 Q_UNREACHABLE();
196 break;
197 }
198
199 if (state == StateValue) {
200 break;
201 }
202 }
203
204 if (state != StateValue) { // invalid line, no ':'
205 return;
206 }
207
208 QByteArray value = currentLine.mid(pos + 1);
209 removeEscapes(value);
210
211 QByteArray output;
212 bool wasBase64Encoded = false;
213
214 if (!m_encoding.isEmpty()) {
215 // have to decode the data
216 if (m_encoding == "b" || m_encoding == "base64") {
217 output = QByteArray::fromBase64(value);
218 wasBase64Encoded = true;
219 } else if (m_encoding == "quoted-printable") {
220 // join any qp-folded lines
221 while (value.endsWith('=')) {
222 value.chop(1); // remove the '='
223 value.append(m_fetchAnotherLine());
224 }
225 KCodecs::quotedPrintableDecode(value, output);
226 } else if (m_encoding == "8bit") {
227 output = value;
228 } else {
229 qDebug("Unknown vcard encoding type!");
230 }
231 } else {
232 output = value;
233 }
234
235 if (!m_charset.isEmpty()) {
236 // have to convert the data
237 auto codec = QStringDecoder(m_charset.constData());
238 if (codec.isValid()) {
239 vCardLine->setValue(QVariant::fromValue<QString>(codec.decode(output)));
240 } else {
241 vCardLine->setValue(QString::fromUtf8(output));
242 }
243 } else if (wasBase64Encoded) {
244 vCardLine->setValue(output);
245 } else {
246 vCardLine->setValue(QString::fromUtf8(output));
247 }
248}
249
250////
251
252VCardParser::VCardParser()
253{
254}
255
256VCardParser::~VCardParser()
257{
258}
259
260VCard::List VCardParser::parseVCards(const QByteArray &text)
261{
262 VCard currentVCard;
263 VCard::List vCardList;
264 QByteArray currentLine;
265
266 int lineStart = 0;
267 int lineEnd = text.indexOf('\n');
268
269 bool inVCard = false;
270
271 StringCache cache;
272 for (; lineStart != text.size() + 1;
273 lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
274 QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
275 // remove the trailing \r, left from \r\n
276 if (cur.endsWith('\r')) {
277 cur.chop(1);
278 }
279
280 if (cur.startsWith(' ') //
281 || cur.startsWith('\t')) { // folded line => append to previous
282 currentLine.append(cur.mid(1));
283 continue;
284 } else {
285 if (cur.trimmed().isEmpty()) { // empty line
286 continue;
287 }
288 if (inVCard && !currentLine.isEmpty()) { // now parse the line
289 VCardLine vCardLine;
290
291 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
292 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
293 const QByteArray ret = cur;
294 lineStart = lineEnd + 1;
295 lineEnd = text.indexOf('\n', lineStart);
296 if (lineEnd != -1) {
297 cur = text.mid(lineStart, lineEnd - lineStart);
298 // remove the trailing \r, left from \r\n
299 if (cur.endsWith('\r')) {
300 cur.chop(1);
301 }
302 }
303 return ret;
304 };
305
306 VCardLineParser lineParser(cache, fetchAnotherLine);
307
308 lineParser.parseLine(currentLine, &vCardLine);
309
310 currentVCard.addLine(vCardLine);
311 }
312
313 // we do not save the start and end tag as vcardline
314 if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
315 inVCard = true;
316 currentLine.clear();
317 currentVCard.clear(); // flush vcard
318 continue;
319 }
320
321 if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
322 inVCard = false;
323 vCardList.append(currentVCard);
324 currentLine.clear();
325 currentVCard.clear(); // flush vcard
326 continue;
327 }
328
329 currentLine = cur;
330 }
331 }
332
333 return vCardList;
334}
335
336static const int FOLD_WIDTH = 75;
337
338QByteArray VCardParser::createVCards(const VCard::List &list)
339{
340 QByteArray text;
341 QByteArray textLine;
342 QString encodingType;
343 QStringList params;
344 QStringList values;
345
346 VCardLine::List lines;
347
348 bool hasEncoding;
349
350 text.reserve(list.size() * 300); // reserve memory to be more efficient
351
352 // iterate over the cards
353 for (const VCard &card : list) {
354 text.append("BEGIN:VCARD\r\n");
355
356 QStringList idents = card.identifiers();
357 // VERSION must be first
358 if (idents.contains(QLatin1String("VERSION"))) {
359 const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
360 idents.prepend(str);
361 }
362
363 for (const auto &id : std::as_const(idents)) {
364 lines = card.lines(id);
365
366 // iterate over the lines
367 for (const VCardLine &vline : std::as_const(lines)) {
368 QVariant val = vline.value();
369 if (val.isValid()) {
370 if (vline.hasGroup()) {
371 textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1();
372 } else {
373 textLine = vline.identifier().toLatin1();
374 }
375
376 params = vline.parameterList();
377 hasEncoding = false;
378 if (!params.isEmpty()) { // we have parameters
379 for (const QString &param : std::as_const(params)) {
380 if (param == QLatin1String("encoding")) {
381 hasEncoding = true;
382 encodingType = vline.parameter(QStringLiteral("encoding")).toLower();
383 }
384
385 values = vline.parameters(param);
386 for (const QString &str : std::as_const(values)) {
387 textLine.append(';' + param.toLatin1().toUpper());
388 if (!str.isEmpty()) {
389 textLine.append('=' + str.toLatin1());
390 }
391 }
392 }
393 }
394
395 QByteArray input;
396 QByteArray output;
397 bool checkMultibyte = false; // avoid splitting a multibyte character
398
399 // handle charset
400 const QString charset = vline.parameter(QStringLiteral("charset"));
401 if (!charset.isEmpty()) {
402 // have to convert the data
403 const QString value = vline.value().toString();
404 auto codec = QStringEncoder(charset.toLatin1().constData());
405 if (codec.isValid()) {
406 input = codec.encode(value);
407 } else {
408 checkMultibyte = true;
409 input = value.toUtf8();
410 }
411 } else if (vline.value().userType() == QMetaType::QByteArray) {
412 input = vline.value().toByteArray();
413 } else {
414 checkMultibyte = true;
415 input = vline.value().toString().toUtf8();
416 }
417
418 // handle encoding
419 if (hasEncoding) { // have to encode the data
420 if (encodingType == QLatin1Char('b')) {
421 checkMultibyte = false;
422 output = input.toBase64();
423 } else if (encodingType == QLatin1String("quoted-printable")) {
424 checkMultibyte = false;
425 KCodecs::quotedPrintableEncode(input, output, false);
426 }
427 } else {
428 output = input;
429 }
430 addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO")));
431
432 if (!output.isEmpty()) {
433 textLine.append(':' + output);
434
435 if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
436 if (checkMultibyte) {
437 // RFC 6350: Multi-octet characters MUST remain contiguous.
438 // we know that textLine contains UTF-8 encoded characters
439 int lineLength = 0;
440 for (int i = 0; i < textLine.length(); ++i) {
441 if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
442 int sequenceLength = 2;
443 if ((textLine[i] & 0xE0) == 0xE0) {
444 sequenceLength = 3;
445 } else if ((textLine[i] & 0xF0) == 0xF0) {
446 sequenceLength = 4;
447 }
448 if ((lineLength + sequenceLength) > FOLD_WIDTH) {
449 // the current line would be too long. fold it
450 text += "\r\n " + textLine.mid(i, sequenceLength);
451 lineLength = 1 + sequenceLength; // incl. leading space
452 } else {
453 text += textLine.mid(i, sequenceLength);
454 lineLength += sequenceLength;
455 }
456 i += sequenceLength - 1;
457 } else {
458 text += textLine[i];
459 ++lineLength;
460 }
461 if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
462 text += "\r\n ";
463 lineLength = 1; // leading space
464 }
465 }
466 text += "\r\n";
467 } else {
468 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
469 text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
470 }
471 }
472 } else {
473 text.append(textLine);
474 text.append("\r\n");
475 }
476 }
477 }
478 }
479 }
480
481 text.append("END:VCARD\r\n");
482 text.append("\r\n");
483 }
484
485 return text;
486}
Q_SCRIPTABLE Q_NOREPLY void start()
KCODECS_EXPORT QByteArray quotedPrintableDecode(QByteArrayView in)
KCODECS_EXPORT QByteArray quotedPrintableEncode(QByteArrayView in, bool useCRLF=true)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
QByteArray & append(QByteArrayView data)
void chop(qsizetype n)
void clear()
const char * constData() const const
bool contains(QByteArrayView bv) const const
bool endsWith(QByteArrayView bv) const const
QByteArray fromBase64(const QByteArray &base64, Base64Options options)
qsizetype indexOf(QByteArrayView bv, qsizetype from) const const
bool isEmpty() const const
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
QByteArray & replace(QByteArrayView before, QByteArrayView after)
void reserve(qsizetype size)
qsizetype size() const const
bool startsWith(QByteArrayView bv) const const
QByteArray toBase64(Base64Options options) const const
QByteArray toLower() const const
QByteArray trimmed() const const
const_iterator constEnd() const const
const_iterator constFind(const Key &key) const const
iterator insert(const Key &key, const T &value)
bool isEmpty() const const
void prepend(parameter_type value)
qsizetype size() const const
T takeAt(qsizetype i)
QString fromLatin1(QByteArrayView str)
QString fromUtf8(QByteArrayView str)
bool isEmpty() const const
QByteArray toLatin1() const const
QString toLower() const const
QByteArray toUtf8() const const
bool contains(QLatin1StringView str, Qt::CaseSensitivity cs) const const
qsizetype indexOf(const QRegularExpression &re, qsizetype from) const const
bool isValid() const const
T value() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:14:08 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.