KContacts

vcardparser.cpp
1/*
2 This file is part of the KContacts framework.
3 SPDX-FileCopyrightText: 2003 Tobias Koenig <tokoe@kde.org>
4
5 SPDX-License-Identifier: LGPL-2.0-or-later
6*/
7
8#include "kcontacts_debug.h"
9#include "vcardparser_p.h"
10#include <KCodecs>
11#include <QStringDecoder>
12#include <QStringEncoder>
13#include <functional>
14
15// This cache for QString::fromLatin1() isn't about improving speed, but about reducing memory usage by sharing common strings
16class StringCache
17{
18public:
19 QString fromLatin1(const QByteArray &value)
20 {
21 if (value.isEmpty()) {
22 return QString();
23 }
24
25 auto it = m_values.constFind(value);
26 if (it != m_values.constEnd()) {
27 return it.value();
28 }
29
30 QString string = QString::fromLatin1(value);
31 m_values.insert(value, string);
32 return string;
33 }
34
35private:
36 QHash<QByteArray, QString> m_values;
37};
38
39using namespace KContacts;
40
41static void addEscapes(QByteArray &str, bool excludeEscapedComma)
42{
43 str.replace('\\', "\\\\");
44 if (!excludeEscapedComma) {
45 str.replace(',', "\\,");
46 }
47 str.replace('\r', "\\r");
48 str.replace('\n', "\\n");
49}
50
51static void removeEscapes(QByteArray &str)
52{
53 // It's more likely that no escape is present, so add fast path
54 if (!str.contains('\\')) {
55 return;
56 }
57 str.replace("\\n", "\n");
58 str.replace("\\N", "\n");
59 str.replace("\\r", "\r");
60 str.replace("\\,", ",");
61 str.replace("\\\\", "\\");
62}
63
64class VCardLineParser
65{
66public:
67 VCardLineParser(StringCache &cache, std::function<QByteArray()> fetchAnotherLine)
68 : m_cache(cache)
69 , m_fetchAnotherLine(fetchAnotherLine)
70 {
71 }
72
73 void parseLine(const QByteArray &currentLine, VCardLine *vCardLine);
74
75private:
76 void addParameter(const QByteArray &paramKey, const QByteArray &paramValue);
77
78private:
79 StringCache &m_cache;
80 std::function<QByteArray()> m_fetchAnotherLine;
81
82 VCardLine *m_vCardLine = nullptr;
83 QByteArray m_encoding;
84 QByteArray m_charset;
85};
86
87void VCardLineParser::addParameter(const QByteArray &paramKey, const QByteArray &paramValue)
88{
89 if (paramKey == "encoding") {
90 m_encoding = paramValue.toLower();
91 } else if (paramKey == "charset") {
92 m_charset = paramValue.toLower();
93 }
94 // qDebug() << " add parameter" << paramKey << " = " << paramValue;
95 m_vCardLine->addParameter(m_cache.fromLatin1(paramKey), m_cache.fromLatin1(paramValue));
96}
97
98void VCardLineParser::parseLine(const QByteArray &currentLine, KContacts::VCardLine *vCardLine)
99{
100 // qDebug() << currentLine;
101 m_vCardLine = vCardLine;
102 // The syntax is key:value, but the key can contain semicolon-separated parameters, which can contain a ':', so indexOf(':') is wrong.
103 // EXAMPLE: ADR;GEO="geo:22.500000,45.099998";LABEL="My Label";TYPE=home:P.O. Box 101;;;Any Town;CA;91921-1234;
104 // Therefore we need a small state machine, just the way I like it.
105 enum State {
106 StateInitial,
107 StateParamKey,
108 StateParamValue,
109 StateQuotedValue,
110 StateAfterParamValue,
111 StateValue,
112 };
113 State state = StateInitial;
114 const int lineLength = currentLine.length();
115 const char *lineData = currentLine.constData(); // to skip length checks from at() in debug mode
116 QByteArray paramKey;
117 QByteArray paramValue;
118 int start = 0;
119 int pos = 0;
120 for (; pos < lineLength; ++pos) {
121 const char ch = lineData[pos];
122 const bool colonOrSemicolon = (ch == ';' || ch == ':');
123 switch (state) {
124 case StateInitial:
125 if (colonOrSemicolon) {
126 const QByteArray identifier = currentLine.mid(start, pos - start);
127 // qDebug() << " identifier" << identifier;
128 vCardLine->setIdentifier(m_cache.fromLatin1(identifier));
129 start = pos + 1;
130 }
131 if (ch == ';') {
132 state = StateParamKey;
133 } else if (ch == ':') {
134 state = StateValue;
135 } else if (ch == '.') {
136 vCardLine->setGroup(m_cache.fromLatin1(currentLine.mid(start, pos - start)));
137 start = pos + 1;
138 }
139 break;
140 case StateParamKey:
141 if (colonOrSemicolon || ch == '=') {
142 paramKey = currentLine.mid(start, pos - start);
143 start = pos + 1;
144 }
145 if (colonOrSemicolon) {
146 // correct the so-called 2.1 'standard'
147 paramValue = paramKey;
148 const QByteArray lowerKey = paramKey.toLower();
149 if (lowerKey == "quoted-printable" || lowerKey == "base64") {
150 paramKey = "encoding";
151 } else {
152 paramKey = "type";
153 }
154 addParameter(paramKey, paramValue);
155 }
156 if (ch == ';') {
157 state = StateParamKey;
158 } else if (ch == ':') {
159 state = StateValue;
160 } else if (ch == '=') {
161 state = StateParamValue;
162 }
163 break;
164 case StateQuotedValue:
165 if (ch == '"' || (ch == ',' && paramKey.toLower() == "type")) {
166 // TODO the hack above is for TEL;TYPE=\"voice,home\":... without breaking GEO.... TODO: check spec
167 paramValue = currentLine.mid(start, pos - start);
168 addParameter(paramKey.toLower(), paramValue);
169 start = pos + 1;
170 if (ch == '"') {
171 state = StateAfterParamValue; // to avoid duplicating code with StateParamValue, we use this intermediate state for one char
172 }
173 }
174 break;
175 case StateParamValue:
176 if (colonOrSemicolon || ch == ',') {
177 paramValue = currentLine.mid(start, pos - start);
178 addParameter(paramKey.toLower(), paramValue);
179 start = pos + 1;
180 }
181 // fall-through intended
182 Q_FALLTHROUGH();
183 case StateAfterParamValue:
184 if (ch == ';') {
185 state = StateParamKey;
186 start = pos + 1;
187 } else if (ch == ':') {
188 state = StateValue;
189 } else if (pos == start && ch == '"') { // don't treat foo"bar" as quoted - TODO check the vCard 3.0 spec.
190 state = StateQuotedValue;
191 start = pos + 1;
192 }
193 break;
194 case StateValue:
195 Q_UNREACHABLE();
196 break;
197 }
198
199 if (state == StateValue) {
200 break;
201 }
202 }
203
204 if (state != StateValue) { // invalid line, no ':'
205 return;
206 }
207
208 QByteArray value = currentLine.mid(pos + 1);
209 removeEscapes(value);
210
211 QByteArray output;
212 bool wasBase64Encoded = false;
213
214 if (!m_encoding.isEmpty()) {
215 // have to decode the data
216 if (m_encoding == "b" || m_encoding == "base64") {
217 output = QByteArray::fromBase64(value);
218 wasBase64Encoded = true;
219 } else if (m_encoding == "quoted-printable") {
220 // join any qp-folded lines
221 while (value.endsWith('=')) {
222 value.chop(1); // remove the '='
223 value.append(m_fetchAnotherLine());
224 }
225 KCodecs::quotedPrintableDecode(value, output);
226 } else if (m_encoding == "8bit") {
227 output = value;
228 } else {
229 qDebug("Unknown vcard encoding type!");
230 }
231 } else {
232 output = value;
233 }
234
235 if (!m_charset.isEmpty()) {
236 // have to convert the data
237 auto codec = QStringDecoder(m_charset.constData());
238 if (codec.isValid()) {
239 vCardLine->setValue(QVariant::fromValue<QString>(codec.decode(output)));
240 } else {
241 vCardLine->setValue(QString::fromUtf8(output));
242 }
243 } else if (wasBase64Encoded) {
244 vCardLine->setValue(output);
245 } else {
246 vCardLine->setValue(QString::fromUtf8(output));
247 }
248}
249
250////
251
252VCardParser::VCardParser()
253{
254}
255
256VCardParser::~VCardParser()
257{
258}
259
260VCard::List VCardParser::parseVCards(const QByteArray &text)
261{
262 VCard currentVCard;
263 VCard::List vCardList;
264 QByteArray currentLine;
265
266 int lineStart = 0;
267 int lineEnd = text.indexOf('\n');
268
269 bool inVCard = false;
270
271 StringCache cache;
272 for (; lineStart != text.size() + 1;
273 lineStart = lineEnd + 1, lineEnd = (text.indexOf('\n', lineStart) == -1) ? text.size() : text.indexOf('\n', lineStart)) {
274 QByteArray cur = text.mid(lineStart, lineEnd - lineStart);
275 // remove the trailing \r, left from \r\n
276 if (cur.endsWith('\r')) {
277 cur.chop(1);
278 }
279
280 if (cur.startsWith(' ') //
281 || cur.startsWith('\t')) { // folded line => append to previous
282 currentLine.append(cur.mid(1));
283 continue;
284 } else {
285 if (cur.trimmed().isEmpty()) { // empty line
286 continue;
287 }
288 if (inVCard && !currentLine.isEmpty()) { // now parse the line
289 VCardLine vCardLine;
290
291 // Provide a way for the parseVCardLine function to read more lines (for quoted-printable support)
292 auto fetchAnotherLine = [&text, &lineStart, &lineEnd, &cur]() -> QByteArray {
293 const QByteArray ret = cur;
294 lineStart = lineEnd + 1;
295 lineEnd = text.indexOf('\n', lineStart);
296 if (lineEnd != -1) {
297 cur = text.mid(lineStart, lineEnd - lineStart);
298 // remove the trailing \r, left from \r\n
299 if (cur.endsWith('\r')) {
300 cur.chop(1);
301 }
302 }
303 return ret;
304 };
305
306 VCardLineParser lineParser(cache, fetchAnotherLine);
307
308 lineParser.parseLine(currentLine, &vCardLine);
309
310 currentVCard.addLine(vCardLine);
311 }
312
313 // we do not save the start and end tag as vcardline
314 if (qstrnicmp(cur.constData(), "begin:vcard", 11) == 0) {
315 inVCard = true;
316 currentLine.clear();
317 currentVCard.clear(); // flush vcard
318 continue;
319 }
320
321 if (qstrnicmp(cur.constData(), "end:vcard", 9) == 0) {
322 inVCard = false;
323 vCardList.append(currentVCard);
324 currentLine.clear();
325 currentVCard.clear(); // flush vcard
326 continue;
327 }
328
329 currentLine = cur;
330 }
331 }
332
333 return vCardList;
334}
335
// Maximum number of bytes written to an output line before it gets folded.
static constexpr int FOLD_WIDTH = 75;
337
338QByteArray VCardParser::createVCards(const VCard::List &list)
339{
340 QByteArray text;
341 QByteArray textLine;
342 QString encodingType;
343 QStringList params;
344 QStringList values;
345
346 VCardLine::List lines;
347
348 bool hasEncoding;
349
350 text.reserve(list.size() * 300); // reserve memory to be more efficient
351
352 // iterate over the cards
353 for (const VCard &card : list) {
354 text.append("BEGIN:VCARD\r\n");
355
356 QStringList idents = card.identifiers();
357 // VERSION must be first
358 if (idents.contains(QLatin1String("VERSION"))) {
359 const QString str = idents.takeAt(idents.indexOf(QLatin1String("VERSION")));
360 idents.prepend(str);
361 }
362
363 for (const auto &id : std::as_const(idents)) {
364 lines = card.lines(id);
365
366 // iterate over the lines
367 for (const VCardLine &vline : std::as_const(lines)) {
368 QVariant val = vline.value();
369 if (val.isValid()) {
370 if (vline.hasGroup()) {
371 textLine = vline.group().toLatin1() + '.' + vline.identifier().toLatin1();
372 } else {
373 textLine = vline.identifier().toLatin1();
374 }
375
376 params = vline.parameterList();
377 hasEncoding = false;
378 if (!params.isEmpty()) { // we have parameters
379 for (const QString &param : std::as_const(params)) {
380 if (param == QLatin1String("encoding")) {
381 hasEncoding = true;
382 encodingType = vline.parameter(QStringLiteral("encoding")).toLower();
383 }
384
385 values = vline.parameters(param);
386 for (const QString &str : std::as_const(values)) {
387 textLine.append(';' + param.toLatin1().toUpper());
388 if (!str.isEmpty()) {
389 textLine.append('=' + str.toLatin1());
390 }
391 }
392 }
393 }
394
395 QByteArray input;
396 QByteArray output;
397 bool checkMultibyte = false; // avoid splitting a multibyte character
398
399 // handle charset
400 const QString charset = vline.parameter(QStringLiteral("charset"));
401 if (!charset.isEmpty()) {
402 // have to convert the data
403 const QString value = vline.value().toString();
404 auto codec = QStringEncoder(charset.toLatin1().constData());
405 if (codec.isValid()) {
406 input = codec.encode(value);
407 } else {
408 checkMultibyte = true;
409 input = value.toUtf8();
410 }
411 } else if (vline.value().userType() == QMetaType::QByteArray) {
412 input = vline.value().toByteArray();
413 } else {
414 checkMultibyte = true;
415 input = vline.value().toString().toUtf8();
416 }
417
418 // handle encoding
419 if (hasEncoding) { // have to encode the data
420 if (encodingType == QLatin1Char('b')) {
421 checkMultibyte = false;
422 output = input.toBase64();
423 } else if (encodingType == QLatin1String("quoted-printable")) {
424 checkMultibyte = false;
425 KCodecs::quotedPrintableEncode(input, output, false);
426 }
427 } else {
428 output = input;
429 }
430 addEscapes(output, (vline.identifier() == QLatin1String("CATEGORIES") || vline.identifier() == QLatin1String("GEO")));
431
432 if (!output.isEmpty()) {
433 textLine.append(':' + output);
434
435 if (textLine.length() > FOLD_WIDTH) { // we have to fold the line
436 if (checkMultibyte) {
437 // RFC 6350: Multi-octet characters MUST remain contiguous.
438 // we know that textLine contains UTF-8 encoded characters
439 int lineLength = 0;
440 for (int i = 0; i < textLine.length(); ++i) {
441 if ((textLine[i] & 0xC0) == 0xC0) { // a multibyte sequence follows
442 int sequenceLength = 2;
443 if ((textLine[i] & 0xE0) == 0xE0) {
444 sequenceLength = 3;
445 } else if ((textLine[i] & 0xF0) == 0xF0) {
446 sequenceLength = 4;
447 }
448 if ((lineLength + sequenceLength) > FOLD_WIDTH) {
449 // the current line would be too long. fold it
450 text += "\r\n " + textLine.mid(i, sequenceLength);
451 lineLength = 1 + sequenceLength; // incl. leading space
452 } else {
453 text += textLine.mid(i, sequenceLength);
454 lineLength += sequenceLength;
455 }
456 i += sequenceLength - 1;
457 } else {
458 text += textLine[i];
459 ++lineLength;
460 }
461 if ((lineLength == FOLD_WIDTH) && (i < (textLine.length() - 1))) {
462 text += "\r\n ";
463 lineLength = 1; // leading space
464 }
465 }
466 text += "\r\n";
467 } else {
468 for (int i = 0; i <= (textLine.length() / FOLD_WIDTH); ++i) {
469 text.append((i == 0 ? "" : " ") + textLine.mid(i * FOLD_WIDTH, FOLD_WIDTH) + "\r\n");
470 }
471 }
472 } else {
473 text.append(textLine);
474 text.append("\r\n");
475 }
476 }
477 }
478 }
479 }
480
481 text.append("END:VCARD\r\n");
482 text.append("\r\n");
483 }
484
485 return text;
486}
Q_SCRIPTABLE Q_NOREPLY void start()
KCODECS_EXPORT QByteArray quotedPrintableDecode(QByteArrayView in)
KCODECS_EXPORT QByteArray quotedPrintableEncode(QByteArrayView in, bool useCRLF=true)
KIOCORE_EXPORT QStringList list(const QString &fileClass)
QByteArray & append(QByteArrayView data)
void chop(qsizetype n)
void clear()
const char * constData() const
bool contains(QByteArrayView bv) const
bool endsWith(QByteArrayView bv) const
QByteArray fromBase64(const QByteArray &base64, Base64Options options)
qsizetype indexOf(QByteArrayView bv, qsizetype from) const
bool isEmpty() const
qsizetype length() const
QByteArray mid(qsizetype pos, qsizetype len) const
QByteArray & replace(QByteArrayView before, QByteArrayView after)
void reserve(qsizetype size)
qsizetype size() const
bool startsWith(QByteArrayView bv) const
QByteArray toBase64(Base64Options options) const
QByteArray toLower() const
QByteArray trimmed() const
bool isEmpty() const
void prepend(parameter_type value)
qsizetype size() const
T takeAt(qsizetype i)
QString fromLatin1(QByteArrayView str)
QString fromUtf8(QByteArrayView str)
bool isEmpty() const
QByteArray toLatin1() const
QString toLower() const
QByteArray toUtf8() const
bool contains(QLatin1StringView str, Qt::CaseSensitivity cs) const
qsizetype indexOf(const QRegularExpression &re, qsizetype from) const
QVariant fromValue(T &&value)
bool isValid() const
T value() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 31 2025 12:06:29 by doxygen 1.13.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.