KIO

dataprotocol.cpp
1/*
2 Implementation of the data protocol (rfc 2397)
3
4 SPDX-FileCopyrightText: 2002, 2003 Leo Savernik <l.savernik@aon.at>
5
6 SPDX-License-Identifier: LGPL-2.0-only
7*/
8
9#include "dataprotocol_p.h"
10
11#include "global.h"
12#include "metadata.h"
13
14#include <QByteArray>
15#include <QStringDecoder>
16
17using namespace KIO;
18
19/** structure containing header information */
20struct DataHeader {
21 QString mime_type; // MIME type of content (lowercase)
22 MetaData attributes; // attribute/value pairs (attribute lowercase,
23 // value unchanged)
24 bool is_base64; // true if data is base64 encoded
25 QByteArray url; // reference to decoded url
26 int data_offset; // zero-indexed position within url
27 // where the real data begins. May point beyond
28 // the end to indicate that there is no data
29};
30
31/** returns the position of the first occurrence of any of the given
32 * characters @p c1 or comma (',') or semicolon (';') or buf.length()
33 * if none is contained.
34 *
35 * @param buf buffer where to look for c
36 * @param begin zero-indexed starting position
37 * @param c1 character to find or '\0' to ignore
38 */
39static int find(const QByteArray &buf, int begin, const char c1)
40{
41 static const char comma = ',';
42 static const char semicolon = ';';
43 int pos = begin;
44 int size = buf.length();
45 while (pos < size) {
46 const char ch = buf[pos];
47 if (ch == comma || ch == semicolon || (c1 != '\0' && ch == c1)) {
48 break;
49 }
50 pos++;
51 } /*wend*/
52 return pos;
53}
54
55/** extracts the string between the current position @p pos and the first
56 * occurrence of either @p c1 or comma (',') or semicolon (';') exclusively
57 * and updates @p pos to point at the found delimiter or at the end of the
58 * buffer if neither character occurred.
59 * @param buf buffer where to look for
60 * @param pos zero-indexed position within buffer
61 * @param c1 character to find or '\0' to ignore
62 */
63static inline QString extract(const QByteArray &buf, int &pos, const char c1 = '\0')
64{
65 int oldpos = pos;
66 pos = find(buf, oldpos, c1);
67 return QString::fromLatin1(buf.mid(oldpos, pos - oldpos));
68}
69
70/** ignores all whitespaces
71 * @param buf buffer to operate on
72 * @param pos position to shift to first non-whitespace character
73 * Upon return @p pos will either point to the first non-whitespace
74 * character or to the end of the buffer.
75 */
76static inline void ignoreWS(const QByteArray &buf, int &pos)
77{
78 int size = buf.length();
79 while (pos < size && (buf[pos] == ' ' || buf[pos] == '\t')) {
80 ++pos;
81 }
82}
83
84/** parses a quoted string as per rfc 822.
85 *
86 * If trailing quote is missing, the whole rest of the buffer is returned.
87 * @param buf buffer to operate on
88 * @param pos position pointing to the leading quote
89 * @return the extracted string. @p pos will be updated to point to the
90 * character following the trailing quote.
91 */
92static QString parseQuotedString(const QByteArray &buf, int &pos)
93{
94 int size = buf.length();
95 QString res;
96 res.reserve(size); // can't be larger than buf
97 pos++; // jump over leading quote
98 bool escaped = false; // if true means next character is literal
99 bool parsing = true; // true as long as end quote not found
100 while (parsing && pos < size) {
101 const QChar ch = QLatin1Char(buf[pos++]);
102 if (escaped) {
103 res += ch;
104 escaped = false;
105 } else {
106 switch (ch.unicode()) {
107 case '"':
108 parsing = false;
109 break;
110 case '\\':
111 escaped = true;
112 break;
113 default:
114 res += ch;
115 break;
116 } /*end switch*/
117 } /*end if*/
118 } /*wend*/
119 res.squeeze();
120 return res;
121}
122
123/** parses the header of a data url
124 * @param url the data url
125 * @param mimeOnly if the only interesting information is the MIME type
126 * @return DataHeader structure with the header information
127 */
128static DataHeader parseDataHeader(const QUrl &url, const bool mimeOnly)
129{
130 DataHeader header_info;
131
132 // initialize header info members
133 header_info.mime_type = QStringLiteral("text/plain");
134 header_info.attributes.insert(QStringLiteral("charset"), QStringLiteral("us-ascii"));
135 header_info.is_base64 = false;
136
137 // decode url and save it
138 const QByteArray &raw_url = header_info.url = QByteArray::fromPercentEncoding(url.path(QUrl::FullyEncoded).toLatin1());
139 const int raw_url_len = raw_url.length();
140
141 header_info.data_offset = 0;
142
143 // read MIME type
144 if (raw_url_len == 0) {
145 return header_info;
146 }
147 const QString mime_type = extract(raw_url, header_info.data_offset).trimmed();
148 if (!mime_type.isEmpty()) {
149 header_info.mime_type = mime_type;
150 }
151 if (mimeOnly) {
152 return header_info;
153 }
154
155 if (header_info.data_offset >= raw_url_len) {
156 return header_info;
157 }
158 // jump over delimiter token and return if data reached
159 if (raw_url[header_info.data_offset++] == ',') {
160 return header_info;
161 }
162
163 // read all attributes and store them
164 bool data_begin_reached = false;
165 while (!data_begin_reached && header_info.data_offset < raw_url_len) {
166 // read attribute
167 const QString attribute = extract(raw_url, header_info.data_offset, '=').trimmed();
168 if (header_info.data_offset >= raw_url_len || raw_url[header_info.data_offset] != '=') {
169 // no assignment, must be base64 option
170 if (attribute == QLatin1String("base64")) {
171 header_info.is_base64 = true;
172 }
173 } else {
174 header_info.data_offset++; // jump over '=' token
175
176 // read value
177 ignoreWS(raw_url, header_info.data_offset);
178 if (header_info.data_offset >= raw_url_len) {
179 return header_info;
180 }
181
182 QString value;
183 if (raw_url[header_info.data_offset] == '"') {
184 value = parseQuotedString(raw_url, header_info.data_offset);
185 ignoreWS(raw_url, header_info.data_offset);
186 } else {
187 value = extract(raw_url, header_info.data_offset).trimmed();
188 }
189
190 // add attribute to map
191 header_info.attributes[attribute.toLower()] = value;
192
193 } /*end if*/
194 if (header_info.data_offset < raw_url_len && raw_url[header_info.data_offset] == ',') {
195 data_begin_reached = true;
196 }
197 header_info.data_offset++; // jump over separator token
198 } /*wend*/
199
200 return header_info;
201}
202
203DataProtocol::DataProtocol()
204{
205}
206
207DataProtocol::~DataProtocol() = default;
208
209void DataProtocol::get(const QUrl &url)
210{
211 ref();
212 // qDebug() << this;
213
214 const DataHeader hdr = parseDataHeader(url, false);
215
216 const int size = hdr.url.length();
217 const int data_ofs = qMin(hdr.data_offset, size);
218 // FIXME: string is copied, would be nice if we could have a reference only
219 const QByteArray url_data = hdr.url.mid(data_ofs);
220 QByteArray outData;
221
222 if (hdr.is_base64) {
223 // base64 stuff is expected to contain the correct charset, so we just
224 // decode it and pass it to the receiver
225 outData = QByteArray::fromBase64(url_data);
226 } else {
227 QStringDecoder codec(hdr.attributes[QStringLiteral("charset")].toLatin1().constData());
228 if (codec.isValid()) {
229 outData = QString(codec.decode(url_data)).toUtf8();
230 } else {
231 outData = url_data;
232 } /*end if*/
233 } /*end if*/
234
235 // qDebug() << "emit mimeType@"<<this;
236 Q_EMIT mimeType(hdr.mime_type);
237 // qDebug() << "emit totalSize@"<<this;
238 Q_EMIT totalSize(outData.size());
239
240 // qDebug() << "emit setMetaData@"<<this;
241 setAllMetaData(hdr.attributes);
242
243 // qDebug() << "emit sendMetaData@"<<this;
244 sendMetaData();
245 // qDebug() << "(1) queue size " << dispatchQueue.size();
246 // empiric studies have shown that this shouldn't be queued & dispatched
247 Q_EMIT data(outData);
248 // qDebug() << "(2) queue size " << dispatchQueue.size();
249 dispatch_data(QByteArray{});
250 // qDebug() << "(3) queue size " << dispatchQueue.size();
251 dispatch_finished();
252 // qDebug() << "(4) queue size " << dispatchQueue.size();
253 deref();
254}
255
256/* --------------------------------------------------------------------- */
257
258void DataProtocol::mimetype(const QUrl &url)
259{
260 ref();
261 Q_EMIT mimeType(parseDataHeader(url, true).mime_type);
262 Q_EMIT finished();
263 deref();
264}
265
266/* --------------------------------------------------------------------- */
267
268#if !defined(TESTKIO)
269#include "moc_dataprotocol_p.cpp"
270#endif
MetaData is a simple map of key/value strings.
KCALUTILS_EXPORT QString mimeType()
A namespace for KIO globals.
QAction * find(const QObject *recvr, const char *slot, QObject *parent)
const QList< QKeySequence > & begin()
QByteArray fromBase64(const QByteArray &base64, Base64Options options)
QByteArray fromPercentEncoding(const QByteArray &input, char percent)
qsizetype length() const const
QByteArray mid(qsizetype pos, qsizetype len) const const
qsizetype size() const const
char16_t & unicode()
iterator insert(const Key &key, const T &value)
QString fromLatin1(QByteArrayView str)
bool isEmpty() const const
void reserve(qsizetype size)
void squeeze()
QByteArray toLatin1() const const
QString toLower() const const
QByteArray toUtf8() const const
QString trimmed() const const
FullyEncoded
QString path(ComponentFormattingOptions options) const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:56:12 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.