• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KIOSlave

  • sources
  • kde-4.12
  • kdelibs
  • kioslave
  • http
parsinghelpers.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2  Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
3  Copyright (C) 2010,2011 Rolf Eike Beer <kde@opensource.sf-tec.de>
4 
5  This library is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Library General Public
7  License as published by the Free Software Foundation; either
8  version 2 of the License, or (at your option) any later version.
9 
10  This library is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  Library General Public License for more details.
14 
15  You should have received a copy of the GNU Library General Public License
16  along with this library; see the file COPYING.LIB. If not, write to
17  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19 */
20 
21 #include <ctype.h>
22 
23 #include <QDir>
24 #include <QMap>
25 #include <QTextCodec>
26 #include <QUrl>
27 
28 #include <kcodecs.h>
29 #include <kdebug.h>
30 
// Move *pos forward past a run of spaces and horizontal tabs.
// Scanning stops at index end (exclusive); afterwards *pos refers to the first
// character that is neither ' ' nor '\t', or equals end if there is none.
static void skipSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    for (; cursor < end; ++cursor) {
        const char c = input[cursor];
        if (c != ' ' && c != '\t') {
            break;
        }
    }
    *pos = cursor;
}
41 
// Move *pos to the beginning of the next line, tolerating sloppy line endings
// (any mix of '\r' and '\n' is accepted as a line break).
// Returns false once the end of the header has been reached - either because
// an empty line (two '\r' or two '\n') was consumed or because the data ran
// out at index end - and true otherwise.
static bool nextLine(const char input[], int *pos, int end)
{
    int cursor = *pos;

    // run to the end of the current line
    for (; cursor < end; ++cursor) {
        if (input[cursor] == '\r' || input[cursor] == '\n') {
            break;
        }
    }

    // consume line break characters until one kind has been seen twice
    int crSeen = 0;
    int lfSeen = 0;
    while (cursor < end && crSeen < 2 && lfSeen < 2) {
        const char c = input[cursor];
        if (c == '\r') {
            ++crSeen;
        } else if (c == '\n') {
            ++lfSeen;
        } else {
            break;
        }
        ++cursor;
    }

    // Three of the four characters of an empty line have been read: if the
    // kind seen only once follows, eat it too. This makes conforming headers
    // using the proper \r\n sequence (and also \n\r) parse correctly.
    const bool oneMissing = (crSeen == 2 && lfSeen == 1) || (crSeen == 1 && lfSeen == 2);
    if (cursor < end && oneMissing) {
        const char missing = (crSeen == 1) ? '\r' : '\n';
        if (input[cursor] == missing) {
            ++cursor;
        }
    }

    *pos = cursor;
    return cursor < end && crSeen < 2 && lfSeen < 2;
}
68 
69 // QByteArray::fromPercentEncoding() does not notify us about encoding errors so we need
70 // to check here if this is valid at all.
71 static bool isValidPercentEncoding(const QByteArray &data)
72 {
73  int i = 0;
74  const int last = data.length() - 1;
75  const char *d = data.constData();
76 
77  while ( (i = data.indexOf('%', i)) != -1) {
78  if ( i >= last - 2 )
79  return false;
80  if ( ! isxdigit(d[i + 1]) )
81  return false;
82  if ( ! isxdigit(d[i + 2]) )
83  return false;
84  i++;
85  }
86 
87  return true;
88 }
89 
90 QByteArray TokenIterator::next()
91 {
92  QPair<int, int> token = m_tokens[m_currentToken++];
93  //fromRawData brings some speed advantage but also the requirement to keep the text buffer
94  //around. this together with implicit sharing (you don't know where copies end up)
95  //is dangerous!
96  //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
97  return QByteArray(&m_buffer[token.first], token.second - token.first);
98 }
99 
100 QByteArray TokenIterator::current() const
101 {
102  QPair<int, int> token = m_tokens[m_currentToken - 1];
103  //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
104  return QByteArray(&m_buffer[token.first], token.second - token.first);
105 }
106 
107 QList<QByteArray> TokenIterator::all() const
108 {
109  QList<QByteArray> ret;
110  for (int i = 0; i < m_tokens.count(); i++) {
111  QPair<int, int> token = m_tokens[i];
112  ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
113  }
114  return ret;
115 }
116 
117 
118 HeaderTokenizer::HeaderTokenizer(char *buffer)
119  : m_buffer(buffer)
120 {
121  // add information about available headers and whether they have one or multiple,
122  // comma-separated values.
123 
124  //The following response header fields are from RFC 2616 unless otherwise specified.
125  //Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
126  //a header field.
127  static const HeaderFieldTemplate headerFieldTemplates[] = {
128  {"accept-ranges", false},
129  {"age", false},
130  {"cache-control", true},
131  {"connection", true},
132  {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
133  {"content-encoding", true},
134  {"content-language", true},
135  {"content-length", false},
136  {"content-location", false},
137  {"content-md5", false},
138  {"content-type", false},
139  {"date", false},
140  {"dav", true}, //RFC 2518
141  {"etag", false},
142  {"expires", false},
143  {"keep-alive", true}, //RFC 2068
144  {"last-modified", false},
145  {"link", false}, //RFC 2068, multi-valued with ";" separator
146  {"location", false},
147  {"p3p", true}, // http://www.w3.org/TR/P3P/
148  {"pragma", true},
149  {"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
150  //multiple values. we handle this at a higher level.
151  {"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
152  //when using "connection" when talking to a proxy.
153  {"refresh", false}, //not sure, only found some mailing list posts mentioning it
154  {"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
155  //by sending several instances of this field as opposed to
156  //usually comma-separated lists with maybe multiple instances.
157  {"transfer-encoding", true},
158  {"upgrade", true},
159  {"warning", true},
160  {"www-authenticate", false} //see proxy-authenticate
161  };
162 
163  for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
164  const HeaderFieldTemplate &ft = headerFieldTemplates[i];
165  insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
166  }
167 }
168 
169 int HeaderTokenizer::tokenize(int begin, int end)
170 {
171  char *buf = m_buffer; //keep line length in check :/
172  int idx = begin;
173  int startIdx = begin; //multi-purpose start of current token
174  bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
175  QByteArray headerKey;
176  do {
177 
178  if (buf[idx] == ' ' || buf [idx] == '\t') {
179  // line continuation; preserve startIdx except (see below)
180  if (headerKey.isEmpty()) {
181  continue;
182  }
183  // turn CR/LF into spaces for later parsing convenience
184  int backIdx = idx - 1;
185  while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
186  buf[backIdx--] = ' ';
187  }
188 
189  // multiple values, comma-separated: add new value or continue previous?
190  if (operator[](headerKey).isMultiValued) {
191  if (multiValuedEndedWithComma) {
192  // start new value; this is almost like no line continuation
193  skipSpace(buf, &idx, end);
194  startIdx = idx;
195  } else {
196  // continue previous value; this is tricky. unit tests to the rescue!
197  if (operator[](headerKey).beginEnd.last().first == startIdx) {
198  // remove entry, it will be re-added because already idx != startIdx
199  operator[](headerKey).beginEnd.removeLast();
200  } else {
201  // no comma, no entry: the prev line was whitespace only - start new value
202  skipSpace(buf, &idx, end);
203  startIdx = idx;
204  }
205  }
206  }
207 
208  } else {
209  // new field
210  startIdx = idx;
211  // also make sure that there is at least one char after the colon
212  while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
213  buf[idx] = tolower(buf[idx]);
214  idx++;
215  }
216  if (buf[idx] != ':') {
217  //malformed line: no colon
218  headerKey.clear();
219  continue;
220  }
221  headerKey = QByteArray(&buf[startIdx], idx - startIdx);
222  if (!contains(headerKey)) {
223  //we don't recognize this header line
224  headerKey.clear();
225  continue;
226  }
227  // skip colon & leading whitespace
228  idx++;
229  skipSpace(buf, &idx, end);
230  startIdx = idx;
231  }
232 
233  // we have the name/key of the field, now parse the value
234  if (!operator[](headerKey).isMultiValued) {
235 
236  // scan to end of line
237  while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
238  idx++;
239  }
240  if (!operator[](headerKey).beginEnd.isEmpty()) {
241  // there already is an entry; are we just in a line continuation?
242  if (operator[](headerKey).beginEnd.last().first == startIdx) {
243  // line continuation: delete previous entry and later insert a new, longer one.
244  operator[](headerKey).beginEnd.removeLast();
245  }
246  }
247  operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
248 
249  } else {
250 
251  // comma-separated list
252  while (true) {
253  //skip one value
254  while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
255  idx++;
256  }
257  if (idx != startIdx) {
258  operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
259  }
260  multiValuedEndedWithComma = buf[idx] == ',';
261  //skip comma(s) and leading whitespace, if any respectively
262  while (idx < end && buf[idx] == ',') {
263  idx++;
264  }
265  skipSpace(buf, &idx, end);
266  //next value or end-of-line / end of header?
267  if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
268  break;
269  }
270  //next value
271  startIdx = idx;
272  }
273  }
274  } while (nextLine(buf, &idx, end));
275  return idx;
276 }
277 
278 
279 TokenIterator HeaderTokenizer::iterator(const char *key) const
280 {
281  QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
282  if (contains(keyBa)) {
283  return TokenIterator(value(keyBa).beginEnd, m_buffer);
284  } else {
285  return TokenIterator(m_nullTokens, m_buffer);
286  }
287 }
288 
289 static void skipLWS(const QString &str, int &pos)
290 {
291  while (pos < str.length() && (str[pos] == QLatin1Char(' ') || str[pos] == QLatin1Char('\t'))) {
292  ++pos;
293  }
294 }
295 
296 // Sets of characters that specialChar() treats as special, i.e. that are not
// allowed in a token. In this file typeSpecials is used for the disposition
// type, attrSpecials for parameter names and valueSpecials for unquoted
// parameter values.
// Keep the common ending, this allows the compiler to join them.
297 static const char typeSpecials[] = "{}*'%()<>@,;:\\\"/[]?=";
298 static const char attrSpecials[] = "'%()<>@,;:\\\"/[]?=";
299 static const char valueSpecials[] = "()<>@,;:\\\"/[]?=";
300 
301 static bool specialChar(const QChar &ch, const char *specials)
302 {
303  // WORKAROUND: According to RFC 2616, any character other than ascii
304  // characters should NOT be allowed in unquoted content-disposition file
305  // names. However, since none of the major browsers follow this rule, we do
306  // the same thing here and allow all printable unicode characters. See
307  // https://bugs.kde.org/show_bug.cgi?id=261223 for the detials.
308  if (!ch.isPrint()) {
309  return true;
310  }
311 
312  for (int i = qstrlen(specials) - 1; i >= 0; i--) {
313  if (ch == QLatin1Char(specials[i])) {
314  return true;
315  }
316  }
317 
318  return false;
319 }
320 
336 static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
337 {
338  QString out;
339  skipLWS(str, pos);
340  bool valid = true;
341 
342  while (pos < str.length() && (str[pos] != term)) {
343  out += str[pos];
344  valid = (valid && !specialChar(str[pos], specials));
345  ++pos;
346  }
347 
348  if (pos < str.length()) { // Stopped due to finding term
349  ++pos;
350  }
351 
352  if (!valid) {
353  return QString();
354  }
355 
356  // Remove trailing linear whitespace...
357  while (out.endsWith(QLatin1Char(' ')) || out.endsWith(QLatin1Char('\t'))) {
358  out.chop(1);
359  }
360 
361  if (out.contains(QLatin1Char(' '))) {
362  out.clear();
363  }
364 
365  return out;
366 }
367 
368 // As above, but also handles quotes..
369 // pos is set to -1 on parse error
370 static QString extractMaybeQuotedUntil(const QString &str, int &pos)
371 {
372  const QChar term = QLatin1Char(';');
373 
374  skipLWS(str, pos);
375 
376  // Are we quoted?
377  if (pos < str.length() && str[pos] == QLatin1Char('"')) {
378  QString out;
379 
380  // Skip the quote...
381  ++pos;
382 
383  // when quoted we also need an end-quote
384  bool endquote = false;
385 
386  // Parse until trailing quote...
387  while (pos < str.length()) {
388  if (str[pos] == QLatin1Char('\\') && pos + 1 < str.length()) {
389  // quoted-pair = "\" CHAR
390  out += str[pos + 1];
391  pos += 2; // Skip both...
392  } else if (str[pos] == QLatin1Char('"')) {
393  ++pos;
394  endquote = true;
395  break;
396  } else if (!str[pos].isPrint()) { // Don't allow CTL's RFC 2616 sec 2.2
397  break;
398  } else {
399  out += str[pos];
400  ++pos;
401  }
402  }
403 
404  if (!endquote) {
405  pos = -1;
406  return QString();
407  }
408 
409  // Skip until term..
410  while (pos < str.length() && (str[pos] != term)) {
411  if ((str[pos] != QLatin1Char(' ')) && (str[pos] != QLatin1Char('\t'))) {
412  pos = -1;
413  return QString();
414  }
415  ++pos;
416  }
417 
418  if (pos < str.length()) { // Stopped due to finding term
419  ++pos;
420  }
421 
422  return out;
423  } else {
424  return extractUntil(str, term, pos, valueSpecials);
425  }
426 }
427 
428 static QMap<QString, QString> contentDispositionParserInternal(const QString &disposition)
429 {
430  kDebug(7113) << "disposition: " << disposition;
431  int pos = 0;
432  const QString strDisposition = extractUntil(disposition, QLatin1Char(';'), pos, typeSpecials).toLower();
433 
434  QMap<QString, QString> parameters;
435  QMap<QString, QString> contparams; // all parameters that contain continuations
436  QMap<QString, QString> encparams; // all parameters that have character encoding
437 
438  // the type is invalid, the complete header is junk
439  if (strDisposition.isEmpty()) {
440  return parameters;
441  }
442 
443  parameters.insert(QLatin1String("type"), strDisposition);
444 
445  while (pos < disposition.length()) {
446  QString key = extractUntil(disposition, QLatin1Char('='), pos, attrSpecials).toLower();
447 
448  if (key.isEmpty()) {
449  // parse error in this key: do not parse more, but add up
450  // everything we already got
451  kDebug(7113) << "parse error in key, abort parsing";
452  break;
453  }
454 
455  QString val;
456  if (key.endsWith(QLatin1Char('*'))) {
457  val = extractUntil(disposition, QLatin1Char(';'), pos, valueSpecials);
458  } else {
459  val = extractMaybeQuotedUntil(disposition, pos);
460  }
461 
462  if (val.isEmpty()) {
463  if (pos == -1) {
464  kDebug(7113) << "parse error in value, abort parsing";
465  break;
466  }
467  continue;
468  }
469 
470  const int spos = key.indexOf(QLatin1Char('*'));
471  if (spos == key.length() - 1) {
472  key.chop(1);
473  encparams.insert(key, val);
474  } else if (spos >= 0) {
475  contparams.insert(key, val);
476  } else if (parameters.contains(key)) {
477  kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
478  parameters.remove(key);
479  return parameters;
480  } else {
481  parameters.insert(key, val);
482  }
483  }
484 
485  QMap<QString, QString>::iterator i = contparams.begin();
486  while (i != contparams.end()) {
487  QString key = i.key();
488  int spos = key.indexOf(QLatin1Char('*'));
489  bool hasencoding = false;
490 
491  if (key.at(spos + 1) != QLatin1Char('0')) {
492  ++i;
493  continue;
494  }
495 
496  // no leading zeros allowed, so delete the junk
497  int klen = key.length();
498  if (klen > spos + 2) {
499  // nothing but continuations and encodings may insert * into parameter name
500  if ((klen > spos + 3) || ((klen == spos + 3) && (key.at(spos + 2) != QLatin1Char('*')))) {
501  kDebug(7113) << "removing invalid key " << key << "with val" << i.value() << key.at(spos + 2);
502  i = contparams.erase(i);
503  continue;
504  }
505  hasencoding = true;
506  }
507 
508  int seqnum = 1;
509  QMap<QString, QString>::iterator partsi;
510  // we do not need to care about encoding specifications: only the first
511  // part is allowed to have one
512  QString val = i.value();
513 
514  key.chop(hasencoding ? 2 : 1);
515 
516  while ((partsi = contparams.find(key + QString::number(seqnum))) != contparams.end()) {
517  val += partsi.value();
518  contparams.erase(partsi);
519  }
520 
521  i = contparams.erase(i);
522 
523  key.chop(1);
524  if (hasencoding) {
525  encparams.insert(key, val);
526  } else {
527  if (parameters.contains(key)) {
528  kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
529  parameters.remove(key);
530  return parameters;
531  }
532 
533  parameters.insert(key, val);
534  }
535  }
536 
537  for (QMap<QString, QString>::iterator i = encparams.begin(); i != encparams.end(); ++i) {
538  QString val = i.value();
539 
540  // RfC 2231 encoded character set in filename
541  int spos = val.indexOf(QLatin1Char('\''));
542  if (spos == -1) {
543  continue;
544  }
545  int npos = val.indexOf(QLatin1Char('\''), spos + 1);
546  if (npos == -1) {
547  continue;
548  }
549 
550  const QString charset = val.left(spos);
551  const QString lang = val.mid(spos + 1, npos - spos - 1);
552  const QByteArray encodedVal = val.mid(npos + 1).toLatin1();
553 
554  if ( ! isValidPercentEncoding(encodedVal) )
555  continue;
556 
557  const QByteArray rawval = QByteArray::fromPercentEncoding(encodedVal);
558 
559  if (charset.isEmpty() || (charset == QLatin1String("us-ascii"))) {
560  bool valid = true;
561  for (int j = rawval.length() - 1; (j >= 0) && valid; j--) {
562  valid = (rawval.at(j) >= 32);
563  }
564 
565  if (!valid)
566  continue;
567  val = QString::fromLatin1(rawval.constData());
568  } else {
569  QTextCodec *codec = QTextCodec::codecForName(charset.toLatin1());
570  if (!codec)
571  continue;
572  val = codec->toUnicode(rawval);
573  }
574 
575  parameters.insert(i.key(), val);
576  }
577 
578  return parameters;
579 }
580 
581 static QMap<QString, QString> contentDispositionParser(const QString &disposition)
582 {
583  QMap<QString, QString> parameters = contentDispositionParserInternal(disposition);
584 
585  const QLatin1String fn("filename");
586  if (parameters.contains(fn)) {
587  // Content-Disposition is not allowed to dictate directory
588  // path, thus we extract the filename only.
589  const QString val = QDir::toNativeSeparators(parameters[fn]);
590  int slpos = val.lastIndexOf(QDir::separator());
591 
592  if (slpos > -1) {
593  parameters.insert(fn, val.mid(slpos + 1));
594  }
595  }
596 
597  return parameters;
598 }
kdebug.h
extractMaybeQuotedUntil
static QString extractMaybeQuotedUntil(const QString &str, int &pos)
Definition: parsinghelpers.cpp:370
nextLine
static bool nextLine(const char input[], int *pos, int end)
Definition: parsinghelpers.cpp:44
TokenIterator::next
QByteArray next()
Definition: parsinghelpers.cpp:90
valueSpecials
static const char valueSpecials[]
Definition: parsinghelpers.cpp:299
isxdigit
#define isxdigit(c)
contentDispositionParserInternal
static QMap< QString, QString > contentDispositionParserInternal(const QString &disposition)
Definition: parsinghelpers.cpp:428
QString
kDebug
static QDebug kDebug(bool cond, int area=KDE_DEFAULT_DEBUG_AREA)
insert
KGuiItem insert()
isValidPercentEncoding
static bool isValidPercentEncoding(const QByteArray &data)
Definition: parsinghelpers.cpp:71
skipSpace
static void skipSpace(const char input[], int *pos, int end)
Definition: parsinghelpers.cpp:32
TokenIterator
Definition: parsinghelpers.h:39
typeSpecials
static const char typeSpecials[]
Definition: parsinghelpers.cpp:297
TokenIterator::current
QByteArray current() const
Definition: parsinghelpers.cpp:100
skipLWS
static void skipLWS(const QString &str, int &pos)
Definition: parsinghelpers.cpp:289
TokenIterator::all
QList< QByteArray > all() const
Definition: parsinghelpers.cpp:107
contentDispositionParser
static QMap< QString, QString > contentDispositionParser(const QString &disposition)
Definition: parsinghelpers.cpp:581
HeaderField
Definition: parsinghelpers.h:27
QPair
HeaderTokenizer::HeaderTokenizer
HeaderTokenizer(char *buffer)
Definition: parsinghelpers.cpp:118
kcodecs.h
extractUntil
static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
read and parse the input until the given terminator
Definition: parsinghelpers.cpp:336
QMap< QString, QString >
HeaderTokenizer::tokenize
int tokenize(int begin, int end)
Definition: parsinghelpers.cpp:169
QList< QByteArray >
specialChar
static bool specialChar(const QChar &ch, const char *specials)
Definition: parsinghelpers.cpp:301
attrSpecials
static const char attrSpecials[]
Definition: parsinghelpers.cpp:298
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:50:58 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KIOSlave

Skip menu "KIOSlave"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal