Libksieve

parser.cpp
1 /* -*- c++ -*-
2  parser/parser.cpp
3 
4  This file is part of KSieve,
5  the KDE internet mail/usenet news message filtering library.
6  Copyright (c) 2002-2003 Marc Mutz <[email protected]>
7 
8  KSieve is free software; you can redistribute it and/or modify it
9  under the terms of the GNU General Public License, version 2, as
10  published by the Free Software Foundation.
11 
12  KSieve is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 
21  In addition, as a special exception, the copyright holders give
22  permission to link the code of this program with any edition of
23  the Qt library by Trolltech AS, Norway (or with modified versions
24  of Qt that use the same license as Qt), and distribute linked
25  combinations including the two. You must obey the GNU General
26  Public License in all respects for all of the code used other than
27  Qt. If you modify this file, you may extend this exception to
28  your version of the file, but you are not obligated to do so. If
29  you do not wish to do so, delete this exception statement from
30  your version.
31 */
32 
33 #include <ksieve/parser.h>
34 #include <impl/parser.h>
35 
36 #include <ksieve/error.h>
37 
38 #include <QString>
39 #include <QByteArray>
40 
41 #include <assert.h>
42 #include <limits.h> // ULONG_MAX
43 #include <ctype.h> // isdigit
44 
45 namespace KSieve {
46 //
47 //
48 // Parser Bridge implementation
49 //
50 //
51 
52 Parser::Parser(const char *scursor, const char *const send, int options)
53  : i(nullptr)
54 {
55  i = new Impl(scursor, send, options);
56 }
57 
58 Parser::~Parser()
59 {
60  delete i;
61  i = nullptr;
62 }
63 
64 void Parser::setScriptBuilder(ScriptBuilder *builder)
65 {
66  assert(i);
67  i->mBuilder = builder;
68 }
69 
70 ScriptBuilder *Parser::scriptBuilder() const
71 {
72  assert(i);
73  return i->mBuilder;
74 }
75 
76 const Error &Parser::error() const
77 {
78  assert(i);
79  return i->error();
80 }
81 
82 bool Parser::parse()
83 {
84  assert(i);
85  return i->parse();
86 }
87 }
88 
// Translates a size quantifier character into its multiplier:
//   'k'/'K' -> 1024, 'm'/'M' -> 1024^2, 'g'/'G' -> 1024^3.
// Any other character trips an assertion; when assertions are compiled
// out, 1 is returned instead.
static inline unsigned long factorForQuantifier(char ch)
{
    if (ch == 'k' || ch == 'K') {
        return 1024;
    }
    if (ch == 'm' || ch == 'M') {
        return 1024 * 1024;
    }
    if (ch == 'g' || ch == 'G') {
        return 1024 * 1024 * 1024;
    }
    assert(0); // lexer should prohibit this
    return 1; // make compiler happy
}
106 
// Tells whether computing `10 * result + add` would exceed ULONG_MAX.
//
// The limit for `result` is computed with integer division. Going through
// `double` (ULONG_MAX / 10.0) is inexact when unsigned long is 64 bits wide:
// the quotient gets rounded *up* to a representable double, so values just
// above ULONG_MAX / 10 would slip through and `10 * result` would wrap.
static inline bool willOverflowULong(unsigned long result, unsigned long add)
{
    const unsigned long maxULongByTen = ULONG_MAX / 10;
    if (result > maxULongByTen) {
        return true; // 10 * result alone already overflows
    }
    // 10 * result is now known to fit, so the subtraction cannot wrap.
    return ULONG_MAX - 10 * result < add;
}
112 
113 namespace KSieve {
114 //
115 //
116 // Parser Implementation
117 //
118 //
119 
120 Parser::Impl::Impl(const char *scursor, const char *const send, int options)
121  : mToken(Lexer::None)
122  , lexer(scursor, send, options)
123  , mBuilder(nullptr)
124 {
125 }
126 
127 bool Parser::Impl::isStringToken() const
128 {
129  return token() == Lexer::QuotedString
130  || token() == Lexer::MultiLineString;
131 }
132 
133 bool Parser::Impl::isArgumentToken() const
134 {
135  return isStringToken()
136  || token() == Lexer::Number
137  || token() == Lexer::Tag
138  || (token() == Lexer::Special && mTokenValue == QLatin1String("["));
139 }
140 
141 bool Parser::Impl::obtainToken()
142 {
143  while (!mToken && !lexer.atEnd() && !lexer.error()) {
144  mToken = lexer.nextToken(mTokenValue);
145  if (lexer.error()) {
146  break;
147  }
148  // comments and line feeds are semantically invisible and may
149  // appear anywhere, so we handle them here centrally:
150  switch (token()) {
151  case Lexer::HashComment:
152  if (scriptBuilder()) {
153  scriptBuilder()->hashComment(tokenValue());
154  }
155  consumeToken();
156  break;
157  case Lexer::BracketComment:
158  if (scriptBuilder()) {
159  scriptBuilder()->bracketComment(tokenValue());
160  }
161  consumeToken();
162  break;
163  case Lexer::LineFeeds:
164  for (unsigned int i = 0, end = tokenValue().toUInt(); i < end; ++i) {
165  if (scriptBuilder()) { // better check every iteration, b/c
166  // we call out to ScriptBuilder,
167  // where nasty things might happen!
168  scriptBuilder()->lineFeed();
169  }
170  }
171  consumeToken();
172  break;
173  default:
174  ; // make compiler happy
175  }
176  }
177  if (lexer.error() && scriptBuilder()) {
178  scriptBuilder()->error(lexer.error());
179  }
180  return !lexer.error();
181 }
182 
183 bool Parser::Impl::parse()
184 {
185  // this is the entry point: START := command-list
186  if (!parseCommandList()) {
187  return false;
188  }
189  if (!atEnd()) {
190  makeUnexpectedTokenError(Error::ExpectedCommand);
191  return false;
192  }
193  if (scriptBuilder()) {
194  scriptBuilder()->finished();
195  }
196  return true;
197 }
198 
199 bool Parser::Impl::parseCommandList()
200 {
201  // our ABNF:
202  // command-list := *comand
203 
204  while (!atEnd()) {
205  if (!obtainToken()) {
206  return false;
207  }
208  if (token() == Lexer::None) {
209  continue;
210  }
211  if (token() != Lexer::Identifier) {
212  return true;
213  }
214  if (!parseCommand()) {
215  assert(error());
216  return false;
217  }
218  }
219  return true;
220 }
221 
222 bool Parser::Impl::parseCommand()
223 {
224  // command := identifier arguments ( ";" / block )
225  // arguments := *argument [ test / test-list ]
226  // block := "{" *command "}"
227  // our ABNF:
228  // block := "{" [ command-list ] "}"
229 
230  if (atEnd()) {
231  return false;
232  }
233 
234  //
235  // identifier
236  //
237 
238  if (!obtainToken() || token() != Lexer::Identifier) {
239  return false;
240  }
241 
242  if (scriptBuilder()) {
243  scriptBuilder()->commandStart(tokenValue(), lexer.line());
244  }
245  consumeToken();
246 
247  //
248  // *argument
249  //
250 
251  if (!obtainToken()) {
252  return false;
253  }
254 
255  if (atEnd()) {
256  makeError(Error::MissingSemicolonOrBlock);
257  return false;
258  }
259 
260  if (isArgumentToken() && !parseArgumentList()) {
261  assert(error());
262  return false;
263  }
264 
265  //
266  // test / test-list
267  //
268 
269  if (!obtainToken()) {
270  return false;
271  }
272 
273  if (atEnd()) {
274  makeError(Error::MissingSemicolonOrBlock);
275  return false;
276  }
277 
278  if (token() == Lexer::Special && tokenValue() == QLatin1Char('(')) { // test-list
279  if (!parseTestList()) {
280  assert(error());
281  return false;
282  }
283  } else if (token() == Lexer::Identifier) { // should be test:
284  if (!parseTest()) {
285  assert(error());
286  return false;
287  }
288  }
289 
290  //
291  // ";" / block
292  //
293 
294  if (!obtainToken()) {
295  return false;
296  }
297 
298  if (atEnd()) {
299  makeError(Error::MissingSemicolonOrBlock);
300  return false;
301  }
302 
303  if (token() != Lexer::Special) {
304  makeUnexpectedTokenError(Error::ExpectedBlockOrSemicolon);
305  return false;
306  }
307 
308  if (tokenValue() == QLatin1Char(';')) {
309  consumeToken();
310  } else if (tokenValue() == QLatin1String("{")) { // block
311  if (!parseBlock()) {
312  return false; // it's an error since we saw '{'
313  }
314  } else {
315  makeError(Error::MissingSemicolonOrBlock);
316  return false;
317  }
318 
319  if (scriptBuilder()) {
320  scriptBuilder()->commandEnd(lexer.line());
321  }
322  return true;
323 }
324 
325 bool Parser::Impl::parseArgumentList()
326 {
327  // our ABNF:
328  // argument-list := *argument
329 
330  while (!atEnd()) {
331  if (!obtainToken()) {
332  return false;
333  }
334  if (!isArgumentToken()) {
335  return true;
336  }
337  if (!parseArgument()) {
338  return !error();
339  }
340  }
341  return true;
342 }
343 
344 bool Parser::Impl::parseArgument()
345 {
346  // argument := string-list / number / tag
347 
348  if (!obtainToken() || atEnd()) {
349  return false;
350  }
351 
352  if (token() == Lexer::Number) {
353  if (!parseNumber()) {
354  assert(error());
355  return false;
356  }
357  return true;
358  } else if (token() == Lexer::Tag) {
359  if (scriptBuilder()) {
360  scriptBuilder()->taggedArgument(tokenValue());
361  }
362  consumeToken();
363  return true;
364  } else if (isStringToken()) {
365  if (scriptBuilder()) {
366  scriptBuilder()->stringArgument(tokenValue(), token() == Lexer::MultiLineString, QString());
367  }
368  consumeToken();
369  return true;
370  } else if (token() == Lexer::Special && tokenValue() == QLatin1String("[")) {
371  if (!parseStringList()) {
372  assert(error());
373  return false;
374  }
375  return true;
376  }
377 
378  return false;
379 }
380 
381 bool Parser::Impl::parseTestList()
382 {
383  // test-list := "(" test *("," test) ")"
384 
385  if (!obtainToken() || atEnd()) {
386  return false;
387  }
388 
389  if (token() != Lexer::Special || tokenValue() != QLatin1String("(")) {
390  return false;
391  }
392  if (scriptBuilder()) {
393  scriptBuilder()->testListStart();
394  }
395  consumeToken();
396 
397  // generic while/switch construct for comma-separated lists. See
398  // parseStringList() for another one. Any fix here is like to apply there, too.
399  bool lastWasComma = true;
400  while (!atEnd()) {
401  if (!obtainToken()) {
402  return false;
403  }
404 
405  switch (token()) {
406  case Lexer::None:
407  break;
408  case Lexer::Special:
409  assert(tokenValue().length() == 1);
410  assert(tokenValue().at(0).toLatin1());
411  switch (tokenValue().at(0).toLatin1()) {
412  case ')':
413  consumeToken();
414  if (lastWasComma) {
415  makeError(Error::ConsecutiveCommasInTestList);
416  return false;
417  }
418  if (scriptBuilder()) {
419  scriptBuilder()->testListEnd();
420  }
421  return true;
422  case ',':
423  consumeToken();
424  if (lastWasComma) {
425  makeError(Error::ConsecutiveCommasInTestList);
426  return false;
427  }
428  lastWasComma = true;
429  break;
430  default:
431  makeError(Error::NonStringInStringList);
432  return false;
433  }
434  break;
435 
436  case Lexer::Identifier:
437  if (!lastWasComma) {
438  makeError(Error::MissingCommaInTestList);
439  return false;
440  } else {
441  lastWasComma = false;
442  if (!parseTest()) {
443  assert(error());
444  return false;
445  }
446  }
447  break;
448 
449  default:
450  makeUnexpectedTokenError(Error::NonTestInTestList);
451  return false;
452  }
453  }
454 
455  makeError(Error::PrematureEndOfTestList);
456  return false;
457 }
458 
459 bool Parser::Impl::parseTest()
460 {
461  // test := identifier arguments
462  // arguments := *argument [ test / test-list ]
463 
464  //
465  // identifier
466  //
467 
468  if (!obtainToken() || atEnd()) {
469  return false;
470  }
471 
472  if (token() != Lexer::Identifier) {
473  return false;
474  }
475 
476  if (scriptBuilder()) {
477  scriptBuilder()->testStart(tokenValue());
478  }
479  consumeToken();
480 
481  //
482  // *argument
483  //
484 
485  if (!obtainToken()) {
486  return false;
487  }
488 
489  if (atEnd()) { // a test w/o args
490  goto TestEnd;
491  }
492 
493  if (isArgumentToken() && !parseArgumentList()) {
494  assert(error());
495  return false;
496  }
497 
498  //
499  // test / test-list
500  //
501 
502  if (!obtainToken()) {
503  return false;
504  }
505 
506  if (atEnd()) { // a test w/o nested tests
507  goto TestEnd;
508  }
509 
510  if (token() == Lexer::Special && tokenValue() == QLatin1Char('(')) { // test-list
511  if (!parseTestList()) {
512  assert(error());
513  return false;
514  }
515  } else if (token() == Lexer::Identifier) { // should be test:
516  if (!parseTest()) {
517  assert(error());
518  return false;
519  }
520  }
521 
522 TestEnd:
523  if (scriptBuilder()) {
524  scriptBuilder()->testEnd();
525  }
526  return true;
527 }
528 
529 bool Parser::Impl::parseBlock()
530 {
531  // our ABNF:
532  // block := "{" [ command-list ] "}"
533 
534  if (!obtainToken() || atEnd()) {
535  return false;
536  }
537 
538  if (token() != Lexer::Special || tokenValue() != QLatin1String("{")) {
539  return false;
540  }
541  if (scriptBuilder()) {
542  scriptBuilder()->blockStart(lexer.line());
543  }
544  consumeToken();
545 
546  if (!obtainToken()) {
547  return false;
548  }
549 
550  if (atEnd()) {
551  makeError(Error::PrematureEndOfBlock);
552  return false;
553  }
554 
555  if (token() == Lexer::Identifier) {
556  if (!parseCommandList()) {
557  assert(error());
558  return false;
559  }
560  }
561 
562  if (!obtainToken()) {
563  return false;
564  }
565 
566  if (atEnd()) {
567  makeError(Error::PrematureEndOfBlock);
568  return false;
569  }
570 
571  if (token() != Lexer::Special || tokenValue() != QLatin1String("}")) {
572  makeError(Error::NonCommandInCommandList);
573  return false;
574  }
575  if (scriptBuilder()) {
576  scriptBuilder()->blockEnd(lexer.line());
577  }
578  consumeToken();
579  return true;
580 }
581 
582 bool Parser::Impl::parseStringList()
583 {
584  // string-list := "[" string *("," string) "]" / string
585  // ;; if there is only a single string, the brackets are optional
586  //
587  // However, since strings are already handled separately from
588  // string lists in parseArgument(), our ABNF is modified to:
589  // string-list := "[" string *("," string) "]"
590 
591  if (!obtainToken() || atEnd()) {
592  return false;
593  }
594 
595  if (token() != Lexer::Special || tokenValue() != QLatin1String("[")) {
596  return false;
597  }
598 
599  if (scriptBuilder()) {
600  scriptBuilder()->stringListArgumentStart();
601  }
602  consumeToken();
603 
604  // generic while/switch construct for comma-separated lists. See
605  // parseTestList() for another one. Any fix here is like to apply there, too.
606  bool lastWasComma = true;
607  while (!atEnd()) {
608  if (!obtainToken()) {
609  return false;
610  }
611 
612  switch (token()) {
613  case Lexer::None:
614  break;
615  case Lexer::Special:
616  assert(tokenValue().length() == 1);
617  switch (tokenValue().at(0).toLatin1()) {
618  case ']':
619  consumeToken();
620  if (lastWasComma) {
621  makeError(Error::ConsecutiveCommasInStringList);
622  return false;
623  }
624  if (scriptBuilder()) {
625  scriptBuilder()->stringListArgumentEnd();
626  }
627  return true;
628  case ',':
629  consumeToken();
630  if (lastWasComma) {
631  makeError(Error::ConsecutiveCommasInStringList);
632  return false;
633  }
634  lastWasComma = true;
635  break;
636  default:
637  makeError(Error::NonStringInStringList);
638  return false;
639  }
640  break;
641 
642  case Lexer::QuotedString:
643  case Lexer::MultiLineString:
644  if (!lastWasComma) {
645  makeError(Error::MissingCommaInStringList);
646  return false;
647  }
648  lastWasComma = false;
649  if (scriptBuilder()) {
650  scriptBuilder()->stringListEntry(tokenValue(), token() == Lexer::MultiLineString, QString());
651  }
652  consumeToken();
653  break;
654 
655  default:
656  makeError(Error::NonStringInStringList);
657  return false;
658  }
659  }
660 
661  makeError(Error::PrematureEndOfStringList);
662  return false;
663 }
664 
665 bool Parser::Impl::parseNumber()
666 {
667  // The lexer returns the number including the quantifier as a
668  // single token value. Here, we split is an check that the number
669  // is not out of range:
670 
671  if (!obtainToken() || atEnd()) {
672  return false;
673  }
674 
675  if (token() != Lexer::Number) {
676  return false;
677  }
678 
679  // number:
680  unsigned long result = 0;
681  int i = 0;
682  const QByteArray s = tokenValue().toLatin1();
683  for (const int len = s.length(); i < len && isdigit(s[i]); ++i) {
684  const unsigned long digitValue = s[i] - '0';
685  if (willOverflowULong(result, digitValue)) {
686  makeError(Error::NumberOutOfRange);
687  return false;
688  } else {
689  result *= 10;
690  result += digitValue;
691  }
692  }
693 
694  // optional quantifier:
695  char quantifier = '\0';
696  if (i < s.length()) {
697  assert(i + 1 == s.length());
698  quantifier = s[i];
699  const unsigned long factor = factorForQuantifier(quantifier);
700  if (result > double(ULONG_MAX) / double(factor)) {
701  makeError(Error::NumberOutOfRange);
702  return false;
703  }
704  result *= factor;
705  }
706 
707  if (scriptBuilder()) {
708  scriptBuilder()->numberArgument(result, quantifier);
709  }
710  consumeToken();
711  return true;
712 }
713 } // namespace KSieve
int length() const
void error(QWidget *parent, const QString &text, const QString &caption=QString(), Options options=Notify)
KDEGAMES_EXPORT QAction * end(const QObject *recvr, const char *slot, QObject *parent)
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Thu Jun 4 2020 23:09:06 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.