Libksieve

parser.cpp
1/* -*- c++ -*-
2 parser/parser.cpp
3
4 This file is part of KSieve,
5 the KDE internet mail/usenet news message filtering library.
6 SPDX-FileCopyrightText: 2002-2003 Marc Mutz <mutz@kde.org>
7
8 SPDX-License-Identifier: GPL-2.0-only
9*/
10
11#include "parser_p.h"
12
13#include "error.h"
14
15#include <QByteArray>
16#include <QString>
17
18#include <cassert>
19#include <cctype> // isdigit
20#include <climits> // ULONG_MAX
21
22namespace KSieve
23{
24//
25//
26// Parser Bridge implementation
27//
28//
29
30Parser::Parser(const char *scursor, const char *const send, int options)
31 : i(new Impl(scursor, send, options))
32{
33}
34
35Parser::~Parser()
36{
37 delete i;
38 i = nullptr;
39}
40
41void Parser::setScriptBuilder(ScriptBuilder *builder)
42{
43 assert(i);
44 i->mBuilder = builder;
45}
46
47ScriptBuilder *Parser::scriptBuilder() const
48{
49 assert(i);
50 return i->mBuilder;
51}
52
53const Error &Parser::error() const
54{
55 assert(i);
56 return i->error();
57}
58
59bool Parser::parse()
60{
61 assert(i);
62 return i->parse();
63}
64}
65
/**
 * Maps a size quantifier character to its multiplier.
 *
 * @param ch one of 'k'/'K', 'm'/'M' or 'g'/'G'
 * @return 1024, 1024^2 or 1024^3 respectively; for any other character the
 *         function asserts and (in release builds) returns 1.
 */
static inline unsigned long factorForQuantifier(char ch)
{
    constexpr unsigned long kibi = 1024UL;

    if (ch == 'k' || ch == 'K') {
        return kibi;
    }
    if (ch == 'm' || ch == 'M') {
        return kibi * kibi;
    }
    if (ch == 'g' || ch == 'G') {
        return kibi * kibi * kibi;
    }

    assert(0); // lexer should prohibit this
    return 1; // make compiler happy
}
83
/**
 * Tells whether computing `10 * result + add` would exceed ULONG_MAX.
 *
 * @param result the value accumulated so far
 * @param add    the amount to add after multiplying @p result by ten
 * @return true if the computation would overflow an unsigned long
 */
static inline bool willOverflowULong(unsigned long result, unsigned long add)
{
    // Integer division on purpose: a double cannot represent ULONG_MAX
    // exactly when unsigned long is 64 bits wide, which would make the
    // threshold too large and let 10 * result wrap around below.
    constexpr unsigned long maxULongByTen = ULONG_MAX / 10;
    // The second operand is only evaluated when 10 * result cannot wrap.
    return result > maxULongByTen || ULONG_MAX - 10 * result < add;
}
89
90namespace KSieve
91{
92//
93//
94// Parser Implementation
95//
96//
97
98Parser::Impl::Impl(const char *scursor, const char *const send, int options)
99 : mToken(Lexer::None)
100 , lexer(scursor, send, options)
101 , mBuilder(nullptr)
102{
103}
104
105bool Parser::Impl::isStringToken() const
106{
107 return token() == Lexer::QuotedString || token() == Lexer::MultiLineString;
108}
109
110bool Parser::Impl::isArgumentToken() const
111{
112 return isStringToken() || token() == Lexer::Number || token() == Lexer::Tag || (token() == Lexer::Special && mTokenValue == QLatin1StringView("["));
113}
114
115bool Parser::Impl::obtainToken()
116{
117 while (!mToken && !lexer.atEnd() && !lexer.error()) {
118 mToken = lexer.nextToken(mTokenValue);
119 if (lexer.error()) {
120 break;
121 }
122 // comments and line feeds are semantically invisible and may
123 // appear anywhere, so we handle them here centrally:
124 switch (token()) {
125 case Lexer::HashComment:
126 if (scriptBuilder()) {
127 scriptBuilder()->hashComment(tokenValue());
128 }
129 consumeToken();
130 break;
131 case Lexer::BracketComment:
132 if (scriptBuilder()) {
133 scriptBuilder()->bracketComment(tokenValue());
134 }
135 consumeToken();
136 break;
137 case Lexer::LineFeeds:
138 for (unsigned int i = 0, end = tokenValue().toUInt(); i < end; ++i) {
139 if (scriptBuilder()) { // better check every iteration, b/c
140 // we call out to ScriptBuilder,
141 // where nasty things might happen!
142 scriptBuilder()->lineFeed();
143 }
144 }
145 consumeToken();
146 break;
147 default:; // make compiler happy
148 }
149 }
150 if (lexer.error() && scriptBuilder()) {
151 scriptBuilder()->error(lexer.error());
152 }
153 return !lexer.error();
154}
155
156bool Parser::Impl::parse()
157{
158 // this is the entry point: START := command-list
159 if (!parseCommandList()) {
160 return false;
161 }
162 if (!atEnd()) {
163 makeUnexpectedTokenError(Error::ExpectedCommand);
164 return false;
165 }
166 if (scriptBuilder()) {
167 scriptBuilder()->finished();
168 }
169 return true;
170}
171
172bool Parser::Impl::parseCommandList()
173{
174 // our ABNF:
175 // command-list := *command
176
177 while (!atEnd()) {
178 if (!obtainToken()) {
179 return false;
180 }
181 if (token() == Lexer::None) {
182 continue;
183 }
184 if (token() != Lexer::Identifier) {
185 return true;
186 }
187 if (!parseCommand()) {
188 assert(error());
189 return false;
190 }
191 }
192 return true;
193}
194
195bool Parser::Impl::parseCommand()
196{
197 // command := identifier arguments ( ";" / block )
198 // arguments := *argument [ test / test-list ]
199 // block := "{" *command "}"
200 // our ABNF:
201 // block := "{" [ command-list ] "}"
202
203 if (atEnd()) {
204 return false;
205 }
206
207 //
208 // identifier
209 //
210
211 if (!obtainToken() || token() != Lexer::Identifier) {
212 return false;
213 }
214
215 if (scriptBuilder()) {
216 scriptBuilder()->commandStart(tokenValue(), lexer.line());
217 }
218 consumeToken();
219
220 //
221 // *argument
222 //
223
224 if (!obtainToken()) {
225 return false;
226 }
227
228 if (atEnd()) {
229 makeError(Error::MissingSemicolonOrBlock);
230 return false;
231 }
232
233 if (isArgumentToken() && !parseArgumentList()) {
234 assert(error());
235 return false;
236 }
237
238 //
239 // test / test-list
240 //
241
242 if (!obtainToken()) {
243 return false;
244 }
245
246 if (atEnd()) {
247 makeError(Error::MissingSemicolonOrBlock);
248 return false;
249 }
250
251 if (token() == Lexer::Special && tokenValue() == QLatin1Char('(')) { // test-list
252 if (!parseTestList()) {
253 assert(error());
254 return false;
255 }
256 } else if (token() == Lexer::Identifier) { // should be test:
257 if (!parseTest()) {
258 assert(error());
259 return false;
260 }
261 }
262
263 //
264 // ";" / block
265 //
266
267 if (!obtainToken()) {
268 return false;
269 }
270
271 if (atEnd()) {
272 makeError(Error::MissingSemicolonOrBlock);
273 return false;
274 }
275
276 if (token() != Lexer::Special) {
277 makeUnexpectedTokenError(Error::ExpectedBlockOrSemicolon);
278 return false;
279 }
280
281 if (tokenValue() == QLatin1Char(';')) {
282 consumeToken();
283 } else if (tokenValue() == QLatin1StringView("{")) { // block
284 if (!parseBlock()) {
285 return false; // it's an error since we saw '{'
286 }
287 } else {
288 makeError(Error::MissingSemicolonOrBlock);
289 return false;
290 }
291
292 if (scriptBuilder()) {
293 scriptBuilder()->commandEnd(lexer.line());
294 }
295 return true;
296}
297
298bool Parser::Impl::parseArgumentList()
299{
300 // our ABNF:
301 // argument-list := *argument
302
303 while (!atEnd()) {
304 if (!obtainToken()) {
305 return false;
306 }
307 if (!isArgumentToken()) {
308 return true;
309 }
310 if (!parseArgument()) {
311 return !error();
312 }
313 }
314 return true;
315}
316
317bool Parser::Impl::parseArgument()
318{
319 // argument := string-list / number / tag
320
321 if (!obtainToken() || atEnd()) {
322 return false;
323 }
324
325 if (token() == Lexer::Number) {
326 if (!parseNumber()) {
327 assert(error());
328 return false;
329 }
330 return true;
331 } else if (token() == Lexer::Tag) {
332 if (scriptBuilder()) {
333 scriptBuilder()->taggedArgument(tokenValue());
334 }
335 consumeToken();
336 return true;
337 } else if (isStringToken()) {
338 if (scriptBuilder()) {
339 scriptBuilder()->stringArgument(tokenValue(), token() == Lexer::MultiLineString, QString());
340 }
341 consumeToken();
342 return true;
343 } else if (token() == Lexer::Special && tokenValue() == QLatin1StringView("[")) {
344 if (!parseStringList()) {
345 assert(error());
346 return false;
347 }
348 return true;
349 }
350
351 return false;
352}
353
354bool Parser::Impl::parseTestList()
355{
356 // test-list := "(" test *("," test) ")"
357
358 if (!obtainToken() || atEnd()) {
359 return false;
360 }
361
362 if (token() != Lexer::Special || tokenValue() != QLatin1StringView("(")) {
363 return false;
364 }
365 if (scriptBuilder()) {
366 scriptBuilder()->testListStart();
367 }
368 consumeToken();
369
370 // generic while/switch construct for comma-separated lists. See
371 // parseStringList() for another one. Any fix here is like to apply there, too.
372 bool lastWasComma = true;
373 while (!atEnd()) {
374 if (!obtainToken()) {
375 return false;
376 }
377
378 switch (token()) {
379 case Lexer::None:
380 break;
381 case Lexer::Special:
382 assert(tokenValue().length() == 1);
383 assert(tokenValue().at(0).toLatin1());
384 switch (tokenValue().at(0).toLatin1()) {
385 case ')':
386 consumeToken();
387 if (lastWasComma) {
388 makeError(Error::ConsecutiveCommasInTestList);
389 return false;
390 }
391 if (scriptBuilder()) {
392 scriptBuilder()->testListEnd();
393 }
394 return true;
395 case ',':
396 consumeToken();
397 if (lastWasComma) {
398 makeError(Error::ConsecutiveCommasInTestList);
399 return false;
400 }
401 lastWasComma = true;
402 break;
403 default:
404 makeError(Error::NonStringInStringList);
405 return false;
406 }
407 break;
408
409 case Lexer::Identifier:
410 if (!lastWasComma) {
411 makeError(Error::MissingCommaInTestList);
412 return false;
413 } else {
414 lastWasComma = false;
415 if (!parseTest()) {
416 assert(error());
417 return false;
418 }
419 }
420 break;
421
422 default:
423 makeUnexpectedTokenError(Error::NonTestInTestList);
424 return false;
425 }
426 }
427
428 makeError(Error::PrematureEndOfTestList);
429 return false;
430}
431
432bool Parser::Impl::parseTest()
433{
434 // test := identifier arguments
435 // arguments := *argument [ test / test-list ]
436
437 //
438 // identifier
439 //
440
441 if (!obtainToken() || atEnd()) {
442 return false;
443 }
444
445 if (token() != Lexer::Identifier) {
446 return false;
447 }
448
449 if (scriptBuilder()) {
450 scriptBuilder()->testStart(tokenValue());
451 }
452 consumeToken();
453
454 //
455 // *argument
456 //
457
458 if (!obtainToken()) {
459 return false;
460 }
461
462 if (atEnd()) { // a test w/o args
463 goto TestEnd;
464 }
465
466 if (isArgumentToken() && !parseArgumentList()) {
467 assert(error());
468 return false;
469 }
470
471 //
472 // test / test-list
473 //
474
475 if (!obtainToken()) {
476 return false;
477 }
478
479 if (atEnd()) { // a test w/o nested tests
480 goto TestEnd;
481 }
482
483 if (token() == Lexer::Special && tokenValue() == QLatin1Char('(')) { // test-list
484 if (!parseTestList()) {
485 assert(error());
486 return false;
487 }
488 } else if (token() == Lexer::Identifier) { // should be test:
489 if (!parseTest()) {
490 assert(error());
491 return false;
492 }
493 }
494
495TestEnd:
496 if (scriptBuilder()) {
497 scriptBuilder()->testEnd();
498 }
499 return true;
500}
501
502bool Parser::Impl::parseBlock()
503{
504 // our ABNF:
505 // block := "{" [ command-list ] "}"
506
507 if (!obtainToken() || atEnd()) {
508 return false;
509 }
510
511 if (token() != Lexer::Special || tokenValue() != QLatin1StringView("{")) {
512 return false;
513 }
514 if (scriptBuilder()) {
515 scriptBuilder()->blockStart(lexer.line());
516 }
517 consumeToken();
518
519 if (!obtainToken()) {
520 return false;
521 }
522
523 if (atEnd()) {
524 makeError(Error::PrematureEndOfBlock);
525 return false;
526 }
527
528 if (token() == Lexer::Identifier) {
529 if (!parseCommandList()) {
530 assert(error());
531 return false;
532 }
533 }
534
535 if (!obtainToken()) {
536 return false;
537 }
538
539 if (atEnd()) {
540 makeError(Error::PrematureEndOfBlock);
541 return false;
542 }
543
544 if (token() != Lexer::Special || tokenValue() != QLatin1StringView("}")) {
545 makeError(Error::NonCommandInCommandList);
546 return false;
547 }
548 if (scriptBuilder()) {
549 scriptBuilder()->blockEnd(lexer.line());
550 }
551 consumeToken();
552 return true;
553}
554
555bool Parser::Impl::parseStringList()
556{
557 // string-list := "[" string *("," string) "]" / string
558 // ;; if there is only a single string, the brackets are optional
559 //
560 // However, since strings are already handled separately from
561 // string lists in parseArgument(), our ABNF is modified to:
562 // string-list := "[" string *("," string) "]"
563
564 if (!obtainToken() || atEnd()) {
565 return false;
566 }
567
568 if (token() != Lexer::Special || tokenValue() != QLatin1StringView("[")) {
569 return false;
570 }
571
572 if (scriptBuilder()) {
573 scriptBuilder()->stringListArgumentStart();
574 }
575 consumeToken();
576
577 // generic while/switch construct for comma-separated lists. See
578 // parseTestList() for another one. Any fix here is like to apply there, too.
579 bool lastWasComma = true;
580 while (!atEnd()) {
581 if (!obtainToken()) {
582 return false;
583 }
584
585 switch (token()) {
586 case Lexer::None:
587 break;
588 case Lexer::Special:
589 assert(tokenValue().length() == 1);
590 switch (tokenValue().at(0).toLatin1()) {
591 case ']':
592 consumeToken();
593 if (lastWasComma) {
594 makeError(Error::ConsecutiveCommasInStringList);
595 return false;
596 }
597 if (scriptBuilder()) {
598 scriptBuilder()->stringListArgumentEnd();
599 }
600 return true;
601 case ',':
602 consumeToken();
603 if (lastWasComma) {
604 makeError(Error::ConsecutiveCommasInStringList);
605 return false;
606 }
607 lastWasComma = true;
608 break;
609 default:
610 makeError(Error::NonStringInStringList);
611 return false;
612 }
613 break;
614
615 case Lexer::QuotedString:
616 case Lexer::MultiLineString:
617 if (!lastWasComma) {
618 makeError(Error::MissingCommaInStringList);
619 return false;
620 }
621 lastWasComma = false;
622 if (scriptBuilder()) {
623 scriptBuilder()->stringListEntry(tokenValue(), token() == Lexer::MultiLineString, QString());
624 }
625 consumeToken();
626 break;
627
628 default:
629 makeError(Error::NonStringInStringList);
630 return false;
631 }
632 }
633
634 makeError(Error::PrematureEndOfStringList);
635 return false;
636}
637
638bool Parser::Impl::parseNumber()
639{
640 // The lexer returns the number including the quantifier as a
641 // single token value. Here, we split is an check that the number
642 // is not out of range:
643
644 if (!obtainToken() || atEnd()) {
645 return false;
646 }
647
648 if (token() != Lexer::Number) {
649 return false;
650 }
651
652 // number:
653 unsigned long result = 0;
654 int i = 0;
655 const QByteArray s = tokenValue().toLatin1();
656 for (const int len = s.length(); i < len && isdigit(s[i]); ++i) {
657 const unsigned long digitValue = s[i] - '0';
658 if (willOverflowULong(result, digitValue)) {
659 makeError(Error::NumberOutOfRange);
660 return false;
661 } else {
662 result *= 10;
663 result += digitValue;
664 }
665 }
666
667 // optional quantifier:
668 char quantifier = '\0';
669 if (i < s.length()) {
670 assert(i + 1 == s.length());
671 quantifier = s[i];
672 const unsigned long factor = factorForQuantifier(quantifier);
673 if (result > double(ULONG_MAX) / double(factor)) {
674 makeError(Error::NumberOutOfRange);
675 return false;
676 }
677 result *= factor;
678 }
679
680 if (scriptBuilder()) {
681 scriptBuilder()->numberArgument(result, quantifier);
682 }
683 consumeToken();
684 return true;
685}
686} // namespace KSieve
void error(QWidget *parent, const QString &text, const QString &title, const KGuiItem &buttonOk, Options options=Notify)
KIOCORE_EXPORT void add(const QString &fileClass, const QString &directory)
const QList< QKeySequence > & end()
qsizetype length() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 12:01:21 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.