11 #include <impl/lexer.h> 12 #include <ksieve/lexer.h> 14 #include <impl/utf8validator.h> 15 #include <ksieve/error.h> 18 #include <QStringList> 29 #define STR_DIM(x) (sizeof(x) - 1) 39 Lexer::Lexer(
const char *scursor,
const char *send,
int options)
40 : i(new Impl(scursor, send, options))
50 bool Lexer::ignoreComments()
const 53 return i->ignoreComments();
56 const Error &Lexer::error()
const 62 bool Lexer::atEnd()
const 68 int Lexer::column()
const 74 int Lexer::line()
const 92 Lexer::Token Lexer::nextToken(
QString &result)
95 return i->nextToken(result);
100 static const unsigned char iTextMap[16] = {
121 static const unsigned char delimMap[16] = {
// Bitmap consulted by isIllegal() through isOfSet(): one bit per 7-bit
// character code, byte index = code / 8, most significant bit first.
// Bits set here:
//   0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F  (control characters; TAB, LF and CR are not set)
//   '!' '$' '%' '&' '\''  '+' '-' '.'  '<' '=' '>' '?'  '@'  '\\' '^'  '`'  '|' '~'
// Code 0x7F has no bit set in this table.
141 static const unsigned char illegalMap[16] = {0xFF, 0x9B, 0xFF, 0xFF, 0x4F, 0x16, 0x00, 0x0F, 0x80, 0x00, 0x00, 0x0A, 0x80, 0x00, 0x00, 0x0A};
/**
 * Tests whether @p ch is a member of the character set encoded in @p map.
 *
 * @p map is a 128-bit bitmap with one bit per 7-bit character code: the bit
 * for code @c c lives in byte @c c/8, most significant bit first (mask
 * @c 0x80 corresponds to @c c%8 == 0).
 *
 * @param map 16-byte bitmap describing the set.
 * @param ch  character code to look up.
 * @return true if the bit for @p ch is set; false otherwise, including for
 *         every @p ch >= 128, which the bitmap cannot represent.
 */
static inline bool isOfSet(const unsigned char map[16], unsigned char ch)
{
    // Codes >= 128 would index past the end of the 16-byte map; they are
    // never part of a 7-bit set, so report "not a member" instead.
    if (ch >= 128) {
        return false;
    }
    const unsigned char mask = static_cast<unsigned char>(0x80 >> (ch % 8));
    return (map[ch / 8] & mask) != 0;
}
149 static inline bool isIText(
unsigned char ch)
151 return ch <=
'z' && isOfSet(iTextMap, ch);
154 static inline bool isDelim(
unsigned char ch)
156 return ch <=
'}' && isOfSet(delimMap, ch);
159 static inline bool isIllegal(
unsigned char ch)
161 return ch >=
'~' || isOfSet(illegalMap, ch);
164 static inline bool is8Bit(
signed char ch)
174 const int e = CRLF ? 2 : LF ? 1 : 0;
192 Lexer::Impl::Impl(
const char *scursor,
const char *send,
int options)
193 : mState(scursor ? scursor : send)
194 , mEnd(send ? send : scursor)
195 , mIgnoreComments(options & IgnoreComments)
196 , mIgnoreLF(options & IgnoreLineFeeds)
198 if (!scursor || !send) {
203 Lexer::Token Lexer::Impl::nextToken(
QString &result)
209 const int oldLine = line();
211 const bool eatingWSSucceeded = ignoreComments() ? eatCWS() : eatWS();
213 if (!ignoreLineFeeds() && oldLine != line()) {
214 result.
setNum(line() - oldLine);
218 if (!eatingWSSucceeded) {
226 switch (*mState.cursor) {
228 assert(!ignoreComments());
231 parseHashComment(result,
true);
235 assert(!ignoreComments());
237 if (atEnd() || *mState.cursor !=
'*') {
238 makeError(Error::SlashWithoutAsterisk);
239 return BracketComment;
243 makeError(Error::UnfinishedBracketComment);
244 return BracketComment;
246 parseBracketComment(result,
true);
247 return BracketComment;
251 makeError(Error::UnexpectedCharacter, line(), column() - 1);
254 if (!isIText(*mState.cursor)) {
255 makeIllegalCharError(*mState.cursor);
262 parseQuotedString(result);
287 if (_strnicmp(mState.cursor,
"text:", STR_DIM(
"text:")) == 0) {
289 mState.cursor += STR_DIM(
"text:");
290 parseMultiLine(result);
293 return MultiLineString;
297 if (!isIText(*mState.cursor)) {
298 makeError(Error::IllegalCharacter);
301 parseIdentifier(result);
306 bool Lexer::Impl::eatWS()
309 switch (*mState.cursor) {
329 bool Lexer::Impl::eatCRLF()
332 assert(*mState.cursor ==
'\n' || *mState.cursor ==
'\r');
334 if (*mState.cursor ==
'\r') {
336 if (atEnd() || *mState.cursor !=
'\n') {
338 makeError(Error::CRWithoutLF);
352 bool Lexer::Impl::parseHashComment(
QString &result,
bool reallySave)
357 assert(*(mState.cursor - 1) ==
'#');
359 const char *
const commentStart = mState.cursor;
363 if (*mState.cursor ==
'\n' || *mState.cursor ==
'\r') {
368 const char *
const commentEnd = mState.cursor - 1;
375 if (atEnd() || eatCRLF()) {
376 const int commentLength = commentEnd - commentStart + 1;
377 if (commentLength > 0) {
378 if (!isValidUtf8(commentStart, commentLength)) {
379 makeError(Error::InvalidUTF8);
396 bool Lexer::Impl::parseBracketComment(
QString &result,
bool reallySave)
401 assert(*(mState.cursor - 2) ==
'/');
402 assert(*(mState.cursor - 1) ==
'*');
404 const char *
const commentStart = mState.cursor;
405 const int commentCol = column() - 2;
406 const int commentLine = line();
412 makeError(Error::UnfinishedBracketComment, commentLine, commentCol);
416 }
while (!atEnd() && *++mState.cursor !=
'/');
419 makeError(Error::UnfinishedBracketComment, commentLine, commentCol);
423 assert(*mState.cursor ==
'/');
425 const int commentLength = mState.cursor - commentStart - 1;
426 if (commentLength > 0) {
427 if (!isValidUtf8(commentStart, commentLength)) {
428 makeError(Error::InvalidUTF8);
441 bool Lexer::Impl::parseComment(
QString &result,
bool reallySave)
445 switch (*mState.cursor) {
448 return parseHashComment(result, reallySave);
450 if (charsLeft() < 2 || mState.cursor[1] !=
'*') {
451 makeError(Error::IllegalCharacter);
455 return parseBracketComment(result, reallySave);
462 bool Lexer::Impl::eatCWS()
467 switch (*mState.cursor) {
481 if (!parseComment(dummy)) {
493 bool Lexer::Impl::parseIdentifier(
QString &result)
497 assert(isIText(*mState.cursor));
499 const char *
const identifierStart = mState.cursor;
502 if (isdigit(*mState.cursor)) {
503 makeError(Error::NoLeadingDigits);
508 for (++mState.cursor; !atEnd() && isIText(*mState.cursor); ++mState.cursor) { }
510 const int identifierLength = mState.cursor - identifierStart;
516 if (atEnd() || isDelim(*mState.cursor)) {
520 makeIllegalCharError(*mState.cursor);
524 bool Lexer::Impl::parseTag(
QString &result)
529 assert(*(mState.cursor - 1) ==
':');
531 assert(isIText(*mState.cursor));
533 return parseIdentifier(result);
536 bool Lexer::Impl::parseNumber(
QString &result)
541 assert(isdigit(*mState.cursor));
543 while (!atEnd() && isdigit(*mState.cursor)) {
547 if (atEnd() || isDelim(*mState.cursor)) {
551 switch (*mState.cursor) {
561 makeIllegalCharError();
566 if (atEnd() || isDelim(*mState.cursor)) {
569 makeIllegalCharError();
573 bool Lexer::Impl::parseMultiLine(
QString &result)
583 assert(_strnicmp(mState.cursor - 5,
"text:", STR_DIM(
"text:")) == 0);
585 const int mlBeginLine = line();
586 const int mlBeginCol = column() - 5;
589 switch (*mState.cursor) {
597 if (!parseHashComment(dummy)) {
609 makeError(Error::NonCWSAfterTextColon);
616 makeError(Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol);
623 const char *
const oldBeginOfLine = beginOfLine();
627 const int lineLength = mState.cursor - oldBeginOfLine;
628 if (lineLength > 0) {
629 if (!isValidUtf8(oldBeginOfLine, lineLength)) {
630 makeError(Error::InvalidUTF8);
644 makeError(Error::PrematureEndOfMultiLine, mlBeginLine, mlBeginCol);
648 assert(!lines.
empty());
654 bool Lexer::Impl::parseQuotedString(
QString &result)
659 assert(*(mState.cursor - 1) ==
'"');
661 const int qsBeginCol = column() - 1;
662 const int qsBeginLine = line();
670 switch (*mState.cursor) {
688 if (!is8Bit(*mState.cursor)) {
691 const char *
const eightBitBegin = mState.cursor;
693 const int eightBitLen = mState.cursor - eightBitBegin;
694 assert(eightBitLen > 0);
695 if (isValidUtf8(eightBitBegin, eightBitLen)) {
696 result +=
dec->toUnicode(eightBitBegin, eightBitLen);
698 assert(column() >= eightBitLen);
699 makeError(Error::InvalidUTF8, line(), column() - eightBitLen);
706 makeError(Error::PrematureEndOfQuotedString, qsBeginLine, qsBeginCol);
710 void Lexer::Impl::makeIllegalCharError(
char ch)
712 makeError(isIllegal(ch) ? Error::IllegalCharacter : Error::UnexpectedCharacter);
void push_back(const T &value)
QList::iterator erase(QList::iterator pos)
QString join(const QString &separator) const const
QString & remove(int position, int n)
QString fromUtf8(const char *str, int size)
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const const
void error(QWidget *parent, const QString &text, const QString &caption=QString(), Options options=Notify)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const const
QString & replace(int position, int n, QChar after)
QString mid(int position, int n) const const
QTextDecoder * makeDecoder(QTextCodec::ConversionFlags flags) const const
QString & setNum(short n, int base)
QTextCodec * codecForMib(int mib)
QString left(int n) const const
QString fromLatin1(const char *str, int size)
QTextStream & dec(QTextStream &stream)