KTextTemplate

lexer.cpp
1/*
2 This file is part of the KTextTemplate library
3
4 SPDX-FileCopyrightText: 2009, 2010, 2011 Stephen Kelly <steveire@gmail.com>
5
6 SPDX-License-Identifier: LGPL-2.1-or-later
7
8*/
9
10#include "lexer_p.h"
11
12using namespace KTextTemplate;
13
15using TextProcessingTransition = TextProcessingMachine::Transition;
16
17using ChurningState = LexerObject<TextProcessingState, NullTest, MarksClearer>;
18using FinalizeTokenState = LexerObject<TextProcessingState, NullTest, TokenFinalizer>;
19using EofHandler = LexerObject<TextProcessingTransition, NullTest, TokenFinalizer>;
20using EofHandlerWithTrimming = LexerObject<TextProcessingTransition, NullTest, TokenFinalizerWithTrimming>;
21
22using MaybeTemplateSyntaxHandler = CharacterTransition<'{'>;
23
24using TagStartHandler = CharacterTransition<'%', MarkStartSyntax>;
25using CommentStartHandler = CharacterTransition<'#', MarkStartSyntax>;
26using TagEndHandler = CharacterTransition<'%'>;
27using CommentEndHandler = CharacterTransition<'#'>;
28using BeginValueHandler = CharacterTransition<'{', MarkStartSyntax>;
29using MaybeEndValueHandler = CharacterTransition<'}'>;
30using NewlineHandler = CharacterTransition<'\n', MarkNewline>;
31using EndTemplateSyntaxHandler = CharacterTransition<'}', MarkEndSyntax>;
32using NotEndTemplateSyntaxHandler = NegateCharacterTransition<'}'>;
33
34using NotBeginTemplateSyntaxHandler = LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, OrTest<CharacterTest<'#'>, CharacterTest<'%'>>>>>;
35
36using NotBeginTemplateSyntaxOrNewlineHandler =
37 LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, OrTest<CharacterTest<'#'>, OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>>;
38
39using NotTagCommentOrNewlineHandler =
40 LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'#'>, OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>;
41
42using NonWhitespaceLineTextHandler = LexerObject<TextProcessingTransition, Negate<OrTest<IsSpace, CharacterTest<'{'>>>>;
43
44using WhitespaceNonNewlineHandler = LexerObject<TextProcessingTransition, AndTest<Negate<CharacterTest<'\n'>>, IsSpace>>;
45
46using FinalizingLineTextHandler = LexerObject<TextProcessingTransition, Negate<OrTest<CharacterTest<'{'>, IsSpace>>, TokenFinalizer>;
47
48using SyntaxBoundaryNewlineHandler = CharacterTransition<'\n', TokenFinalizerWithTrimmingAndNewline>;
49using SyntaxBoundaryHandler = CharacterTransition<'{', FinalizeAndMarkStartSyntax>;
50
51template<typename Transition>
52void addTransition(TextProcessingState *source, Lexer *lexer, TextProcessingState *target)
53{
54 auto tr = new Transition(lexer, source);
55 tr->setTargetState(target);
56}
57
58TextProcessingMachine *createMachine(Lexer *lexer, Lexer::TrimType type)
59{
60 auto machine = new TextProcessingMachine;
61
62 auto notFinished = new TextProcessingState(machine);
63 auto finished = new TextProcessingState(machine);
64 machine->setInitialState(notFinished);
65
66 auto processingText = new ChurningState(lexer, notFinished);
67 auto processingPostNewline = new TextProcessingState(notFinished);
68 auto processingBeginTemplateSyntax = new TextProcessingState(notFinished);
69 auto processingTag = new TextProcessingState(notFinished);
70 auto processingComment = new TextProcessingState(notFinished);
71 auto processingValue = new TextProcessingState(notFinished);
72 auto maybeProcessingValue = new TextProcessingState(notFinished);
73 auto processingEndTag = new TextProcessingState(notFinished);
74 auto processingEndComment = new TextProcessingState(notFinished);
75 auto processingEndValue = new TextProcessingState(notFinished);
76 TextProcessingState *processingPostTemplateSyntax;
77
78 if (type == Lexer::SmartTrim)
79 processingPostTemplateSyntax = new TextProcessingState(notFinished);
80 else
81 processingPostTemplateSyntax = new FinalizeTokenState(lexer, notFinished);
82 auto processingPostTemplateSyntaxWhitespace = new TextProcessingState(notFinished);
83
84 if (type == Lexer::SmartTrim)
85 notFinished->setInitialState(processingPostNewline);
86 else
87 notFinished->setInitialState(processingText);
88
89 if (type == Lexer::SmartTrim) {
90 addTransition<NewlineHandler>(processingText, lexer, processingPostNewline);
91
92 addTransition<NewlineHandler>(processingPostNewline, lexer, processingPostNewline);
93 addTransition<MaybeTemplateSyntaxHandler>(processingPostNewline, lexer, processingBeginTemplateSyntax);
94 addTransition<NonWhitespaceLineTextHandler>(processingPostNewline, lexer, processingText);
95 }
96 addTransition<MaybeTemplateSyntaxHandler>(processingText, lexer, processingBeginTemplateSyntax);
97
98 addTransition<TagStartHandler>(processingBeginTemplateSyntax, lexer, processingTag);
99 addTransition<CommentStartHandler>(processingBeginTemplateSyntax, lexer, processingComment);
100 addTransition<BeginValueHandler>(processingBeginTemplateSyntax, lexer, maybeProcessingValue);
101
102 if (type == Lexer::SmartTrim) {
103 addTransition<NotBeginTemplateSyntaxOrNewlineHandler>(processingBeginTemplateSyntax, lexer, processingText);
104 addTransition<NewlineHandler>(processingBeginTemplateSyntax, lexer, processingPostNewline);
105 } else {
106 addTransition<NotBeginTemplateSyntaxHandler>(processingBeginTemplateSyntax, lexer, processingText);
107 }
108
109 addTransition<NewlineHandler>(processingTag, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText);
110 addTransition<TagEndHandler>(processingTag, lexer, processingEndTag);
111
112 addTransition<NewlineHandler>(processingComment, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText);
113 addTransition<CommentEndHandler>(processingComment, lexer, processingEndComment);
114
115 addTransition<TagStartHandler>(maybeProcessingValue, lexer, processingTag);
116 addTransition<CommentStartHandler>(maybeProcessingValue, lexer, processingComment);
117 addTransition<NotTagCommentOrNewlineHandler>(maybeProcessingValue, lexer, processingValue);
118 addTransition<NewlineHandler>(maybeProcessingValue, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText);
119
120 addTransition<NewlineHandler>(processingValue, lexer, type == Lexer::SmartTrim ? processingPostNewline : processingText);
121 addTransition<MaybeEndValueHandler>(processingValue, lexer, processingEndValue);
122
123 addTransition<NewlineHandler>(processingEndTag, lexer, processingPostNewline);
124 addTransition<NotEndTemplateSyntaxHandler>(processingEndTag, lexer, processingTag);
125 addTransition<EndTemplateSyntaxHandler>(processingEndTag, lexer, processingPostTemplateSyntax);
126
127 addTransition<NewlineHandler>(processingEndComment, lexer, processingPostNewline);
128 addTransition<NotEndTemplateSyntaxHandler>(processingEndComment, lexer, processingComment);
129 addTransition<EndTemplateSyntaxHandler>(processingEndComment, lexer, processingPostTemplateSyntax);
130
131 addTransition<NewlineHandler>(processingEndValue, lexer, processingPostNewline);
132 addTransition<NotEndTemplateSyntaxHandler>(processingEndValue, lexer, processingValue);
133 addTransition<EndTemplateSyntaxHandler>(processingEndValue, lexer, processingPostTemplateSyntax);
134
135 if (type != Lexer::SmartTrim) {
136 processingPostTemplateSyntax->setUnconditionalTransition(processingText);
137 } else {
138 addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntax, lexer, processingPostNewline);
139 addTransition<WhitespaceNonNewlineHandler>(processingPostTemplateSyntax, lexer, processingPostTemplateSyntaxWhitespace);
140 addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntax, lexer, processingText);
141 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntax, lexer, processingBeginTemplateSyntax);
142
143 // NOTE: We only have to transition to this if there was whitespace
144 // before the opening tag. Maybe store that in an external state property?
145 // Actually, this may be a bug if we try to finalize with trimming and
146 // there is no leading whitespace.
147 addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingPostNewline);
148 addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingText);
149 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntaxWhitespace, lexer, processingBeginTemplateSyntax);
150 }
151
152 {
153 auto handler = new EofHandler(lexer, notFinished);
154 handler->setTargetState(finished);
155 notFinished->setEndTransition(handler);
156 }
157
158 if (type == Lexer::SmartTrim) {
159 {
160 auto handler = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntaxWhitespace);
161 handler->setTargetState(finished);
162 processingPostTemplateSyntaxWhitespace->setEndTransition(handler);
163 }
164 {
165 auto handler = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntax);
166 handler->setTargetState(finished);
167 processingPostTemplateSyntax->setEndTransition(handler);
168 }
169 }
170 return machine;
171}
172
173Lexer::Lexer(const QString &templateString)
174 : m_templateString(templateString)
175{
176}
177
178Lexer::~Lexer() = default;
179
180void Lexer::clearMarkers()
181{
182 m_startSyntaxPosition = -1;
183 m_endSyntaxPosition = -1;
184 m_newlinePosition = -1;
185}
186
187void Lexer::reset()
188{
189 m_tokenList.clear();
190 m_lineCount = 0;
191 m_upto = 0;
192 m_processedUpto = 0;
193 clearMarkers();
194}
195
196QList<Token> Lexer::tokenize(TrimType type)
197{
198 auto machine = createMachine(this, type);
199
200 machine->start();
201
202 auto it = m_templateString.constBegin();
203 const auto end = m_templateString.constEnd();
204
205 reset();
206 for (; it != end; ++it, ++m_upto)
207 machine->processCharacter(it);
208
209 machine->finished();
210
211 machine->stop();
212
213 delete machine;
214
215 return m_tokenList;
216}
217
218void Lexer::markStartSyntax()
219{
220 m_startSyntaxPosition = m_upto;
221}
222
223void Lexer::markEndSyntax()
224{
225 m_endSyntaxPosition = m_upto + 1;
226}
227
228void Lexer::markNewline()
229{
230 m_newlinePosition = m_upto;
231 ++m_lineCount;
232}
233
234void Lexer::finalizeToken()
235{
236 auto nextPosition = m_upto;
237 const auto validSyntax = m_endSyntaxPosition > m_startSyntaxPosition && (m_startSyntaxPosition >= m_processedUpto);
238
239 if (validSyntax) {
240 Q_ASSERT(m_startSyntaxPosition >= 0);
241 nextPosition = m_startSyntaxPosition - 1;
242 }
243 finalizeToken(nextPosition, validSyntax);
244}
245
246void Lexer::finalizeTokenWithTrimmedWhitespace()
247{
248 auto nextPosition = m_upto;
249 // We know this to be true because the state machine has already guaranteed
250 // it. This method is only called from transition and state actions which
251 // occur after valid syntax.
252 // TODO Investigate performance and other implications of changing the state
253 // machine to assure similar in finalizeToken()
254 Q_ASSERT(m_endSyntaxPosition > m_startSyntaxPosition);
255
256 Q_ASSERT(m_startSyntaxPosition >= 0);
257 if (m_newlinePosition >= 0 && m_newlinePosition >= m_processedUpto)
258 nextPosition = qMin(m_startSyntaxPosition - 1, m_newlinePosition);
259 else
260 nextPosition = m_startSyntaxPosition - 1;
261 finalizeToken(nextPosition, true);
262}
263
264void Lexer::finalizeToken(int nextPosition, bool processSyntax)
265{
266 {
267 Token token;
268 token.content = m_templateString.mid(m_processedUpto, nextPosition - m_processedUpto);
269 token.tokenType = TextToken;
270 token.linenumber = m_lineCount;
271 m_tokenList.append(token);
272 }
273
274 m_processedUpto = nextPosition;
275
276 if (!processSyntax)
277 return;
278
279 m_processedUpto = m_endSyntaxPosition;
280
281 const auto differentiator = m_templateString.at(m_startSyntaxPosition);
282 if (differentiator == QLatin1Char('#'))
283 return;
284
285 Token syntaxToken;
286 syntaxToken.content = m_templateString.mid(m_startSyntaxPosition + 1, m_endSyntaxPosition - m_startSyntaxPosition - 3).trimmed();
287 syntaxToken.linenumber = m_lineCount;
288
289 if (differentiator == QLatin1Char('{')) {
290 syntaxToken.tokenType = VariableToken;
291 } else {
292 Q_ASSERT(differentiator == QLatin1Char('%'));
293 syntaxToken.tokenType = BlockToken;
294 }
295 m_tokenList.append(syntaxToken);
296}
QAction * end(const QObject *recvr, const char *slot, QObject *parent)
KGuiItem reset()
The KTextTemplate namespace holds all public KTextTemplate API.
Definition Mainpage.dox:8
@ TextToken
The Token is a text fragment.
Definition token.h:24
@ VariableToken
The Token is a variable node.
Definition token.h:25
@ BlockToken
The Token is a block, ie, part of a tag.
Definition token.h:26
std::pair< long long int, long long int > nextPosition(const MdBlock< Trait > &fr, long long int pos, long long int line)
QString mid(qsizetype position, qsizetype n) const const
QString trimmed() const const
A token in a parse stream for a template.
Definition token.h:37
int tokenType
The Type of this Token.
Definition token.h:38
QString content
The content of this Token.
Definition token.h:40
int linenumber
The line number this Token starts at.
Definition token.h:39
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Fri Nov 29 2024 11:58:54 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.