KHtml

htmlprospectivetokenizer.h
1 /*
2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef HTMLPROSPECTIVETOKENIZER_H
27 #define HTMLPROSPECTIVETOKENIZER_H
28 
29 #include "misc/stringit.h"
30 #include <wtf/Vector.h>
31 
32 namespace DOM
33 {
   // Forward declaration only: this header uses DOM::DocumentImpl solely through
   // pointers (a constructor parameter and the m_document member), so the full
   // class definition is not needed here.
34 class DocumentImpl;
35 }
36 
37 namespace khtml
38 {
39 
   // Forward declarations. Neither type is referenced by any declaration visible
   // in this header. NOTE(review): possibly leftovers - confirm against the
   // implementation file before removing.
40 class CachedObject;
41 class CachedObjectClient;
42 
   // ProspectiveTokenizer keeps an explicit HTML tokenizer state (m_state), a
   // content-model flag (m_contentModel) and a separate CSS scanning state
   // (m_cssState), together with buffers for the tag, attribute and CSS rule
   // currently being read.
   //
   // NOTE(review): judging by the member names (m_urlToLoad, m_linkIsStyleSheet)
   // this presumably scans markup ahead of the main parser to discover resource
   // URLs - confirm against the implementation, which is not part of this header.
   //
   // NOTE(review): a destructor is declared but no copy constructor or copy
   // assignment, so the compiler-generated copy operations remain available.
   // No data member has an in-class initializer; initial values must come from
   // the constructor and/or reset(), which are defined elsewhere.
43 class ProspectiveTokenizer
44 {
45 public:
    // NOTE(review): single-argument constructor is not marked explicit, so a
    // DOM::DocumentImpl* converts implicitly to a ProspectiveTokenizer.
46  ProspectiveTokenizer(DOM::DocumentImpl *);
47  ~ProspectiveTokenizer();
    // Input interface: begin(), write() and end().
    // NOTE(review): presumably called in that order, with write() invoked once
    // per chunk of source text - confirm against callers.
48  void begin();
49  void write(const khtml::TokenizerString &);
50  void end();
    // Returns the current value of m_inProgress.
51  bool inProgress() const
52  {
53  return m_inProgress;
54  }
55 
    // Static, so callable without a ProspectiveTokenizer instance. The source
    // string is taken by non-const reference and notEnoughCharacters is an
    // output flag. NOTE(review): the flag presumably reports that the input ran
    // out before the entity could be decided, and the return value is presumably
    // the decoded character code - neither is visible here; confirm in the .cpp.
56  static unsigned consumeEntity(khtml::TokenizerString &, bool &notEnoughCharacters);
57 
58 private:
59  void tokenize(const khtml::TokenizerString &);
60  void reset();
61 
62  void emitTag();
63  void emitCharacter(QChar);
64 
65  void tokenizeCSS(QChar);
66  void emitCSSRule();
67 
68  void processAttribute();
69 
    // Helpers operating on the m_lastCharacters buffer declared below.
70  void clearLastCharacters();
71  void rememberCharacter(QChar);
72  bool lastCharactersMatch(const char *, unsigned count) const;
73 
    // Value reported by inProgress().
74  bool m_inProgress;
75  khtml::TokenizerString m_source;
76 
    // Tokenizer states. The names appear to follow the states of the HTML5
    // tokenization algorithm (data, tag open, attribute name, comment, ...).
    // Enumerators have implicit values starting at 0 in declaration order.
77  enum State {
78  Data,
79  EntityData,
80  TagOpen,
81  CloseTagOpen,
82  TagName,
83  BeforeAttributeName,
84  AttributeName,
85  AfterAttributeName,
86  BeforeAttributeValue,
87  AttributeValueDoubleQuoted,
88  AttributeValueSingleQuoted,
89  AttributeValueUnquoted,
90  EntityInAttributeValue,
91  BogusComment,
92  MarkupDeclarationOpen,
93  CommentStart,
94  CommentStartDash,
95  Comment,
96  CommentEndDash,
97  CommentEnd
98  };
99  State m_state;
     // NOTE(review): presumably the tokenizer's "escape flag" - confirm.
100  bool m_escape;
     // Content-model flag for the text currently being tokenized.
101  enum ContentModel {
102  PCDATA,
103  RCDATA,
104  CDATA,
105  PLAINTEXT
106  };
107  ContentModel m_contentModel;
108  unsigned m_commentPos;
     // NOTE(review): presumably the state to return to once an
     // EntityInAttributeValue has been handled - confirm.
109  State m_stateBeforeEntityInAttributeValue;
110 
     // Fixed-size buffer of lastCharactersBufferSize (8) QChars plus an index
     // into it. NOTE(review): presumably used as a circular buffer of the most
     // recently seen characters - confirm in rememberCharacter().
111  static const unsigned lastCharactersBufferSize = 8;
112  QChar m_lastCharacters[lastCharactersBufferSize];
113  unsigned m_lastCharacterIndex;
114 
     // Buffers for the tag currently being read. The numeric template argument
     // is WTF::Vector's inline capacity.
115  bool m_closeTag;
116  WTF::Vector<QChar, 8> m_tagName;
117  WTF::Vector<QChar, 8> m_attributeName;
118  WTF::Vector<QChar, 32> m_attributeValue;
119  WTF::Vector<QChar, 8> m_lastStartTag;
     // NOTE(review): `uint` is not a standard C++ type; presumably Qt's typedef
     // reaching this header through an include - confirm.
120  uint m_lastStartTagId;
121 
     // NOTE(review): DOM::DOMString is used by value but is neither declared nor
     // directly included in this header; presumably it arrives via
     // "misc/stringit.h" - confirm.
122  DOM::DOMString m_urlToLoad;
123  bool m_linkIsStyleSheet;
124 
     // States of the CSS scanner driven by tokenizeCSS().
     // NOTE(review): "CSSAferRuleValue" is spelled without the 't' (sic),
     // presumably meant as "CSSAfterRuleValue". Renaming it requires updating
     // every use in the implementation file as well.
125  enum CSSState {
126  CSSInitial,
127  CSSMaybeComment,
128  CSSComment,
129  CSSMaybeCommentEnd,
130  CSSRuleStart,
131  CSSRule,
132  CSSAfterRule,
133  CSSRuleValue,
134  CSSAferRuleValue
135  };
136  CSSState m_cssState;
     // Unlike the tag buffers above, these vectors specify no inline capacity.
137  WTF::Vector<QChar> m_cssRule;
138  WTF::Vector<QChar> m_cssRuleValue;
139 
     // NOTE(review): unit and what exactly is measured are not visible in this
     // header - confirm in the implementation.
140  int m_timeUsed;
141 
     // Raw pointer to a document; whether this class owns it is not visible
     // here. NOTE(review): presumably the non-owned pointer passed to the
     // constructor - confirm.
142  DOM::DocumentImpl *m_document;
143 };
144 
145 }
146 
147 #endif
This file is part of the HTML rendering engine for KDE.
const QList< QKeySequence > & begin()
This represents the content of a comment, i.e., all the characters between the starting '<!--' and ...
Definition: dom_text.h:225
This class implements the basic string we use in the DOM.
Definition: dom_string.h:44
This library provides a full-featured HTML parser and widget.
const QList< QKeySequence > & end()
The CSSRule interface is the abstract base interface for any type of CSS statement ...
Definition: css_rule.h:53
KGuiItem reset()
State
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Tue Oct 26 2021 22:48:03 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.