KHtml

htmlprospectivetokenizer.cpp
1 /*
2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3  * (C) 2008 Germain Garand <[email protected]>
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "htmlprospectivetokenizer.h"
28 
29 #include <QTime>
30 #include <QVarLengthArray>
31 
32 #include "html_headimpl.h"
33 #include "html_documentimpl.h"
34 #include "htmlparser.h"
35 #include "dtd.h"
36 
37 #include <misc/loader.h>
38 #include <khtmlview.h>
39 #include <khtml_part.h>
40 #include <xml/dom_docimpl.h>
41 #include <css/csshelper.h>
42 #include <ecma/kjs_proxy.h>
43 #include <ctype.h>
44 #include <assert.h>
45 #include <QVariant>
46 #include <stdlib.h>
47 
48 #include "kentities_p.h"
49 
50 #define PRELOAD_DEBUG 0
51 
// UTF-16 surrogate helpers for a supplementary code point `sup` (> 0xFFFF).
// U16_TRAIL keeps the low 10 bits and tags them with 0xDC00.
// U16_LEAD shifts out those 10 bits and adds 0xD7C0, which is
// 0xD800 - (0x10000 >> 10): the 0x10000 offset is folded into the constant.
52 #define U16_TRAIL(sup) (ushort)(((sup)&0x3ff)|0xdc00)
53 #define U16_LEAD(sup) (ushort)(((sup)>>10)+0xd7c0)
54 
55 using namespace khtml;
56 
57 ProspectiveTokenizer::ProspectiveTokenizer(DOM::DocumentImpl *doc)
58  : m_inProgress(false)
59  , m_tagName(32)
60  , m_attributeName(32)
61  , m_attributeValue(255)
62  , m_cssRule(16)
63  , m_cssRuleValue(255)
64  , m_timeUsed(0)
65  , m_document(doc)
66 {
67 #if PRELOAD_DEBUG
68  qCDebug(KHTML_LOG) << "CREATING PRELOAD SCANNER FOR" << m_document << m_document->URL().toDisplayString();
69 #endif
70 }
71 
72 ProspectiveTokenizer::~ProspectiveTokenizer()
73 {
74 #if PRELOAD_DEBUG
75  fprintf(stderr, "DELETING PRELOAD SCANNER FOR %p\n", m_document);
76  fprintf(stderr, "TOTAL TIME USED %dms\n", m_timeUsed);
77 #endif
78 }
79 
80 void ProspectiveTokenizer::begin()
81 {
82  assert(!m_inProgress);
83  reset();
84  m_inProgress = true;
85 }
86 
87 void ProspectiveTokenizer::end()
88 {
89  assert(m_inProgress);
90  m_inProgress = false;
91 }
92 
93 void ProspectiveTokenizer::reset()
94 {
95  m_source.clear();
96 
97  m_state = Data;
98  m_escape = false;
99  m_contentModel = PCDATA;
100  m_commentPos = 0;
101 
102  m_closeTag = false;
103  m_tagName.clear();
104  m_attributeName.clear();
105  m_attributeValue.clear();
106  m_lastStartTag.clear();
107  m_lastStartTagId = 0;
108 
109  m_urlToLoad = DOMString();
110  m_linkIsStyleSheet = false;
111  m_lastCharacterIndex = 0;
112  clearLastCharacters();
113 
114  m_cssState = CSSInitial;
115  m_cssRule.clear();
116  m_cssRuleValue.clear();
117 }
118 
119 void ProspectiveTokenizer::write(const TokenizerString &source)
120 {
121 #if PRELOAD_DEBUG
122  QTime t;
123  t.start();
124 #endif
125 
126  tokenize(source);
127 
128 #if PRELOAD_DEBUG
129  m_timeUsed += t.elapsed();
130 #endif
131 }
132 
133 static inline bool isWhitespace(const QChar &c)
134 {
135  unsigned short u = c.unicode();
136  if (u > 0x20) {
137  return false;
138  }
139  return u == ' ' || u == '\n' || u == '\r' || u == '\t';
140 }
141 
142 inline void ProspectiveTokenizer::clearLastCharacters()
143 {
144  memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(QChar));
145 }
146 
147 inline void ProspectiveTokenizer::rememberCharacter(QChar c)
148 {
149  m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize;
150  m_lastCharacters[m_lastCharacterIndex] = c;
151 }
152 
153 inline bool ProspectiveTokenizer::lastCharactersMatch(const char *chars, unsigned count) const
154 {
155  unsigned pos = m_lastCharacterIndex;
156  while (count) {
157  if (chars[count - 1] != m_lastCharacters[pos]) {
158  return false;
159  }
160  --count;
161  if (!pos) {
162  pos = lastCharactersBufferSize;
163  }
164  --pos;
165  }
166  return true;
167 }
168 
// Maps the numeric value of a character reference to the code point that
// should actually be emitted.
//
//  - 0, surrogates (0xD800-0xDFFF) and values above 0x10FFFF become U+FFFD.
//  - 0x80-0x9F are remapped through the Windows-1252 table that the HTML
//    specification defines for numeric character references; the five values
//    with no mapping (0x81, 0x8D, 0x8F, 0x90, 0x9D) are returned unchanged.
//  - Every other value is returned as-is.
static inline unsigned legalEntityFor(unsigned value)
{
    // Indexed by (value - 0x80). Entries equal to their own index + 0x80 are
    // the code points without a replacement.
    static const unsigned short c1Replacements[32] = {
        0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 0x80-0x87
        0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 0x88-0x8F
        0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 0x90-0x97
        0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178  // 0x98-0x9F
    };

    if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
        return 0xFFFD;
    }
    if (value >= 0x80 && value <= 0x9F) {
        return c1Replacements[value - 0x80];
    }
    return value;
}
177 
178 unsigned ProspectiveTokenizer::consumeEntity(TokenizerString &source, bool &notEnoughCharacters)
179 {
180  enum EntityState {
181  Initial,
182  NumberType,
183  MaybeHex,
184  Hex,
185  Decimal,
186  Named
187  };
188  EntityState entityState = Initial;
189  unsigned result = 0;
190  QVarLengthArray<QChar> seenChars;
191  QVarLengthArray<char> entityName;
192 
193  while (!source.isEmpty()) {
194  seenChars.append(*source);
195  ushort cc = source->unicode();
196  switch (entityState) {
197  case Initial:
198  if (isWhitespace(cc) || cc == '<' || cc == '&') {
199  return 0;
200  } else if (cc == '#') {
201  entityState = NumberType;
202  } else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
203  entityName.append(cc);
204  entityState = Named;
205  } else {
206  return 0;
207  }
208  break;
209  case NumberType:
210  if (cc == 'x' || cc == 'X') {
211  entityState = MaybeHex;
212  } else if (cc >= '0' && cc <= '9') {
213  entityState = Decimal;
214  result = cc - '0';
215  } else {
216  source.push('#');
217  return 0;
218  }
219  break;
220  case MaybeHex:
221  if (cc >= '0' && cc <= '9') {
222  result = cc - '0';
223  } else if (cc >= 'a' && cc <= 'f') {
224  result = 10 + cc - 'a';
225  } else if (cc >= 'A' && cc <= 'F') {
226  result = 10 + cc - 'A';
227  } else {
228  source.push(seenChars[1]);
229  source.push('#');
230  return 0;
231  }
232  entityState = Hex;
233  break;
234  case Hex:
235  if (cc >= '0' && cc <= '9') {
236  result = result * 16 + cc - '0';
237  } else if (cc >= 'a' && cc <= 'f') {
238  result = result * 16 + 10 + cc - 'a';
239  } else if (cc >= 'A' && cc <= 'F') {
240  result = result * 16 + 10 + cc - 'A';
241  } else if (cc == ';') {
242  source.advance();
243  return legalEntityFor(result);
244  } else {
245  return legalEntityFor(result);
246  }
247  break;
248  case Decimal:
249  if (cc >= '0' && cc <= '9') {
250  result = result * 10 + cc - '0';
251  } else if (cc == ';') {
252  source.advance();
253  return legalEntityFor(result);
254  } else {
255  return legalEntityFor(result);
256  }
257  break;
258  case Named:
259  // This is the attribute only version, generic version matches somewhat differently
260  while (entityName.size() <= 8) {
261  if (cc == ';') {
262  int code;
263  const bool found = kde_findEntity(entityName.data(), entityName.size(), &code);
264  if (found) {
265  source.advance();
266  return code;
267  }
268  break;
269  }
270  if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) {
271  int code;
272  const bool found = kde_findEntity(entityName.data(), entityName.size(), &code);
273  if (found) {
274  return code;
275  }
276  break;
277  }
278  entityName.append(cc);
279  source.advance();
280  if (source.isEmpty()) {
281  goto outOfCharacters;
282  }
283  cc = source->unicode();
284  seenChars.append(cc);
285  }
286  if (seenChars.size() == 2) {
287  source.push(seenChars[0]);
288  } else if (seenChars.size() == 3) {
289  source.push(seenChars[1]);
290  source.push(seenChars[0]);
291  } else {
292  source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size() - 1)));
293  }
294  return 0;
295  }
296  source.advance();
297  }
298 outOfCharacters:
299  notEnoughCharacters = true;
300  source.prepend(TokenizerString(QString(seenChars.data(), seenChars.size())));
301  return 0;
302 }
303 
// Runs the simplified HTML5 tokenizer state machine over the buffered input.
//
// @p source is appended to m_source and characters are consumed until the
// buffer is empty, or until a state needs more characters than are currently
// buffered. In both cases the function simply returns with m_state and the
// partially collected tag/attribute buffers intact, so the next call resumes
// where this one stopped.
//
// Loop conventions: leaving the switch with `break` consumes the current
// character (m_source.advance() at the bottom of the outer loop), whereas
// `continue` re-examines the same character in the newly selected state.
// `cc + 0x20` converts an ASCII upper-case letter to lower case.
304 void ProspectiveTokenizer::tokenize(const TokenizerString &source)
305 {
306  assert(m_inProgress);
307 
308  m_source.append(source);
309 
310  // This is a simplified HTML5 Tokenizer
311  // https://html.spec.whatwg.org/#tokenization
312  while (!m_source.isEmpty()) {
313  ushort cc = m_source->unicode();
314  switch (m_state) {
315  case Data:
 // Every character is recorded in the ring buffer so that the "<!--" and
 // "-->" escape markers inside RCDATA/CDATA content can be recognised.
316  while (1) {
317  rememberCharacter(cc);
318  if (cc == '&') {
319  if (m_contentModel == PCDATA || m_contentModel == RCDATA) {
320  m_state = EntityData;
321  break;
322  }
323  } else if (cc == '-') {
324  if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) {
325  if (lastCharactersMatch("<!--", 4)) {
326  m_escape = true;
327  }
328  }
329  } else if (cc == '<') {
330  if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) {
331  m_state = TagOpen;
332  break;
333  }
334  } else if (cc == '>') {
335  if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) {
336  if (lastCharactersMatch("-->", 3)) {
337  m_escape = false;
338  }
339  }
340  }
341  emitCharacter(cc);
342  m_source.advance();
343  if (m_source.isEmpty()) {
344  return;
345  }
346  cc = m_source->unicode();
347  }
348  break;
349  case EntityData:
350  // should try to consume the entity but we only care about entities in attributes
351  m_state = Data;
352  break;
353  case TagOpen:
 // Inside RCDATA/CDATA only "</" can start a tag; anything else is text.
354  if (m_contentModel == RCDATA || m_contentModel == CDATA) {
355  if (cc == '/') {
356  m_state = CloseTagOpen;
357  } else {
358  m_state = Data;
359  continue;
360  }
361  } else if (m_contentModel == PCDATA) {
362  if (cc == '!') {
363  m_state = MarkupDeclarationOpen;
364  } else if (cc == '/') {
365  m_state = CloseTagOpen;
366  } else if (cc >= 'A' && cc <= 'Z') {
367  m_tagName.clear();
368  m_tagName.append(cc + 0x20);
369  m_closeTag = false;
370  m_state = TagName;
371  } else if (cc >= 'a' && cc <= 'z') {
372  m_tagName.clear();
373  m_tagName.append(cc);
374  m_closeTag = false;
375  m_state = TagName;
376  } else if (cc == '>') {
377  m_state = Data;
378  } else if (cc == '?') {
379  m_state = BogusComment;
380  } else {
381  m_state = Data;
382  continue;
383  }
384  }
385  break;
386  case CloseTagOpen:
 // In RCDATA/CDATA a close tag only counts if it names the last start tag
 // and is followed by whitespace, '>' or '/'. The next
 // m_lastStartTag.size() + 1 characters are read (lower-cased) for the
 // comparison and then prepended back so normal processing re-reads them.
387  if (m_contentModel == RCDATA || m_contentModel == CDATA) {
388  if (!m_lastStartTag.size()) {
389  m_state = Data;
390  continue;
391  }
 // Not enough input buffered yet to decide; wait for the next write.
392  if ((unsigned)m_source.length() < m_lastStartTag.size() + 1) {
393  return;
394  }
395  QVector<QChar> tmpString;
396  QChar tmpChar = 0;
397  bool match = true;
398  for (unsigned n = 0; n < m_lastStartTag.size() + 1; n++) {
399  tmpChar = m_source->toLower();
400  if (n < m_lastStartTag.size() && tmpChar != m_lastStartTag[n]) {
401  match = false;
402  }
403  tmpString.append(tmpChar);
404  m_source.advance();
405  }
406  m_source.prepend(TokenizerString(QString(tmpString.data(), tmpString.size())));
407  if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) {
408  m_state = Data;
409  continue;
410  }
411  }
412  if (cc >= 'A' && cc <= 'Z') {
413  m_tagName.clear();
414  m_tagName.append(cc + 0x20);
415  m_closeTag = true;
416  m_state = TagName;
417  } else if (cc >= 'a' && cc <= 'z') {
418  m_tagName.clear();
419  m_tagName.append(cc);
420  m_closeTag = true;
421  m_state = TagName;
422  } else if (cc == '>') {
423  m_state = Data;
424  } else {
425  m_state = BogusComment;
426  }
427  break;
428  case TagName:
429  while (1) {
430  if (isWhitespace(cc)) {
431  m_state = BeforeAttributeName;
432  break;
433  }
434  if (cc == '>') {
435  emitTag();
436  m_state = Data;
437  break;
438  }
439  if (cc == '/') {
440  m_state = BeforeAttributeName;
441  break;
442  }
443  if (cc >= 'A' && cc <= 'Z') {
444  m_tagName.append(cc + 0x20);
445  } else {
446  m_tagName.append(cc);
447  }
448  m_source.advance();
449  if (m_source.isEmpty()) {
450  return;
451  }
452  cc = m_source->unicode();
453  }
454  break;
455  case BeforeAttributeName:
 // Whitespace and '/' are skipped; any other character starts a new
 // attribute and discards the previous name/value buffers.
456  if (isWhitespace(cc))
457  ;
458  else if (cc == '>') {
459  emitTag();
460  m_state = Data;
461  } else if (cc >= 'A' && cc <= 'Z') {
462  m_attributeName.clear();
463  m_attributeValue.clear();
464  m_attributeName.append(cc + 0x20);
465  m_state = AttributeName;
466  } else if (cc == '/')
467  ;
468  else {
469  m_attributeName.clear();
470  m_attributeValue.clear();
471  m_attributeName.append(cc);
472  m_state = AttributeName;
473  }
474  break;
475  case AttributeName:
476  while (1) {
477  if (isWhitespace(cc)) {
478  m_state = AfterAttributeName;
479  break;
480  }
481  if (cc == '=') {
482  m_state = BeforeAttributeValue;
483  break;
484  }
485  if (cc == '>') {
486  emitTag();
487  m_state = Data;
488  break;
489  }
490  if (cc == '/') {
491  m_state = BeforeAttributeName;
492  break;
493  }
494  if (cc >= 'A' && cc <= 'Z') {
495  m_attributeName.append(cc + 0x20);
496  } else {
497  m_attributeName.append(cc);
498  }
499  m_source.advance();
500  if (m_source.isEmpty()) {
501  return;
502  }
503  cc = m_source->unicode();
504  }
505  break;
506  case AfterAttributeName:
507  if (isWhitespace(cc))
508  ;
509  else if (cc == '=') {
510  m_state = BeforeAttributeValue;
511  } else if (cc == '>') {
512  emitTag();
513  m_state = Data;
514  } else if (cc >= 'A' && cc <= 'Z') {
515  m_attributeName.clear();
516  m_attributeValue.clear();
517  m_attributeName.append(cc + 0x20);
518  m_state = AttributeName;
519  } else if (cc == '/') {
520  m_state = BeforeAttributeName;
521  } else {
522  m_attributeName.clear();
523  m_attributeValue.clear();
524  m_attributeName.append(cc);
525  m_state = AttributeName;
526  }
527  break;
528  case BeforeAttributeValue:
529  if (isWhitespace(cc))
530  ;
531  else if (cc == '"') {
532  m_state = AttributeValueDoubleQuoted;
533  } else if (cc == '&') {
 // Re-examine the '&' in the unquoted state, which handles entities.
534  m_state = AttributeValueUnquoted;
535  continue;
536  } else if (cc == '\'') {
537  m_state = AttributeValueSingleQuoted;
538  } else if (cc == '>') {
539  emitTag();
540  m_state = Data;
541  } else {
542  m_attributeValue.append(cc);
543  m_state = AttributeValueUnquoted;
544  }
545  break;
546  case AttributeValueDoubleQuoted:
547  while (1) {
548  if (cc == '"') {
549  processAttribute();
550  m_state = BeforeAttributeName;
551  break;
552  }
553  if (cc == '&') {
554  m_stateBeforeEntityInAttributeValue = m_state;
555  m_state = EntityInAttributeValue;
556  break;
557  }
558  m_attributeValue.append(cc);
559  m_source.advance();
560  if (m_source.isEmpty()) {
561  return;
562  }
563  cc = m_source->unicode();
564  }
565  break;
566  case AttributeValueSingleQuoted:
567  while (1) {
568  if (cc == '\'') {
569  processAttribute();
570  m_state = BeforeAttributeName;
571  break;
572  }
573  if (cc == '&') {
574  m_stateBeforeEntityInAttributeValue = m_state;
575  m_state = EntityInAttributeValue;
576  break;
577  }
578  m_attributeValue.append(cc);
579  m_source.advance();
580  if (m_source.isEmpty()) {
581  return;
582  }
583  cc = m_source->unicode();
584  }
585  break;
586  case AttributeValueUnquoted:
587  while (1) {
588  if (isWhitespace(cc)) {
589  processAttribute();
590  m_state = BeforeAttributeName;
591  break;
592  }
593  if (cc == '&') {
594  m_stateBeforeEntityInAttributeValue = m_state;
595  m_state = EntityInAttributeValue;
596  break;
597  }
598  if (cc == '>') {
599  processAttribute();
600  emitTag();
601  m_state = Data;
602  break;
603  }
604  m_attributeValue.append(cc);
605  m_source.advance();
606  if (m_source.isEmpty()) {
607  return;
608  }
609  cc = m_source->unicode();
610  }
611  break;
612  case EntityInAttributeValue: {
 // The '&' has already been consumed. A result above 0xFFFF is stored as a
 // UTF-16 surrogate pair; 0 means no entity was recognised, so the literal
 // '&' is kept. Either way the saved attribute-value state is resumed on
 // the current character without consuming it.
613  bool notEnoughCharacters = false;
614  unsigned entity = consumeEntity(m_source, notEnoughCharacters);
615  if (notEnoughCharacters) {
616  return;
617  }
618  if (entity > 0xFFFF) {
619  m_attributeValue.append(U16_LEAD(entity));
620  m_attributeValue.append(U16_TRAIL(entity));
621  } else if (entity) {
622  m_attributeValue.append(entity);
623  } else {
624  m_attributeValue.append('&');
625  }
626  }
627  m_state = m_stateBeforeEntityInAttributeValue;
628  continue;
629  case BogusComment:
 // Skip everything up to the next '>'.
630  while (1) {
631  if (cc == '>') {
632  m_state = Data;
633  break;
634  }
635  m_source.advance();
636  if (m_source.isEmpty()) {
637  return;
638  }
639  cc = m_source->unicode();
640  }
641  break;
642  case MarkupDeclarationOpen: {
643  if (cc == '-') {
 // Both characters of "--" are needed before a decision can be made.
644  if (m_source.length() < 2) {
645  return;
646  }
647  m_source.advance();
648  cc = m_source->unicode();
649  if (cc == '-') {
650  m_state = CommentStart;
651  } else {
652  m_state = BogusComment;
653  continue;
654  }
655  // If we cared about the DOCTYPE we would test to enter those states here
656  } else {
657  m_state = BogusComment;
658  continue;
659  }
660  break;
661  }
662  case CommentStart:
663  if (cc == '-') {
664  m_state = CommentStartDash;
665  } else if (cc == '>') {
666  m_state = Data;
667  } else {
668  m_state = Comment;
669  }
670  break;
671  case CommentStartDash:
672  if (cc == '-') {
673  m_state = CommentEnd;
674  } else if (cc == '>') {
675  m_state = Data;
676  } else {
677  m_state = Comment;
678  }
679  break;
680  case Comment:
681  while (1) {
682  if (cc == '-') {
683  m_state = CommentEndDash;
684  break;
685  }
686  m_source.advance();
687  if (m_source.isEmpty()) {
688  return;
689  }
690  cc = m_source->unicode();
691  }
692  break;
693  case CommentEndDash:
694  if (cc == '-') {
695  m_state = CommentEnd;
696  } else {
697  m_state = Comment;
698  }
699  break;
700  case CommentEnd:
 // After "--": '>' closes the comment, further '-' keep waiting for it.
701  if (cc == '>') {
702  m_state = Data;
703  } else if (cc == '-')
704  ;
705  else {
706  m_state = Comment;
707  }
708  break;
709  }
710  m_source.advance();
711  }
712 }
713 
714 void ProspectiveTokenizer::processAttribute()
715 {
716  DOMStringImpl tagNameDS(DOMStringImpl::ShallowCopy, m_tagName.data(), m_tagName.size());
717  LocalName tagLocal = LocalName::fromString(&tagNameDS, IDS_NormalizeLower);
718  uint tag = tagLocal.id();
719 
720  switch (tag) {
721  case ID_SCRIPT:
722  case ID_IMAGE:
723  case ID_IMG: {
724  DOMStringImpl attrDS(DOMStringImpl::ShallowCopy, m_attributeName.data(), m_attributeName.size());
725  LocalName attrLocal = LocalName::fromString(&attrDS, IDS_NormalizeLower);
726  uint attribute = attrLocal.id();
727  if (attribute == localNamePart(ATTR_SRC) && m_urlToLoad.isEmpty()) {
728  m_urlToLoad = DOMString(m_attributeValue.data(), m_attributeValue.size()).trimSpaces();
729  }
730  break;
731  }
732  case ID_LINK: {
733  DOMStringImpl attrDS(DOMStringImpl::ShallowCopy, m_attributeName.data(), m_attributeName.size());
734  LocalName attrLocal = LocalName::fromString(&attrDS, IDS_NormalizeLower);
735  uint attribute = attrLocal.id();
736  if (attribute == localNamePart(ATTR_HREF) && m_urlToLoad.isEmpty()) {
737  m_urlToLoad = DOMString(m_attributeValue.data(), m_attributeValue.size()).trimSpaces();
738  } else if (attribute == localNamePart(ATTR_REL)) {
739  DOMStringImpl *lowerAttribute = DOMStringImpl(DOMStringImpl::ShallowCopy, m_attributeValue.data(), m_attributeValue.size()).lower();
740  QString val = lowerAttribute->string();
741  delete lowerAttribute;
742  m_linkIsStyleSheet = val.contains("stylesheet") && !val.contains("alternate") && !val.contains("icon");
743  }
744  }
745  default:
746  break;
747  }
748 }
749 
750 inline void ProspectiveTokenizer::emitCharacter(QChar c)
751 {
752  if (m_contentModel == CDATA && m_lastStartTagId == ID_STYLE) {
753  tokenizeCSS(c);
754  }
755 }
756 
757 inline void ProspectiveTokenizer::tokenizeCSS(QChar c)
758 {
759  // We are just interested in @import rules, no need for real tokenization here
760  // Searching for other types of resources is probably low payoff
761  switch (m_cssState) {
762  case CSSInitial:
763  if (c == '@') {
764  m_cssState = CSSRuleStart;
765  } else if (c == '/') {
766  m_cssState = CSSMaybeComment;
767  }
768  break;
769  case CSSMaybeComment:
770  if (c == '*') {
771  m_cssState = CSSComment;
772  } else {
773  m_cssState = CSSInitial;
774  }
775  break;
776  case CSSComment:
777  if (c == '*') {
778  m_cssState = CSSMaybeCommentEnd;
779  }
780  break;
781  case CSSMaybeCommentEnd:
782  if (c == '/') {
783  m_cssState = CSSInitial;
784  } else if (c == '*')
785  ;
786  else {
787  m_cssState = CSSComment;
788  }
789  break;
790  case CSSRuleStart:
791  if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
792  m_cssRule.clear();
793  m_cssRuleValue.clear();
794  m_cssRule.append(c);
795  m_cssState = CSSRule;
796  } else {
797  m_cssState = CSSInitial;
798  }
799  break;
800  case CSSRule:
801  if (isWhitespace(c)) {
802  m_cssState = CSSAfterRule;
803  } else if (c == ';') {
804  m_cssState = CSSInitial;
805  } else {
806  m_cssRule.append(c);
807  }
808  break;
809  case CSSAfterRule:
810  if (isWhitespace(c))
811  ;
812  else if (c == ';') {
813  m_cssState = CSSInitial;
814  } else {
815  m_cssState = CSSRuleValue;
816  m_cssRuleValue.append(c);
817  }
818  break;
819  case CSSRuleValue:
820  if (isWhitespace(c)) {
821  m_cssState = CSSAferRuleValue;
822  } else if (c == ';') {
823  emitCSSRule();
824  m_cssState = CSSInitial;
825  } else {
826  m_cssRuleValue.append(c);
827  }
828  break;
829  case CSSAferRuleValue:
830  if (isWhitespace(c))
831  ;
832  else if (c == ';') {
833  emitCSSRule();
834  m_cssState = CSSInitial;
835  } else {
836  // FIXME media rules
837  m_cssState = CSSInitial;
838  }
839  break;
840  }
841 }
842 
843 void ProspectiveTokenizer::emitTag()
844 {
845  if (m_closeTag) {
846  m_contentModel = PCDATA;
847  m_cssState = CSSInitial;
848  clearLastCharacters();
849  return;
850  }
851 
852  DOMStringImpl tagNameDS(DOMStringImpl::ShallowCopy, m_tagName.data(), m_tagName.size());
853  LocalName tagLocal = LocalName::fromString(&tagNameDS, IDS_NormalizeLower);
854  uint tag = tagLocal.id();
855  m_lastStartTagId = tag;
856  m_lastStartTag = m_tagName;
857 
858  switch (tag) {
859  case ID_TEXTAREA:
860  case ID_TITLE:
861  m_contentModel = RCDATA;
862  break;
863  case ID_STYLE:
864  case ID_XMP:
865  case ID_SCRIPT:
866  case ID_IFRAME:
867  case ID_NOEMBED:
868  case ID_NOFRAMES:
869  m_contentModel = CDATA;
870  break;
871  case ID_NOSCRIPT:
872  // we wouldn't be here if scripts were disabled
873  m_contentModel = CDATA;
874  break;
875  case ID_PLAINTEXT:
876  m_contentModel = PLAINTEXT;
877  break;
878  default:
879  m_contentModel = PCDATA;
880  }
881 
882  if (m_urlToLoad.isEmpty()) {
883  m_linkIsStyleSheet = false;
884  return;
885  }
886 
887  CachedObject *o = nullptr;
888  if (tag == ID_SCRIPT) {
889  o = m_document->docLoader()->requestScript(m_urlToLoad, m_document->part()->encoding());
890  } else if (tag == ID_IMAGE || tag == ID_IMG) {
891  o = m_document->docLoader()->requestImage(m_urlToLoad);
892  } else if (tag == ID_LINK && m_linkIsStyleSheet) {
893  o = m_document->docLoader()->requestStyleSheet(m_urlToLoad, m_document->part()->encoding());
894  }
895 
896  if (o) {
897  m_document->docLoader()->registerPreload(o);
898  }
899 
900  m_urlToLoad = DOMString();
901  m_linkIsStyleSheet = false;
902 }
903 
904 void ProspectiveTokenizer::emitCSSRule()
905 {
906  QString rule(m_cssRule.data(), m_cssRule.size());
907  if (rule.toLower() == "import" && !m_cssRuleValue.isEmpty()) {
908  DOMString value = DOMString(m_cssRuleValue.data(), m_cssRuleValue.size());
909  DOMString url = parseURL(value);
910  if (!url.isEmpty()) {
911  m_document->docLoader()->registerPreload(m_document->docLoader()->requestStyleSheet(url, m_document->part()->encoding())); // #### charset
912  }
913  }
914  m_cssRule.clear();
915  m_cssRuleValue.clear();
916 }
917 
The CSSNamespaceRule interface represents an @namespace rule.
Definition: css_rule.h:455
void append(const T &value)
void append(const T &t)
This file is part of the HTML rendering engine for KDE.
T * data()
int elapsed() const const
This represents the content of a comment, i.e., all the characters between the starting '<!--' and ...
Definition: dom_text.h:225
This class implements the basic string we use in the DOM.
Definition: dom_string.h:44
KCOREADDONS_EXPORT Result match(QStringView pattern, QStringView str)
ushort unicode() const const
bool contains(QChar ch, Qt::CaseSensitivity cs) const const
QChar toLower() const const
void reset()
resets the parser
Definition: htmlparser.cpp:205
The CSSRule interface is the abstract base interface for any type of CSS statement ...
Definition: css_rule.h:53
void start()
int size() const const
int size() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sat Oct 16 2021 22:47:55 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.