KHtml

htmlparser.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  Copyright (C) 1997 Martin Jones ([email protected])
5  (C) 1997 Torben Weis ([email protected])
6  (C) 1999,2001 Lars Knoll ([email protected])
7  (C) 2000,2001 Dirk Mueller ([email protected])
8  (C) 2003 Apple Computer, Inc.
9 
10  This library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU Library General Public
12  License as published by the Free Software Foundation; either
13  version 2 of the License, or (at your option) any later version.
14 
15  This library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  Library General Public License for more details.
19 
20  You should have received a copy of the GNU Library General Public License
21  along with this library; see the file COPYING.LIB. If not, write to
22  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23  Boston, MA 02110-1301, USA.
24 */
25 //----------------------------------------------------------------------------
26 //
27 // KDE HTML Widget -- HTML Parser
28 // #define PARSER_DEBUG
29 
30 #include "htmlparser.h"
31 
32 #include <dom/dom_exception.h>
33 
34 #include <html/html_baseimpl.h>
35 #include <html/html_blockimpl.h>
36 #include <html/html_canvasimpl.h>
37 #include <html/html_documentimpl.h>
38 #include <html/html_elementimpl.h>
39 #include <html/html_formimpl.h>
40 #include <html/html_headimpl.h>
41 #include <html/html_imageimpl.h>
42 #include <html/html_inlineimpl.h>
43 #include <html/html_listimpl.h>
44 #include <html/html_miscimpl.h>
45 #include <html/html_tableimpl.h>
46 #include <html/html_objectimpl.h>
47 #include <html/HTMLAudioElement.h>
48 #include <html/HTMLVideoElement.h>
49 #include <html/HTMLSourceElement.h>
50 #include <xml/dom_textimpl.h>
51 #include <xml/dom_nodeimpl.h>
52 #include <html/htmltokenizer.h>
53 #include <khtmlview.h>
54 #include <khtml_part.h>
55 #include <khtml_global.h>
56 #include <css/cssproperties.h>
57 #include <css/cssvalues.h>
58 
59 #include <rendering/render_object.h>
60 
61 #include "khtml_debug.h"
62 #include <klocalizedstring.h>
63 
64 // Turn off gnu90 inlining to avoid linker errors
65 #undef __GNUC_STDC_INLINE__
66 #undef __GNUC_GNU_INLINE__
67 #include <doctypes.h>
68 
69 #undef OPTIONAL // for win32, MinGW
70 
71 using namespace DOM;
72 using namespace khtml;
73 
74 #ifdef PARSER_DEBUG
75 static QString getParserPrintableName(int id)
76 {
77  if (id >= ID_CLOSE_TAG) {
78  return "/" + getPrintableName(id - ID_CLOSE_TAG);
79  } else {
80  return getPrintableName(id);
81  }
82 }
83 #endif
84 
85 //----------------------------------------------------------------------------
86 
87 /**
88  * @internal
89  */
90 class HTMLStackElem
91 {
92 public:
93  HTMLStackElem(int _id,
94  int _level,
95  DOM::NodeImpl *_node,
96  bool _inline_,
97  HTMLStackElem *_next)
98  :
99  id(_id),
100  level(_level),
101  strayTableContent(false),
102  m_inline(_inline_),
103  node(_node),
104  next(_next)
105  {
106  node->ref();
107  }
108 
109  ~HTMLStackElem()
110  {
111  node->deref();
112  }
113 
114  void setNode(NodeImpl *newNode)
115  {
116  newNode->ref();
117  node->deref();
118  node = newNode;
119  }
120 
121  int id;
122  int level;
123  bool strayTableContent;
124  bool m_inline;
125  NodeImpl *node;
126  HTMLStackElem *next;
127 };
128 
/**
 * @internal
 *
 * The parser parses tokenized input into the document, building up the
 * document tree. If the document is well-formed, parsing it is
 * straightforward.
 * Unfortunately, people can't write well-formed HTML documents, so the parser
 * has to be tolerant about errors.
 *
 * We have to take care of the following error conditions:
 * 1. The element being added is explicitly forbidden inside some outer tag.
 *    In this case we should close all tags up to the one which forbids
 *    the element, and add it afterwards.
 * 2. We are not allowed to add the element directly. It could be that
 *    the person writing the document forgot some tag in between (or that the
 *    tag in between is optional...). This could be the case with the following
 *    tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?)
 * 3. We want to add a block element inside an inline element. Close all
 *    inline elements up to the next higher block element.
 * 4. If this doesn't help, close elements until we are allowed to add the
 *    element, or ignore the tag.
 *
 */
152 
153 KHTMLParser::KHTMLParser(KHTMLView *_parent, DocumentImpl *doc)
154 {
155  //qCDebug(KHTML_LOG) << "parser constructor";
156 #if SPEED_DEBUG > 0
157  qt.start();
158 #endif
159 
160  HTMLWidget = _parent;
161  document = doc;
162 
163  blockStack = nullptr;
164  current = nullptr;
165 
166  // ID_CLOSE_TAG == Num of tags
167  forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
168 
169  reset();
170 }
171 
172 KHTMLParser::KHTMLParser(DOM::DocumentFragmentImpl *i, DocumentImpl *doc)
173 {
174  HTMLWidget = nullptr;
175  document = doc;
176 
177  forbiddenTag = new ushort[ID_CLOSE_TAG + 1];
178 
179  blockStack = nullptr;
180  current = nullptr;
181 
182  reset();
183 
184  setCurrent(i);
185 
186  inBody = true;
187 }
188 
189 KHTMLParser::~KHTMLParser()
190 {
191 #if SPEED_DEBUG > 0
192  qCDebug(KHTML_LOG) << "TIME: parsing time was = " << qt.elapsed();
193 #endif
194 
195  freeBlock();
196 
197  if (current) {
198  current->deref();
199  }
200 
201  delete [] forbiddenTag;
202  delete isindex;
203 }
204 
205 void KHTMLParser::reset()
206 {
207  setCurrent(document);
208 
209  freeBlock();
210 
211  // before parsing no tags are forbidden...
212  memset(forbiddenTag, 0, (ID_CLOSE_TAG + 1)*sizeof(ushort));
213 
214  inBody = false;
215  haveFrameSet = false;
216  haveContent = false;
217  haveBody = false;
218  haveTitle = false;
219  inSelect = false;
220  inStrayTableContent = 0;
221  m_inline = false;
222 
223  form = nullptr;
224  map = nullptr;
225  end = false;
226  isindex = nullptr;
227 
228  discard_until = 0;
229 }
230 
231 void KHTMLParser::parseToken(Token *t)
232 {
233  if (t->tid > 2 * ID_CLOSE_TAG) {
234  // qCDebug(KHTML_LOG) << "Unknown tag!! tagID = " << t->tid;
235  return;
236  }
237  if (discard_until) {
238  if (t->tid == discard_until) {
239  discard_until = 0;
240  }
241 
242  // do not skip </iframe>
243  if (discard_until || current->id() + ID_CLOSE_TAG != t->tid) {
244  return;
245  }
246  }
247 
248 #ifdef PARSER_DEBUG
249  qCDebug(KHTML_LOG) << "\n\n==> parser: processing token " << getParserPrintableName(t->tid) << "(" << t->tid << ")"
250  << " current = " << getParserPrintableName(current->id()) << "(" << current->id() << ")";
251  qCDebug(KHTML_LOG) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent;
252 #endif
253 
254  // holy shit. apparently some sites use </br> instead of <br>
255  // be compatible with IE and NS
256  if (t->tid == ID_BR + ID_CLOSE_TAG && document->inCompatMode()) {
257  t->tid -= ID_CLOSE_TAG;
258  }
259 
260  if (t->tid > ID_CLOSE_TAG) {
261  processCloseTag(t);
262  return;
263  }
264 
265  // ignore spaces, if we're not inside a paragraph or other inline code
266  if (t->tid == ID_TEXT && t->text) {
267  if (inBody && !skipMode() &&
268  current->id() != ID_STYLE && current->id() != ID_TITLE &&
269  current->id() != ID_SCRIPT &&
270  !t->text->containsOnlyWhitespace()) {
271  haveContent = true;
272  }
273 #ifdef PARSER_DEBUG
274 
275  qCDebug(KHTML_LOG) << "length=" << t->text->l << " text='" << QString::fromRawData(t->text->s, t->text->l) << "'";
276 #endif
277  }
278 
279  NodeImpl *n = getElement(t);
280  // just to be sure, and to catch currently unimplemented stuff
281  if (!n) {
282  return;
283  }
284 
285  // set attributes
286  if (n->isElementNode() && t->tid != ID_ISINDEX) {
287  ElementImpl *e = static_cast<ElementImpl *>(n);
288  e->setAttributeMap(t->attrs);
289  }
290 
291  // if this tag is forbidden inside the current context, pop
292  // blocks until we are allowed to add it...
293  while (blockStack && forbiddenTag[t->tid]) {
294 #ifdef PARSER_DEBUG
295  qCDebug(KHTML_LOG) << "t->id: " << t->tid << " is forbidden :-( ";
296 #endif
297  popOneBlock();
298  }
299 
300  // sometimes flat doesn't make sense
301  switch (t->tid) {
302  case ID_SELECT:
303  case ID_OPTION:
304  t->flat = false;
305  }
306 
307  // the tokenizer needs the feedback for space discarding
308  if (tagPriority(t->tid) == 0) {
309  t->flat = true;
310  }
311 
312  if (!insertNode(n, t->flat)) {
313  // we couldn't insert the node...
314 #ifdef PARSER_DEBUG
315  qCDebug(KHTML_LOG) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!";
316 #endif
317  if (map == n) {
318 #ifdef PARSER_DEBUG
319  qCDebug(KHTML_LOG) << " --> resetting map!";
320 #endif
321  map = nullptr;
322  }
323  if (form == n) {
324 #ifdef PARSER_DEBUG
325  qCDebug(KHTML_LOG) << " --> resetting form!";
326 #endif
327  form = nullptr;
328  }
329  delete n;
330  }
331 }
332 
333 void KHTMLParser::parseDoctypeToken(DoctypeToken *t)
334 {
335  // Ignore any doctype after the first. TODO It should be also ignored when processing DocumentFragment
336  if (current != document || document->doctype()) {
337  return;
338  }
339 
340  DocumentTypeImpl *doctype = new DocumentTypeImpl(document->implementation(), document, t->name, t->publicID, t->systemID);
341  if (!t->internalSubset.isEmpty()) {
342  doctype->setInternalSubset(t->internalSubset);
343  }
344  document->addChild(doctype);
345 
346  // Determine parse mode here
347  // This code more or less mimics Mozilla's implementation.
348  //
349  // There are three possible parse modes:
350  // COMPAT - quirks mode emulates WinIE
351  // and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
352  // be omitted from numbers.
353  // ALMOST STRICT - This mode is identical to strict mode
354  // except for its treatment of line-height in the inline box model. For
355  // now (until the inline box model is re-written), this mode is identical
356  // to STANDARDS mode.
357  // STRICT - no quirks apply. Web pages will obey the specifications to
358  // the letter.
359 
360  if (!document->isHTMLDocument()) { // FIXME Could document be non-HTML?
361  return;
362  }
363  DOM::HTMLDocumentImpl *htmldoc = static_cast<DOM::HTMLDocumentImpl *>(document);
364  if (t->name.toLower() == "html") {
365  if (!t->internalSubset.isEmpty() || t->publicID.isEmpty()) {
366  // Internal subsets always denote full standards, as does
367  // a doctype without a public ID.
368  htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
369  } else {
370  // We have to check a list of public IDs to see what we
371  // should do.
372  QString lowerPubID = t->publicID.toLower();
373  QByteArray pubIDStr = lowerPubID.toLocal8Bit();
374 
375  // Look up the entry in our gperf-generated table.
376  const PubIDInfo *doctypeEntry = Perfect_Hash::findDoctypeEntry(pubIDStr.constData(), t->publicID.length());
377  if (!doctypeEntry) {
378  // The DOCTYPE is not in the list. Assume strict mode.
379  // ### Doesn't make any sense, but it's what Mozilla does.
380  htmldoc->changeModes(DOM::DocumentImpl::Strict, DOM::DocumentImpl::Html4);
381  } else {
382  switch ((!t->systemID.isEmpty()) ?
383  doctypeEntry->mode_if_sysid :
384  doctypeEntry->mode_if_no_sysid) {
385  case PubIDInfo::eQuirks3:
386  htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
387  break;
388  case PubIDInfo::eQuirks:
389  htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html4);
390  break;
391  case PubIDInfo::eAlmostStandards:
392  htmldoc->changeModes(DOM::DocumentImpl::Transitional, DOM::DocumentImpl::Html4);
393  break;
394  default:
395  assert(!"Unknown parse mode");
396  }
397  }
398  }
399  } else {
400  // Malformed doctype implies quirks mode.
401  htmldoc->changeModes(DOM::DocumentImpl::Compat, DOM::DocumentImpl::Html3);
402  }
403 }
404 
405 static bool isTableRelatedTag(int id)
406 {
407  return (id == ID_TR || id == ID_TD || id == ID_TABLE || id == ID_TBODY || id == ID_TFOOT || id == ID_THEAD ||
408  id == ID_TH);
409 }
410 
411 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
412 {
413  int id = n->id();
414 
415  // <table> is never allowed inside stray table content. Always pop out of the stray table content
416  // and close up the first table, and then start the second table as a sibling.
417  if (inStrayTableContent && id == ID_TABLE) {
418  popBlock(ID_TABLE);
419  }
420 
421  // let's be stupid and just try to insert it.
422  // this should work if the document is wellformed
423 #ifdef PARSER_DEBUG
424  NodeImpl *tmp = current;
425 #endif
426  NodeImpl *newNode = current->addChild(n);
427  if (newNode) {
428 #ifdef PARSER_DEBUG
429  qCDebug(KHTML_LOG) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string();
430 #endif
431  // We allow TABLE > FORM in dtd.cpp, but do not allow the form have children in this case
432  if (current->id() == ID_TABLE && id == ID_FORM) {
433  flat = true;
434  static_cast<HTMLFormElementImpl *>(n)->setMalformed(true);
435  }
436 
437  // don't push elements without end tag on the stack
438  if (tagPriority(id) != 0 && !flat) {
439 #if SPEED_DEBUG < 2
440  if (!n->attached() && HTMLWidget) {
441  n->attach();
442  }
443 #endif
444  if (n->isInline()) {
445  m_inline = true;
446  }
447  pushBlock(id, tagPriority(id));
448  setCurrent(newNode);
449  } else {
450 #if SPEED_DEBUG < 2
451  if (!n->attached() && HTMLWidget) {
452  n->attach();
453  }
454  if (n->maintainsState()) {
455  document->registerMaintainsState(n);
456  document->attemptRestoreState(n);
457  }
458  n->close();
459 #endif
460  if (n->isInline()) {
461  m_inline = true;
462  }
463  }
464 
465 #if SPEED_DEBUG < 1
466  if (tagPriority(id) == 0 && n->renderer()) {
467  n->renderer()->calcMinMaxWidth();
468  }
469 #endif
470  return true;
471  } else {
472 #ifdef PARSER_DEBUG
473  qCDebug(KHTML_LOG) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string();
474 #endif
475  // error handling...
476  HTMLElementImpl *e;
477  bool handled = false;
478 
479  // first switch on current element for elements with optional end-tag and inline-only content
480  switch (current->id()) {
481  case ID_P:
482  case ID_DT:
483  if (!n->isInline()) {
484  popBlock(current->id());
485  return insertNode(n);
486  }
487  break;
488  case ID_TITLE:
489  popBlock(current->id());
490  return insertNode(n);
491  default:
492  break;
493  }
494 
495  // switch according to the element to insert
496  switch (id) {
497  case ID_TR:
498  case ID_TH:
499  case ID_TD:
500  if (inStrayTableContent && !isTableRelatedTag(current->id())) {
501  // pop out to the nearest enclosing table-related tag.
502  while (blockStack && !isTableRelatedTag(current->id())) {
503  popOneBlock();
504  }
505  return insertNode(n);
506  }
507  break;
508  case ID_HEAD:
509  // ### allow not having <HTML> in at all, as per HTML spec
510  if (!current->isDocumentNode() && current->id() != ID_HTML) {
511  return false;
512  }
513  break;
514  case ID_COMMENT:
515  if (head) {
516  break;
517  }
518  case ID_META:
519  case ID_LINK:
520  case ID_ISINDEX:
521  case ID_BASE:
522  if (!head) {
523  createHead();
524  }
525  if (head) {
526  if (head->addChild(n)) {
527 #if SPEED_DEBUG < 2
528  if (!n->attached() && HTMLWidget) {
529  n->attach();
530  }
531 #endif
532  }
533 
534  return true;
535  }
536 
537  break;
538  case ID_HTML:
539  if (!current->isDocumentNode()) {
540  if (doc()->documentElement()->id() == ID_HTML) {
541  // we have another <HTML> element.... apply attributes to existing one
542  // make sure we don't overwrite already existing attributes
543  NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
544  NamedAttrMapImpl *bmap = static_cast<ElementImpl *>(doc()->documentElement())->attributes(false);
545  bool changed = false;
546  for (unsigned long l = 0; map && l < map->length(); ++l) {
547  NodeImpl::Id attrId = map->idAt(l);
548  DOMStringImpl *attrValue = map->valueAt(l);
549  changed = !bmap->getValue(attrId);
550  bmap->setValue(attrId, attrValue);
551  }
552  if (changed) {
553  doc()->recalcStyle(NodeImpl::Inherit);
554  }
555  }
556  return false;
557  }
558  break;
559  case ID_TITLE:
560  case ID_STYLE:
561  if (!head) {
562  createHead();
563  }
564  if (head) {
565  DOM::NodeImpl *newNode = head->addChild(n);
566  if (newNode) {
567  pushBlock(id, tagPriority(id));
568  setCurrent(newNode);
569 #if SPEED_DEBUG < 2
570  if (!n->attached() && HTMLWidget) {
571  n->attach();
572  }
573 #endif
574  } else {
575 #ifdef PARSER_DEBUG
576  qCDebug(KHTML_LOG) << "adding style before to body failed!!!!";
577 #endif
578  discard_until = ID_STYLE + ID_CLOSE_TAG;
579  return false;
580  }
581  return true;
582  } else if (inBody) {
583  discard_until = id + ID_CLOSE_TAG;
584  return false;
585  }
586  break;
587  case ID_SCRIPT:
588  // if we failed to insert it, go into skip mode
589  discard_until = id + ID_CLOSE_TAG;
590  break;
591  case ID_BODY:
592  if (inBody && doc()->body()) {
593  // we have another <BODY> element.... apply attributes to existing one
594  // make sure we don't overwrite already existing attributes
595  // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
596  NamedAttrMapImpl *map = static_cast<ElementImpl *>(n)->attributes(true);
597  NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
598  bool changed = false;
599  for (unsigned long l = 0; map && l < map->length(); ++l) {
600  NodeImpl::Id attrId = map->idAt(l);
601  DOMStringImpl *attrValue = map->valueAt(l);
602  if (!bmap->getValue(attrId)) {
603  bmap->setValue(attrId, attrValue);
604  changed = true;
605  }
606  }
607  if (changed) {
608  doc()->recalcStyle(NodeImpl::Inherit);
609  }
610  } else if (current->isDocumentNode()) {
611  break;
612  }
613  return false;
614  break;
615 
616  // the following is a hack to move non rendered elements
617  // outside of tables.
618  // needed for broken constructs like <table><form ...><tr>....
619  case ID_INPUT: {
620  ElementImpl *e = static_cast<ElementImpl *>(n);
621  DOMString type = e->getAttribute(ATTR_TYPE);
622 
623  if (strcasecmp(type, "hidden") != 0) {
624  break;
625  }
626  // Fall through!
627  }
628  case ID_TEXT: {
629  // Don't try to fit random white-space anywhere
630  TextImpl *t = static_cast<TextImpl *>(n);
631  if (t->containsOnlyWhitespace()) {
632  return false;
633  }
634  // ignore text inside the following elements.
635  switch (current->id()) {
636  case ID_SELECT:
637  return false;
638  default:
639  ;
640  // fall through!!
641  };
642  break;
643  }
644  case ID_DL:
645  popBlock(ID_DT);
646  if (current->id() == ID_DL) {
647  e = new HTMLGenericElementImpl(document, ID_DD);
648  insertNode(e);
649  handled = true;
650  }
651  break;
652  case ID_DT:
653  e = new HTMLDListElementImpl(document);
654  if (insertNode(e)) {
655  insertNode(n);
656  return true;
657  }
658  break;
659  case ID_AREA: {
660  if (map) {
661  map->addChild(n);
662 #if SPEED_DEBUG < 2
663  if (!n->attached() && HTMLWidget) {
664  n->attach();
665  }
666 #endif
667  handled = true;
668  return true;
669  } else {
670  return false;
671  }
672  }
673 
674  case ID_THEAD:
675  case ID_TBODY:
676  case ID_TFOOT:
677  case ID_CAPTION:
678  case ID_COLGROUP: {
679  if (isTableRelatedTag(current->id())) {
680  while (blockStack && current->id() != ID_TABLE && isTableRelatedTag(current->id())) {
681  popOneBlock();
682  }
683  return insertNode(n);
684  }
685  }
686  default:
687  break;
688  }
689 
690  // switch on the currently active element
691  switch (current->id()) {
692  case ID_HTML:
693  switch (id) {
694  case ID_SCRIPT:
695  case ID_STYLE:
696  case ID_META:
697  case ID_LINK:
698  case ID_OBJECT:
699  case ID_EMBED:
700  case ID_TITLE:
701  case ID_ISINDEX:
702  case ID_BASE:
703  if (!head) {
704  head = new HTMLHeadElementImpl(document);
705  insertNode(head.get());
706  handled = true;
707  }
708  break;
709  case ID_TEXT: {
710  TextImpl *t = static_cast<TextImpl *>(n);
711  if (t->containsOnlyWhitespace()) {
712  return false;
713  }
714  /* Fall through to default */
715  }
716  default:
717  if (haveFrameSet) {
718  break;
719  }
720  e = new HTMLBodyElementImpl(document);
721  startBody();
722  insertNode(e);
723  handled = true;
724  break;
725  }
726  break;
727  case ID_HEAD:
728  // we can get here only if the element is not allowed in head.
729  if (id == ID_HTML) {
730  return false;
731  } else {
732  // This means the body starts here...
733  if (haveFrameSet) {
734  break;
735  }
736  popBlock(ID_HEAD);
737  e = new HTMLBodyElementImpl(document);
738  startBody();
739  insertNode(e);
740  handled = true;
741  }
742  break;
743  case ID_BODY:
744  break;
745  case ID_CAPTION:
746  // Illegal content in a caption. Close the caption and try again.
747  popBlock(ID_CAPTION);
748  switch (id) {
749  case ID_THEAD:
750  case ID_TFOOT:
751  case ID_TBODY:
752  case ID_TR:
753  case ID_TD:
754  case ID_TH:
755  return insertNode(n, flat);
756  }
757  break;
758  case ID_TABLE:
759  case ID_THEAD:
760  case ID_TFOOT:
761  case ID_TBODY:
762  case ID_TR:
763  switch (id) {
764  case ID_TABLE:
765  popBlock(ID_TABLE); // end the table
766  handled = checkChild(current->id(), id, doc()->inStrictMode());
767  break;
768  default: {
769  NodeImpl *node = current;
770  NodeImpl *parent = node->parentNode();
771  // A script may have removed the current node's parent from the DOM
772  // http://bugzilla.opendarwin.org/show_bug.cgi?id=7137
773  // FIXME: we should do real recovery here and re-parent with the correct node.
774  if (!parent) {
775  return false;
776  }
777  NodeImpl *parentparent = parent->parentNode();
778 
779  if (n->isTextNode() ||
780  (node->id() == ID_TR &&
781  (parent->id() == ID_THEAD ||
782  parent->id() == ID_TBODY ||
783  parent->id() == ID_TFOOT) && parentparent->id() == ID_TABLE) ||
784  (!checkChild(ID_TR, id) && (node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT) &&
785  parent->id() == ID_TABLE)) {
786  node = (node->id() == ID_TABLE) ? node :
787  ((node->id() == ID_TR) ? parentparent : parent);
788  NodeImpl *parent = node->parentNode();
789  if (!parent) {
790  return false;
791  }
792  int exceptioncode = 0;
793 #ifdef PARSER_DEBUG
794  qCDebug(KHTML_LOG) << "calling insertBefore(" << n->nodeName().string() << "," << node->nodeName().string() << ")";
795 #endif
796  parent->insertBefore(n, node, exceptioncode);
797  if (exceptioncode) {
798 #ifndef PARSER_DEBUG
799  if (!n->isTextNode())
800 #endif
801  // qCDebug(KHTML_LOG) << "adding content before table failed..";
802  break;
803  }
804  if (n->isElementNode() && tagPriority(id) != 0 &&
805  !flat && endTagRequirement(id) != DOM::FORBIDDEN) {
806 
807  pushBlock(id, tagPriority(id));
808  setCurrent(n);
809  inStrayTableContent++;
810  blockStack->strayTableContent = true;
811  }
812  return true;
813  }
814 
815  if (current->id() == ID_TR) {
816  e = new HTMLTableCellElementImpl(document, ID_TD);
817  } else if (current->id() == ID_TABLE) {
818  e = new HTMLTableSectionElementImpl(document, ID_TBODY, true /* implicit */);
819  } else {
820  e = new HTMLTableRowElementImpl(document);
821  }
822 
823  insertNode(e);
824  handled = true;
825  break;
826  } // end default
827  } // end switch
828  break;
829  case ID_OBJECT:
830  discard_until = id + ID_CLOSE_TAG;
831  return false;
832  case ID_UL:
833  case ID_OL:
834  case ID_DIR:
835  case ID_MENU:
836  e = new HTMLLIElementImpl(document);
837  e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
838  insertNode(e);
839  handled = true;
840  break;
841  case ID_FORM:
842  popBlock(ID_FORM);
843  handled = true;
844  break;
845  case ID_SELECT:
846  if (n->isInline()) {
847  return false;
848  }
849  break;
850  case ID_P:
851  case ID_H1:
852  case ID_H2:
853  case ID_H3:
854  case ID_H4:
855  case ID_H5:
856  case ID_H6:
857  if (!n->isInline()) {
858  popBlock(current->id());
859  handled = true;
860  }
861  break;
862  case ID_OPTION:
863  case ID_OPTGROUP:
864  if (id == ID_OPTGROUP) {
865  popBlock(current->id());
866  handled = true;
867  } else if (id == ID_SELECT) {
868  // IE treats a nested select as </select>. Let's do the same
869  popBlock(ID_SELECT);
870  break;
871  }
872  break;
873  // head elements in the body should be ignored.
874 
875  case ID_ADDRESS:
876  case ID_COLGROUP:
877  case ID_FONT:
878  popBlock(current->id());
879  handled = true;
880  break;
881  default:
882  if (current->isDocumentNode()) {
883  DocumentImpl *doc = static_cast<DocumentImpl *>(current);
884  if (!doc->documentElement()) {
885  e = new HTMLHtmlElementImpl(document);
886  insertNode(e);
887  handled = true;
888  }
889  } else if (current->isInline()) {
890  popInlineBlocks();
891  handled = true;
892  }
893  }
894 
895  // if we couldn't handle the error, just rethrow the exception...
896  if (!handled) {
897  //qCDebug(KHTML_LOG) << "Exception handler failed in HTMLPArser::insertNode()";
898  return false;
899  }
900 
901  return insertNode(n);
902  }
903 }
904 
905 NodeImpl *KHTMLParser::getElement(Token *t)
906 {
907  NodeImpl *n = nullptr;
908 
909  switch (t->tid) {
910  case ID_HTML:
911  n = new HTMLHtmlElementImpl(document);
912  break;
913  case ID_HEAD:
914  if (!head && (current->id() == ID_HTML || current->isDocumentNode())) {
915  head = new HTMLHeadElementImpl(document);
916  n = head.get();
917  }
918  break;
919  case ID_BODY:
920  // body no longer allowed if we have a frameset
921  if (haveFrameSet) {
922  break;
923  }
924  popBlock(ID_HEAD);
925  n = new HTMLBodyElementImpl(document);
926  haveBody = true;
927  startBody();
928  break;
929 
930 // head elements
931  case ID_BASE:
932  n = new HTMLBaseElementImpl(document);
933  break;
934  case ID_LINK:
935  n = new HTMLLinkElementImpl(document);
936  break;
937  case ID_META:
938  n = new HTMLMetaElementImpl(document);
939  break;
940  case ID_STYLE:
941  n = new HTMLStyleElementImpl(document);
942  break;
943  case ID_TITLE:
944  // only one non-empty <title> allowed
945  if (haveTitle) {
946  discard_until = ID_TITLE + ID_CLOSE_TAG;
947  break;
948  }
949  n = new HTMLTitleElementImpl(document);
950  // we'll set haveTitle when closing the tag
951  break;
952 
953 // frames
954  case ID_FRAME:
955  n = new HTMLFrameElementImpl(document);
956  break;
957  case ID_FRAMESET:
958  popBlock(ID_HEAD);
959  if (inBody && !haveFrameSet && !haveContent && !haveBody) {
960  popBlock(ID_BODY);
961  // ### actually for IE document.body returns the now hidden "body" element
962  // we can't implement that behavior now because it could cause too many
963  // regressions and the headaches are not worth the work as long as there is
964  // no site actually relying on that detail (Dirk)
965  if (static_cast<HTMLDocumentImpl *>(document)->body())
966  static_cast<HTMLDocumentImpl *>(document)->body()
967  ->addCSSProperty(CSS_PROP_DISPLAY, CSS_VAL_NONE);
968  inBody = false;
969  }
970  if ((haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML) {
971  break;
972  }
973  n = new HTMLFrameSetElementImpl(document);
974  haveFrameSet = true;
975  startBody();
976  break;
977  // a bit a special case, since the frame is inlined...
978  case ID_IFRAME:
979  n = new HTMLIFrameElementImpl(document);
980  break;
981 
982 // form elements
983  case ID_FORM:
984  // thou shall not nest <form> - NS/IE quirk
985  if (form) {
986  break;
987  }
988  n = form = new HTMLFormElementImpl(document, false);
989  break;
990  case ID_BUTTON:
991  n = new HTMLButtonElementImpl(document, form);
992  break;
993  case ID_FIELDSET:
994  n = new HTMLFieldSetElementImpl(document, form);
995  break;
996  case ID_INPUT:
997  if (t->attrs &&
998  KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
999  KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled() &&
1000  !strcasecmp(t->attrs->getValue(ATTR_TYPE), "image")) {
1001  const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1002  if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1003  return nullptr;
1004  }
1005  }
1006  n = new HTMLInputElementImpl(document, form);
1007  break;
1008  case ID_ISINDEX:
1009  n = handleIsindex(t);
1010  if (!inBody) {
1011  isindex = n;
1012  n = nullptr;
1013  } else {
1014  t->flat = true;
1015  }
1016  break;
1017  case ID_KEYGEN:
1018  n = new HTMLKeygenElementImpl(document, form);
1019  break;
1020  case ID_LABEL:
1021  n = new HTMLLabelElementImpl(document);
1022  break;
1023  case ID_LEGEND:
1024  n = new HTMLLegendElementImpl(document, form);
1025  break;
1026  case ID_OPTGROUP:
1027  n = new HTMLOptGroupElementImpl(document, form);
1028  break;
1029  case ID_OPTION:
1030  popOptionalBlock(ID_OPTION);
1031  n = new HTMLOptionElementImpl(document, form);
1032  break;
1033  case ID_SELECT:
1034  inSelect = true;
1035  n = new HTMLSelectElementImpl(document, form);
1036  break;
1037  case ID_TEXTAREA:
1038  n = new HTMLTextAreaElementImpl(document, form);
1039  break;
1040 
1041 // lists
1042  case ID_DL:
1043  n = new HTMLDListElementImpl(document);
1044  break;
1045  case ID_DD:
1046  popOptionalBlock(ID_DT);
1047  popOptionalBlock(ID_DD);
1048  n = new HTMLGenericElementImpl(document, t->tid);
1049  break;
1050  case ID_DT:
1051  popOptionalBlock(ID_DD);
1052  popOptionalBlock(ID_DT);
1053  n = new HTMLGenericElementImpl(document, t->tid);
1054  break;
1055  case ID_UL: {
1056  n = new HTMLUListElementImpl(document);
1057  break;
1058  }
1059  case ID_OL: {
1060  n = new HTMLOListElementImpl(document);
1061  break;
1062  }
1063  case ID_DIR:
1064  n = new HTMLDirectoryElementImpl(document);
1065  break;
1066  case ID_MENU:
1067  n = new HTMLMenuElementImpl(document);
1068  break;
1069  case ID_LI:
1070  popOptionalBlock(ID_LI);
1071  n = new HTMLLIElementImpl(document);
1072  break;
1073 // formatting elements (block)
1074  case ID_BLOCKQUOTE:
1075  n = new HTMLGenericElementImpl(document, t->tid);
1076  break;
1077  case ID_LAYER:
1078  case ID_ILAYER:
1079  n = new HTMLLayerElementImpl(document, t->tid);
1080  break;
1081  case ID_P:
1082  case ID_DIV:
1083  n = new HTMLDivElementImpl(document, t->tid);
1084  break;
1085  case ID_H1:
1086  case ID_H2:
1087  case ID_H3:
1088  case ID_H4:
1089  case ID_H5:
1090  case ID_H6:
1091  n = new HTMLGenericElementImpl(document, t->tid);
1092  break;
1093  case ID_HR:
1094  n = new HTMLHRElementImpl(document);
1095  break;
1096  case ID_PRE:
1097  case ID_XMP:
1098  case ID_PLAINTEXT:
1099  case ID_LISTING:
1100  n = new HTMLPreElementImpl(document, t->tid);
1101  break;
1102 
1103 // font stuff
1104  case ID_BASEFONT:
1105  n = new HTMLBaseFontElementImpl(document);
1106  break;
1107  case ID_FONT:
1108  n = new HTMLFontElementImpl(document);
1109  break;
1110 
1111 // ins/del
1112  case ID_DEL:
1113  case ID_INS:
1114  n = new HTMLGenericElementImpl(document, t->tid);
1115  break;
1116 
1117 // anchor
1118  case ID_A:
1119  popBlock(ID_A);
1120 
1121  n = new HTMLAnchorElementImpl(document);
1122  break;
1123 
1124 // images
1125  case ID_IMAGE:
1126  case ID_IMG:
1127  if (t->attrs &&
1128  KHTMLGlobal::defaultHTMLSettings()->isAdFilterEnabled() &&
1129  KHTMLGlobal::defaultHTMLSettings()->isHideAdsEnabled()) {
1130  const QString url = doc()->completeURL(DOMString(t->attrs->getValue(ATTR_SRC)).trimSpaces().string());
1131  if (KHTMLGlobal::defaultHTMLSettings()->isAdFiltered(url)) {
1132  return nullptr;
1133  }
1134  }
1135  n = new HTMLImageElementImpl(document, form);
1136  break;
1137 
1138  case ID_CANVAS:
1139  n = new HTMLCanvasElementImpl(document);
1140  break;
1141 
1142  case ID_MAP:
1143  map = new HTMLMapElementImpl(document);
1144  n = map;
1145  break;
1146  case ID_AREA:
1147  n = new HTMLAreaElementImpl(document);
1148  break;
1149 
1150 // objects, applets and scripts
1151  case ID_APPLET:
1152  n = new HTMLAppletElementImpl(document);
1153  break;
1154  case ID_EMBED:
1155  n = new HTMLEmbedElementImpl(document);
1156  break;
1157  case ID_OBJECT:
1158  n = new HTMLObjectElementImpl(document);
1159  break;
1160  case ID_PARAM:
1161  n = new HTMLParamElementImpl(document);
1162  break;
1163  case ID_SCRIPT: {
1164  HTMLScriptElementImpl *scriptElement = new HTMLScriptElementImpl(document);
1165  scriptElement->setCreatedByParser(true);
1166  n = scriptElement;
1167  break;
1168  }
1169 
1170 // media
1171  case ID_AUDIO:
1172  n = new HTMLAudioElement(document);
1173  break;
1174  case ID_VIDEO:
1175  n = new HTMLVideoElement(document);
1176  break;
1177  case ID_SOURCE:
1178  n = new HTMLSourceElement(document);
1179  break;
1180 
1181 // tables
1182  case ID_TABLE:
1183  n = new HTMLTableElementImpl(document);
1184  break;
1185  case ID_CAPTION:
1186  n = new HTMLTableCaptionElementImpl(document);
1187  break;
1188  case ID_COLGROUP:
1189  case ID_COL:
1190  n = new HTMLTableColElementImpl(document, t->tid);
1191  break;
1192  case ID_TR:
1193  popBlock(ID_TR);
1194  n = new HTMLTableRowElementImpl(document);
1195  break;
1196  case ID_TD:
1197  case ID_TH:
1198  popBlock(ID_TH);
1199  popBlock(ID_TD);
1200  n = new HTMLTableCellElementImpl(document, t->tid);
1201  break;
1202  case ID_TBODY:
1203  case ID_THEAD:
1204  case ID_TFOOT:
1205  popBlock(ID_THEAD);
1206  popBlock(ID_TBODY);
1207  popBlock(ID_TFOOT);
1208  n = new HTMLTableSectionElementImpl(document, t->tid, false);
1209  break;
1210 
1211 // inline elements
1212  case ID_BR:
1213  n = new HTMLBRElementImpl(document);
1214  break;
1215  case ID_Q:
1216  n = new HTMLGenericElementImpl(document, t->tid);
1217  break;
1218 
1219 // elements with no special representation in the DOM
1220 
1221 // block:
1222  case ID_ADDRESS:
1223  case ID_CENTER:
1224  n = new HTMLGenericElementImpl(document, t->tid);
1225  break;
1226 // inline
1227  // %fontstyle
1228  case ID_TT:
1229  case ID_U:
1230  case ID_B:
1231  case ID_I:
1232  case ID_S:
1233  case ID_STRIKE:
1234  case ID_BIG:
1235  case ID_SMALL:
1236 
1237  // %phrase
1238  case ID_EM:
1239  case ID_STRONG:
1240  case ID_DFN:
1241  case ID_CODE:
1242  case ID_SAMP:
1243  case ID_KBD:
1244  case ID_VAR:
1245  case ID_CITE:
1246  case ID_ABBR:
1247  case ID_ACRONYM:
1248 
1249  // %special
1250  case ID_SUB:
1251  case ID_SUP:
1252  case ID_SPAN:
1253  case ID_WBR:
1254  case ID_NOBR:
1255  if (t->tid == ID_NOBR || t->tid == ID_WBR) {
1256  popOptionalBlock(t->tid);
1257  }
1258  case ID_BDO:
1259  n = new HTMLGenericElementImpl(document, t->tid);
1260  break;
1261 
1262  // these are special, and normally not rendered
1263  case ID_NOEMBED:
1264  if (!t->flat) {
1265  n = new HTMLGenericElementImpl(document, t->tid);
1266  discard_until = ID_NOEMBED + ID_CLOSE_TAG;
1267  }
1268  return n;
1269  case ID_NOFRAMES:
1270  if (!t->flat) {
1271  n = new HTMLGenericElementImpl(document, t->tid);
1272  discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
1273  }
1274  return n;
1275  case ID_NOSCRIPT:
1276  if (!t->flat) {
1277  n = new HTMLGenericElementImpl(document, t->tid);
1278  if (HTMLWidget && HTMLWidget->part()->jScriptEnabled()) {
1279  discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
1280  }
1281  }
1282  return n;
1283  case ID_NOLAYER:
1284 // discard_until = ID_NOLAYER + ID_CLOSE_TAG;
1285  return nullptr;
1286  break;
1287  case ID_MARQUEE:
1288  n = new HTMLMarqueeElementImpl(document);
1289  break;
1290 // text
1291  case ID_TEXT:
1292 // qCDebug(KHTML_LOG) << "ID_TEXT: \"" << DOMString(t->text).string() << "\"";
1293  n = new TextImpl(document, t->text);
1294  break;
1295  case ID_COMMENT:
1296  n = new CommentImpl(document, t->text);
1297  break;
1298  default:
1299  n = new HTMLGenericElementImpl(document, t->tid);
1300  break;
1301 // qCDebug(KHTML_LOG) << "Unknown tag " << t->tid << "!";
1302  }
1303  return n;
1304 }
1305 
1306 void KHTMLParser::processCloseTag(Token *t)
1307 {
1308  // FIXME: the below only behaves according to "in body" insertion mode (HTML5 8.2.5.10)
1309  // - might need fixing when we have other insertion modes.
1310  switch (t->tid) {
1311  case ID_HTML+ID_CLOSE_TAG:
1312  case ID_BODY+ID_CLOSE_TAG:
1313  // we never trust those close tags, since stupid webpages close
1314  // them prematurely
1315  return;
1316  case ID_FORM+ID_CLOSE_TAG: // needs additional error checking. See spec.
1317  form = nullptr;
1318  if (!isElementInScope(ID_FORM)) {
1319  // Parse error. Ignore.
1320  return;
1321  }
1322  // this one is to get the right style on the body element
1323  break;
1324  case ID_MAP+ID_CLOSE_TAG:
1325  map = nullptr;
1326  break;
1327  case ID_SELECT+ID_CLOSE_TAG:
1328  inSelect = false;
1329  break;
1330  case ID_TITLE+ID_CLOSE_TAG:
1331  // Set haveTitle only if <title> isn't empty
1332  if (current->firstChild()) {
1333  haveTitle = true;
1334  }
1335  break;
1336  case ID_P+ID_CLOSE_TAG:
1337  if (!isElementInScope(ID_P)) {
1338  // Parse error. Handle as if <p> had been seen.
1339  t->tid = ID_P;
1340  parseToken(t);
1341  popBlock(ID_P);
1342  return;
1343  }
1344  break;
1345  case ID_ADDRESS+ID_CLOSE_TAG:
1346 // case ID_ARTICLE+ID_CLOSE_TAG:
1347  case ID_BLOCKQUOTE+ID_CLOSE_TAG:
1348  case ID_CENTER+ID_CLOSE_TAG:
1349 // case ID_DATAGRID+ID_CLOSE_TAG:
1350 // case ID_DETAILS+ID_CLOSE_TAG:
1351 // case ID_DIALOG+ID_CLOSE_TAG:
1352  case ID_DIR+ID_CLOSE_TAG:
1353  case ID_DIV+ID_CLOSE_TAG:
1354  case ID_DL+ID_CLOSE_TAG:
1355  case ID_FIELDSET+ID_CLOSE_TAG:
1356 // case ID_FIGURE+ID_CLOSE_TAG:
1357 // case ID_FOOTER+ID_CLOSE_TAG:
1358 // case ID_HEADER+ID_CLOSE_TAG:
1359  case ID_LISTING+ID_CLOSE_TAG:
1360  case ID_MENU+ID_CLOSE_TAG:
1361 // case ID_NAV+ID_CLOSE_TAG:
1362  case ID_OL+ID_CLOSE_TAG:
1363  case ID_PRE+ID_CLOSE_TAG:
1364 // case ID_SECTION+ID_CLOSE_TAG:
1365  case ID_UL+ID_CLOSE_TAG:
1366 
1367  case ID_DD+ID_CLOSE_TAG:
1368  case ID_DT+ID_CLOSE_TAG:
1369  case ID_LI+ID_CLOSE_TAG:
1370 
1371  case ID_APPLET+ID_CLOSE_TAG: // those four should also "Clear the list of active formatting elements
1372  case ID_BUTTON+ID_CLOSE_TAG: // up to the last marker." whenever we implement adoption agency.
1373  case ID_MARQUEE+ID_CLOSE_TAG:
1374  case ID_OBJECT+ID_CLOSE_TAG:
1375 
1376  case ID_HEAD+ID_CLOSE_TAG: // ### according to HTML5, should be treated as 'Any other end tag'
1377  // We'll do that when proper 'Any other end tag' handling is implemented.
1378  // In the meantime, test scoping at least (#170694)
1379 
1380  if (!isElementInScope(t->tid - ID_CLOSE_TAG)) {
1381  // Parse error. Ignore token.
1382  return;
1383  }
1384  break;
1385  case ID_H1:
1386  case ID_H2:
1387  case ID_H3:
1388  case ID_H4:
1389  case ID_H5:
1390  case ID_H6:
1391  if (!isHeadingInScope()) {
1392  // Parse error. Ignore token.
1393  return;
1394  }
1395  break;
1396  case ID_A: // Formatting elements - will need special handling - cf. HTML5 "adoption agency algorithm"
1397  case ID_B: // meant to replace the "residual style" handling we have now.
1398  case ID_BIG:
1399  case ID_CODE:
1400  case ID_EM:
1401  case ID_FONT:
1402  case ID_I:
1403  case ID_NOBR:
1404  case ID_S:
1405  case ID_SMALL:
1406  case ID_STRIKE:
1407  case ID_STRONG:
1408  case ID_TT:
1409  case ID_U:
1410  break;
1411 
1412  default:
1413 // otherTag = true; // FIXME: implement 'Any other end tag' handling
1414  break;
1415  }
1416 
1417 #ifdef PARSER_DEBUG
1418  qCDebug(KHTML_LOG) << "added the following children to " << current->nodeName().string();
1419  NodeImpl *child = current->firstChild();
1420  while (child != 0) {
1421  qCDebug(KHTML_LOG) << " " << child->nodeName().string();
1422  child = child->nextSibling();
1423  }
1424 #endif
1425 
1426  generateImpliedEndTags(t->tid - ID_CLOSE_TAG);
1427  popBlock(t->tid - ID_CLOSE_TAG);
1428 
1429 #ifdef PARSER_DEBUG
1430  qCDebug(KHTML_LOG) << "closeTag --> current = " << current->nodeName().string();
1431 #endif
1432 }
1433 
1434 bool KHTMLParser::isResidualStyleTag(int _id)
1435 {
1436  switch (_id) {
1437  case ID_A:
1438  case ID_B:
1439  case ID_BIG:
1440  case ID_EM:
1441  case ID_FONT:
1442  case ID_I:
1443  case ID_NOBR:
1444  case ID_S:
1445  case ID_SMALL:
1446  case ID_STRIKE:
1447  case ID_STRONG:
1448  case ID_TT:
1449  case ID_U:
1450  case ID_DFN:
1451  case ID_CODE:
1452  case ID_SAMP:
1453  case ID_KBD:
1454  case ID_VAR:
1455  case ID_DEL:
1456  case ID_INS:
1457  return true;
1458  default:
1459  return false;
1460  }
1461 }
1462 
1463 bool KHTMLParser::isAffectedByResidualStyle(int _id)
1464 {
1465  if (isResidualStyleTag(_id)) {
1466  return true;
1467  }
1468 
1469  switch (_id) {
1470  case ID_P:
1471  case ID_DIV:
1472  case ID_BLOCKQUOTE:
1473  case ID_ADDRESS:
1474  case ID_H1:
1475  case ID_H2:
1476  case ID_H3:
1477  case ID_H4:
1478  case ID_H5:
1479  case ID_H6:
1480  case ID_CENTER:
1481  case ID_UL:
1482  case ID_OL:
1483  case ID_LI:
1484  case ID_DL:
1485  case ID_DT:
1486  case ID_DD:
1487  case ID_PRE:
1488  case ID_LISTING:
1489  return true;
1490  default:
1491  return false;
1492  }
1493 }
1494 
// Repairs the tree when a residual style tag is closed while elements of a
// higher level are still open above it on the block stack, e.g.
// <b><p>Foo</b>Goo</p>. popBlock() calls this with the stack entry of the tag
// being closed.
//
// @param elem  stack entry of the residual style element that is being closed.
//              On the path that reaches Step 6 the entry is unlinked from the
//              block stack and deleted; on the early-return paths it is left
//              untouched.
1495  void KHTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem *elem)
1496 {
1497  // Find the element that crosses over to a higher level.
1498  // ### For now, if there is more than one, we will only make sure we close the residual style.
1499  int exceptionCode = 0;
1500  HTMLStackElem *curr = blockStack;
1501  HTMLStackElem *maxElem = nullptr;
1502  HTMLStackElem *endElem = nullptr;
1503  HTMLStackElem *prev = nullptr;
1504  HTMLStackElem *prevMaxElem = nullptr;
1505  bool advancedResidual = false; // ### if set we only close the residual style
// Walk from the top of the stack down to |elem|. Among the entries whose level
// exceeds elem's: |endElem| is the first one met (nearest the top), |maxElem|
// the last one met (nearest |elem|), and |prevMaxElem| the entry visited just
// before |maxElem| (null when |maxElem| is the top of the stack).
1506  while (curr && curr != elem) {
1507  if (curr->level > elem->level) {
1508  if (!isAffectedByResidualStyle(curr->id)) {
1509  return;
1510  }
1511  if (maxElem) {
1512  advancedResidual = true;
1513  } else {
1514  endElem = curr;
1515  }
1516  maxElem = curr;
1517  prevMaxElem = prev;
1518  }
1519 
1520  prev = curr;
1521  curr = curr->next;
1522  }
1523 
// Nothing to do if |elem| is not on the stack or no higher-level entry sits above it.
1524  if (!curr || !maxElem) {
1525  return;
1526  }
1527 
// pushBlock() stores the node that was current at push time in each entry, so
// the entry directly above another one records that lower entry's own DOM node.
// NOTE(review): hence |residualElem| is presumably elem's element and
// |blockElem| presumably maxElem's element - confirm against HTMLStackElem.
1528  NodeImpl *residualElem = prev->node;
1529  NodeImpl *blockElem = prevMaxElem ? prevMaxElem->node : current;
1530  RefPtr<NodeImpl> parentElem = elem->node;
1531 
1532  // Check to see if the reparenting that is going to occur is allowed according to the DOM.
1533  // FIXME: We should either always allow it or perform an additional fixup instead of
1534  // just bailing here.
1535  // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
1536  if (!parentElem->childAllowed(blockElem)) {
1537  return;
1538  }
1539 
1540  if (maxElem->node->parentNode() != elem->node && !advancedResidual) {
1541  // Walk the stack and remove any elements that aren't residual style tags. These
1542  // are basically just being closed up. Example:
1543  // <font><span>Moo<p>Goo</font></p>.
1544  // In the above example, the <span> doesn't need to be reopened. It can just close.
1545  HTMLStackElem *currElem = maxElem->next;
1546  HTMLStackElem *prevElem = maxElem;
1547  while (currElem != elem) {
1548  HTMLStackElem *nextElem = currElem->next;
1549  if (!isResidualStyleTag(currElem->id)) {
// Unlink the entry and hand its recorded node to the entry above it.
1550  prevElem->next = nextElem;
1551  prevElem->setNode(currElem->node);
1552  delete currElem;
1553  } else {
1554  prevElem = currElem;
1555  }
1556  currElem = nextElem;
1557  }
1558 
1559  // We have to reopen residual tags in between maxElem and elem. An example of this case is:
1560  // <font><i>Moo<p>Foo</font>.
1561  // In this case, we need to transform the part before the <p> into:
1562  // <font><i>Moo</i></font><i>
1563  // so that the <i> will remain open. This involves the modification of elements
1564  // in the block stack.
1565  // This will also affect how we ultimately reparent the block, since we want it to end up
1566  // under the reopened residual tags (e.g., the <i> in the above example.)
1567  RefPtr<NodeImpl> prevNode = nullptr;
1568  RefPtr<NodeImpl> currNode = nullptr;
1569  currElem = maxElem;
1570  while (currElem->node != residualElem) {
1571  if (isResidualStyleTag(currElem->node->id())) {
1572  // Create a clone of this element.
1573  currNode = currElem->node->cloneNode(false);
1574  currElem->node->close();
1575  removeForbidden(currElem->id, forbiddenTag);
1576 
1577  // Change the stack element's node to point to the clone.
1578  currElem->setNode(currNode.get());
1579 
1580  // Attach the previous node as a child of this new node.
1581  if (prevNode) {
1582  currNode->appendChild(prevNode.get(), exceptionCode);
1583  } else { // The new parent for the block element is going to be the innermost clone.
1584  parentElem = currNode;
1585  }
1586 
1587  prevNode = currNode;
1588  }
1589 
1590  currElem = currElem->next;
1591  }
1592 
1593  // Now append the chain of new residual style elements if one exists.
1594  if (prevNode) {
1595  elem->node->appendChild(prevNode.get(), exceptionCode);
1596  }
1597  }
1598 
1599  // We need to make a clone of |residualElem| and place it just inside |blockElem|.
1600  // All content of |blockElem| is reparented to be under this clone. We then
1601  // reparent |blockElem| using real DOM calls so that attachment/detachment will
1602  // be performed to fix up the rendering tree.
1603  // So for this example: <b>...<p>Foo</b>Goo</p>
1604  // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
1605  //
1606  // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
// |guard| holds a reference so |blockElem| survives while it has no parent.
1607  SharedPtr<NodeImpl> guard(blockElem);
1608  blockElem->parentNode()->removeChild(blockElem, exceptionCode);
1609 
1610  if (!advancedResidual) {
1611  // Step 2: Clone |residualElem|.
1612  RefPtr<NodeImpl> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
1613 
1614  // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
1615  // before we've put |newNode| into the document. That way we'll only do one attachment of all
1616  // the new content (instead of a bunch of individual attachments).
1617  NodeImpl *currNode = blockElem->firstChild();
1618  while (currNode) {
1619  NodeImpl *nextNode = currNode->nextSibling();
1620  SharedPtr<NodeImpl> guard(currNode); //Protect from deletion while moving
1621  blockElem->removeChild(currNode, exceptionCode);
1622  newNode->appendChild(currNode, exceptionCode);
1623  currNode = nextNode;
1624 
1625 // TODO - To be replaced.
1626  // Re-register form elements with currently active form, step 1 will have removed them
// NOTE(review): |currNode| already points at the NEXT sibling here, so the
// first child is never re-registered and each later child is registered
// before it is moved - confirm whether that ordering is intended.
1627  if (form && currNode && currNode->isGenericFormElement()) {
1628  HTMLGenericFormElementImpl *e = static_cast<HTMLGenericFormElementImpl *>(currNode);
1629  form->registerFormElement(e);
1630  }
1631  }
1632 
1633  // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
1634  // attachment can occur yet.
1635  blockElem->appendChild(newNode.get(), exceptionCode);
1636  }
1637 
1638  // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
1639  parentElem->appendChild(blockElem, exceptionCode);
1640 
1641  // Step 6: Elide |elem|, since it is effectively no longer open. Also update
1642  // the node associated with the previous stack element so that when it gets popped,
1643  // it doesn't make the residual element the next current node.
1644  HTMLStackElem *currElem = maxElem;
1645  HTMLStackElem *prevElem = nullptr;
// |maxElem| was found strictly above |elem|, so this loop runs at least once
// and |prevElem| is non-null afterwards.
1646  while (currElem != elem) {
1647  prevElem = currElem;
1648  currElem = currElem->next;
1649  }
1650  prevElem->next = elem->next;
1651  prevElem->setNode(elem->node);
1652  delete elem;
1653 
1654  // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
1655  // In the above example, Goo should stay italic.
1656  curr = blockStack;
1657  HTMLStackElem *residualStyleStack = nullptr;
1658  while (curr && curr != endElem) {
1659  // We will actually schedule this tag for reopening
1660  // after we complete the close of this entire block.
1661  NodeImpl *currNode = current;
1662  if (isResidualStyleTag(curr->id)) {
1663  // We've overloaded the use of stack elements and are just reusing the
1664  // struct with a slightly different meaning to the variables. Instead of chaining
1665  // from innermost to outermost, we build up a list of all the tags we need to reopen
1666  // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1667  // to the outermost tag we need to reopen.
1668  // We also set curr->node to be the actual element that corresponds to the ID stored in
1669  // curr->id rather than the node that you should pop to when the element gets pulled off
1670  // the stack.
// popOneBlock(false) unlinks the entry without deleting it, so it can be reused below.
1671  popOneBlock(false);
1672  curr->setNode(currNode);
1673  curr->next = residualStyleStack;
1674  residualStyleStack = curr;
1675  } else {
1676  popOneBlock();
1677  }
1678 
1679  curr = blockStack;
1680  }
1681 
1682  reopenResidualStyleTags(residualStyleStack, nullptr); // FIXME: Deal with stray table content some day
1683  // if it becomes necessary to do so.
1684 }
1685 
1686 void KHTMLParser::reopenResidualStyleTags(HTMLStackElem *elem, DOM::NodeImpl *malformedTableParent)
1687 {
1688  // Loop for each tag that needs to be reopened.
1689  while (elem) {
1690  // Create a shallow clone of the DOM node for this element.
1691  RefPtr<NodeImpl> newNode = elem->node->cloneNode(false);
1692 
1693  // Append the new node. In the malformed table case, we need to insert before the table,
1694  // which will be the last child.
1695  int exceptionCode = 0;
1696  if (malformedTableParent) {
1697  malformedTableParent->insertBefore(newNode.get(), malformedTableParent->lastChild(), exceptionCode);
1698  } else {
1699  current->appendChild(newNode.get(), exceptionCode);
1700  }
1701  // FIXME: Is it really OK to ignore the exceptions here?
1702 
1703  // Now push a new stack element for this node we just created.
1704  pushBlock(elem->id, elem->level);
1705 
1706  // Set our strayTableContent boolean if needed, so that the reopened tag also knows
1707  // that it is inside a malformed table.
1708  blockStack->strayTableContent = malformedTableParent != nullptr;
1709  if (blockStack->strayTableContent) {
1710  inStrayTableContent++;
1711  }
1712 
1713  // Clear our malformed table parent variable.
1714  malformedTableParent = nullptr;
1715 
1716  // Update |current| manually to point to the new node.
1717  setCurrent(newNode.get());
1718 
1719  // Advance to the next tag that needs to be reopened.
1720  HTMLStackElem *next = elem->next;
1721  delete elem;
1722  elem = next;
1723  }
1724 }
1725 
1726 void KHTMLParser::pushBlock(int _id, int _level)
1727 {
1728  HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
1729 
1730  blockStack = Elem;
1731  addForbidden(_id, forbiddenTag);
1732 }
1733 
1734 void KHTMLParser::generateImpliedEndTags(int _id)
1735 {
1736  HTMLStackElem *Elem = blockStack;
1737 
1738  int level = tagPriority(_id);
1739  while (Elem && Elem->id != _id) {
1740  HTMLStackElem *NextElem = Elem->next;
1741  if (endTagRequirement(Elem->id) == DOM::OPTIONAL && Elem->level <= level) {
1742  popOneBlock();
1743  } else {
1744  break;
1745  }
1746  Elem = NextElem;
1747  }
1748 }
1749 
1750 void KHTMLParser::popOptionalBlock(int _id)
1751 {
1752  bool found = false;
1753  HTMLStackElem *Elem = blockStack;
1754 
1755  int level = tagPriority(_id);
1756  while (Elem) {
1757  if (Elem->id == _id) {
1758  found = true;
1759  break;
1760  }
1761  if (Elem->level > level || (endTagRequirement(Elem->id) != DOM::OPTIONAL && !isResidualStyleTag(Elem->id))) {
1762  break;
1763  }
1764  Elem = Elem->next;
1765  }
1766 
1767  if (found) {
1768  generateImpliedEndTags(_id);
1769  popBlock(_id);
1770  }
1771 }
1772 
1773 bool KHTMLParser::isElementInScope(int _id)
1774 {
1775  // HTML5 8.2.3.2
1776  HTMLStackElem *Elem = blockStack;
1777  while (Elem && Elem->id != _id) {
1778  if (DOM::checkIsScopeBoundary(Elem->id)) {
1779  return false;
1780  }
1781  Elem = Elem->next;
1782  }
1783  return Elem;
1784 }
1785 
1786 bool KHTMLParser::isHeadingInScope()
1787 {
1788  HTMLStackElem *Elem = blockStack;
1789  while (Elem && (Elem->id < ID_H1 || Elem->id > ID_H6)) {
1790  if (DOM::checkIsScopeBoundary(Elem->id)) {
1791  return false;
1792  }
1793  Elem = Elem->next;
1794  }
1795  return Elem;
1796 }
1797 
1798 void KHTMLParser::popBlock(int _id)
1799 {
1800  HTMLStackElem *Elem = blockStack;
1801  int maxLevel = 0;
1802 
1803 #ifdef PARSER_DEBUG
1804  qCDebug(KHTML_LOG) << "popBlock(" << getParserPrintableName(_id) << ")";
1805  while (Elem) {
1806  qCDebug(KHTML_LOG) << " > " << getParserPrintableName(Elem->id);
1807  Elem = Elem->next;
1808  }
1809  Elem = blockStack;
1810 #endif
1811 
1812  while (Elem && (Elem->id != _id)) {
1813  if (maxLevel < Elem->level) {
1814  maxLevel = Elem->level;
1815  }
1816  Elem = Elem->next;
1817  }
1818  if (!Elem) {
1819  return;
1820  }
1821 
1822  if (maxLevel > Elem->level) {
1823  // We didn't match because the tag is in a different scope, e.g.,
1824  // <b><p>Foo</b>. Try to correct the problem.
1825  if (!isResidualStyleTag(_id)) {
1826  return;
1827  }
1828  return handleResidualStyleCloseTagAcrossBlocks(Elem);
1829  }
1830 
1831  bool isAffectedByStyle = isAffectedByResidualStyle(Elem->id);
1832  HTMLStackElem *residualStyleStack = nullptr;
1833  NodeImpl *malformedTableParent = nullptr;
1834 
1835  Elem = blockStack;
1836 
1837  while (Elem) {
1838  if (Elem->id == _id) {
1839  int strayTable = inStrayTableContent;
1840  popOneBlock();
1841  Elem = nullptr;
1842 
1843  // This element was the root of some malformed content just inside an implicit or
1844  // explicit <tbody> or <tr>.
1845  // If we end up needing to reopen residual style tags, the root of the reopened chain
1846  // must also know that it is the root of malformed content inside a <tbody>/<tr>.
1847  if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
1848  NodeImpl *curr = current;
1849  while (curr && curr->id() != ID_TABLE) {
1850  curr = curr->parentNode();
1851  }
1852  malformedTableParent = curr ? curr->parentNode() : nullptr;
1853  }
1854  } else {
1855  // Schedule this tag for reopening
1856  // after we complete the close of this entire block.
1857  NodeImpl *currNode = current;
1858  if (isAffectedByStyle && isResidualStyleTag(Elem->id)) {
1859  // We've overloaded the use of stack elements and are just reusing the
1860  // struct with a slightly different meaning to the variables. Instead of chaining
1861  // from innermost to outermost, we build up a list of all the tags we need to reopen
1862  // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
1863  // to the outermost tag we need to reopen.
1864  // We also set Elem->node to be the actual element that corresponds to the ID stored in
1865  // Elem->id rather than the node that you should pop to when the element gets pulled off
1866  // the stack.
1867  popOneBlock(false);
1868  Elem->next = residualStyleStack;
1869  Elem->setNode(currNode);
1870  residualStyleStack = Elem;
1871  } else {
1872  popOneBlock();
1873  }
1874  Elem = blockStack;
1875  }
1876  }
1877 
1878  reopenResidualStyleTags(residualStyleStack, malformedTableParent);
1879 }
1880 
1881 void KHTMLParser::popOneBlock(bool delBlock)
1882 {
1883  HTMLStackElem *Elem = blockStack;
1884 
1885  // we should never get here, but some bad html might cause it.
1886 #ifndef PARSER_DEBUG
1887  if (!Elem) {
1888  return;
1889  }
1890 #else
1891  qCDebug(KHTML_LOG) << "popping block: " << getParserPrintableName(Elem->id) << "(" << Elem->id << ")";
1892 #endif
1893 
1894 #if SPEED_DEBUG < 1
1895  if ((Elem->node != current)) {
1896  if (current->maintainsState() && document) {
1897  document->registerMaintainsState(current);
1898  document->attemptRestoreState(current);
1899  }
1900  current->close();
1901  }
1902 #endif
1903 
1904  removeForbidden(Elem->id, forbiddenTag);
1905 
1906  blockStack = Elem->next;
1907  // we only set inline to false, if the element we close is a block level element.
1908  // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
1909 
1910  m_inline = Elem->m_inline;
1911 
1912  if (current->id() == ID_FORM && form && inStrayTableContent) {
1913  form->setMalformed(true);
1914  }
1915 
1916  setCurrent(Elem->node);
1917 
1918  if (Elem->strayTableContent) {
1919  inStrayTableContent--;
1920  }
1921 
1922  if (delBlock) {
1923  delete Elem;
1924  }
1925 }
1926 
1927 void KHTMLParser::popInlineBlocks()
1928 {
1929  while (blockStack && current->isInline() && current->id() != ID_FONT) {
1930  popOneBlock();
1931  }
1932 }
1933 
1934 void KHTMLParser::freeBlock()
1935 {
1936  while (blockStack) {
1937  popOneBlock();
1938  }
1939  blockStack = nullptr;
1940 }
1941 
1942 void KHTMLParser::createHead()
1943 {
1944  if (head || !doc()->documentElement()) {
1945  return;
1946  }
1947 
1948  head = new HTMLHeadElementImpl(document);
1949  HTMLElementImpl *body = doc()->body();
1950  int exceptioncode = 0;
1951  doc()->documentElement()->insertBefore(head.get(), body, exceptioncode);
1952  if (exceptioncode) {
1953 #ifdef PARSER_DEBUG
1954  qCDebug(KHTML_LOG) << "creation of head failed!!!!:" << exceptioncode;
1955 #endif
1956  delete head.get();
1957  head = nullptr;
1958  }
1959 
1960  // If the body does not exist yet, then the <head> should be pushed as the current block.
1961  if (head && !body) {
1962  pushBlock(head->id(), tagPriority(head->id()));
1963  setCurrent(head.get());
1964  }
1965 }
1966 
1967 NodeImpl *KHTMLParser::handleIsindex(Token *t)
1968 {
1969  NodeImpl *n;
1970  HTMLFormElementImpl *myform = form;
1971  if (!myform) {
1972  myform = new HTMLFormElementImpl(document, true);
1973  n = myform;
1974  } else {
1975  n = new HTMLDivElementImpl(document, ID_DIV);
1976  }
1977  NodeImpl *child = new HTMLHRElementImpl(document);
1978  n->addChild(child);
1979  DOMStringImpl *a = t->attrs ? t->attrs->getValue(ATTR_PROMPT) : nullptr;
1980  DOMString text = i18n("This is a searchable index. Enter search keywords: ");
1981  if (a) {
1982  text = a;
1983  }
1984  child = new TextImpl(document, text.implementation());
1985  n->addChild(child);
1986  child = new HTMLIsIndexElementImpl(document, myform);
1987  static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
1988  n->addChild(child);
1989  child = new HTMLHRElementImpl(document);
1990  n->addChild(child);
1991 
1992  return n;
1993 }
1994 
1995 void KHTMLParser::startBody()
1996 {
1997  if (inBody) {
1998  return;
1999  }
2000 
2001  inBody = true;
2002 
2003  if (isindex) {
2004  insertNode(isindex, true /* don't decend into this node */);
2005  isindex = nullptr;
2006  }
2007 }
This file is part of the HTML rendering engine for KDE.
MESSAGECORE_EXPORT KMime::Content * next(KMime::Content *node, bool allowChildren=true)
Renders and displays HTML in a QScrollArea.
Definition: khtmlview.h:97
QString fromRawData(const QChar *unicode, int size)
bool isAdFiltered(const QString &url) const
tests whether url is filtered.
const char * constData() const const
This class implements the basic string we use in the DOM.
Definition: dom_string.h:44
QString toLower() const const
QByteArray toLocal8Bit() const const
QStringView level(QStringView ifopt)
QString i18n(const char *text, const TYPE &arg...)
This library provides a full-featured HTML parser and widget.
DOMStringImpl * implementation() const
Definition: dom_string.h:145
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sun Oct 24 2021 22:48:03 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.