KHtml

step.cpp
1 /*
2  * step.cc - Copyright 2005 Frerich Raabe <[email protected]>
3  * Copyright 2010 Maksim Orlovich <[email protected]>
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  * notice, this list of conditions and the following disclaimer in the
13  * documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "step.h"
27 
28 #include "dom/dom3_xpath.h"
29 #include "xml/dom_docimpl.h"
30 #include "xml/dom_nodeimpl.h"
31 #include "xml/dom_textimpl.h"
32 #include "xml/dom3_xpathimpl.h"
33 
34 #include "step.h"
35 
36 #include <QtDebug>
37 
38 using namespace DOM;
39 using namespace DOM::XPath;
40 using namespace khtml;
41 using namespace khtml::XPath;
42 
43 static bool areAdjacentTextNodes(NodeImpl *n, NodeImpl *m)
44 {
45  if (!n || !m) {
46  return false;
47  }
48 
49  if (n->nodeType() != Node::TEXT_NODE && n->nodeType() != Node::CDATA_SECTION_NODE) {
50  return false;
51  }
52  if (m->nodeType() != Node::TEXT_NODE && m->nodeType() != Node::CDATA_SECTION_NODE) {
53  return false;
54  }
55 
56  // ###
57 #ifdef __GNUC__
58 #warning "Might need more generic adjacency -- check"
59 #endif
60 
61  return (n->nextSibling() && (n->nextSibling() == m)) ||
62  (m->nextSibling() && (m->nextSibling() == n));
63 }
64 
65 static DomNodeList compressTextNodes(const DomNodeList &nodes)
66 {
67  DomNodeList outNodes = new StaticNodeListImpl;
68 
69  for (unsigned long n = 0; n < nodes->length(); ++n) {
70  NodeImpl *node = nodes->item(n);
71  NodeImpl *next = n + 1 < nodes->length() ? nodes->item(n + 1) : nullptr;
72 
73  if (!next || !areAdjacentTextNodes(node, next)) {
74  outNodes->append(node);
75  } else if (areAdjacentTextNodes(node, next)) {
76  QString s = node->nodeValue().string();
77 
78  // n2 is a potential successor, and is always in-range
79  unsigned long n2 = n + 1;
80  while (n2 < nodes->length() && areAdjacentTextNodes(nodes->item(n2), nodes->item(n2 - 1))) {
81  s += nodes->item(n2)->nodeValue().string();
82  ++n2;
83  }
84  outNodes->append(node->document()->createTextNode(new DOMStringImpl(s.data(), s.length())));
85  }
86  }
87  return outNodes;
88 }
89 
90 QString Step::axisAsString(AxisType axis)
91 {
92  switch (axis) {
93  case AncestorAxis: return "ancestor";
94  case AncestorOrSelfAxis: return "ancestor-or-self";
95  case AttributeAxis: return "attribute";
96  case ChildAxis: return "child";
97  case DescendantAxis: return "descendant";
98  case DescendantOrSelfAxis: return "descendant-or-self";
99  case FollowingAxis: return "following";
100  case FollowingSiblingAxis: return "following-sibling";
101  case NamespaceAxis: return "namespace";
102  case ParentAxis: return "parent";
103  case PrecedingAxis: return "preceding";
104  case PrecedingSiblingAxis: return "preceding-sibling";
105  case SelfAxis: return "self";
106  }
107  return QString();
108 }
109 
110 Step::Step(): m_compileState(NotCompiled)
111 {
112 }
113 
114 Step::Step(AxisType axis, const DOMString &nodeTest,
115  const QList<Predicate *> &predicates)
116  : m_axis(axis),
117  m_nodeTest(nodeTest),
118  m_compileState(NotCompiled),
119  m_predicates(predicates)
120 {
121 }
122 
123 Step::~Step()
124 {
125  qDeleteAll(m_predicates);
126 }
127 
128 DomNodeList Step::evaluate(NodeImpl *context) const
129 {
130  assert(context);
131 #ifdef XPATH_VERBOSE
132  qCDebug(KHTML_LOG) << "Evaluating step, axis='" << axisAsString(m_axis) << "'"
133  << ", nodetest='" << m_nodeTest << "'"
134  << ", " << m_predicates.count() << " predicates";
135  qCDebug(KHTML_LOG) << DOM::getPrintableName(context->id());
136 #endif
137 
138  DomNodeList inNodes = nodesInAxis(context), outNodes;
139 
140  // ### optimization opportunity: can say DocumentOrder for most
141  StaticNodeListImpl::NormalizationKind known = StaticNodeListImpl::AxisOrder;
142  inNodes->setKnownNormalization(known);
143 
144 #ifdef XPATH_VERBOSE
145  qCDebug(KHTML_LOG) << "Axis " << axisAsString(m_axis) << " matches " << inNodes->length() << " nodes.";
146 #endif
147 
148  inNodes = nodeTestMatches(context, inNodes);
149  inNodes->setKnownNormalization(known); // nodeTest doesn't change order
150 
151 #ifdef XPATH_VERBOSE
152  qCDebug(KHTML_LOG) << "\tNodetest " << m_nodeTest << " trims this number to " << inNodes->length();
153 #endif
154 
155  if (m_predicates.isEmpty()) {
156  return inNodes;
157  }
158 
159  foreach (Predicate *predicate, m_predicates) {
160  outNodes = new StaticNodeListImpl;
161  Expression::evaluationContext().size = int(inNodes->length());
162  Expression::evaluationContext().position = 1;
163 
164  for (unsigned long n = 0; n < inNodes->length(); ++n) {
165  NodeImpl *node = inNodes->item(n);
166  Expression::evaluationContext().node = node;
167  EvaluationContext backupCtx = Expression::evaluationContext();
168 #ifdef XPATH_VERBOSE
169  qCDebug(KHTML_LOG) << Expression::evaluationContext().position << "/" << node;
170 #endif
171  if (predicate->evaluate()) {
172  outNodes->append(node);
173  }
174  Expression::evaluationContext() = backupCtx;
175  ++Expression::evaluationContext().position;
176  }
177 #ifdef XPATH_VERBOSE
178  qCDebug(KHTML_LOG) << "\tPredicate trims this number to " << outNodes->length();
179 #endif
180  inNodes = outNodes;
181  inNodes->setKnownNormalization(known); // predicates don't change order
182  }
183 
184  return outNodes;
185 }
186 
187 DomNodeList Step::nodesInAxis(NodeImpl *context) const
188 {
189  //assert(context);
190  DomNodeList nodes = new StaticNodeListImpl;
191  switch (m_axis) {
192  case ChildAxis: {
193  NodeImpl *n = xpathFirstChild(context);
194  while (n) {
195  nodes->append(n);
196  n = n->nextSibling();
197  }
198  return nodes;
199  }
200  case DescendantAxis: {
201  collectChildrenRecursively(nodes, context);
202  return nodes;
203  }
204  case ParentAxis: {
205  NodeImpl *p = xpathParentNode(context);
206 
207  if (p) {
208  nodes->append(p);
209  }
210  return nodes;
211  }
212  case AncestorAxis: {
213  NodeImpl *n = xpathParentNode(context);
214  while (n) {
215  nodes->append(n);
216  n = xpathParentNode(n);
217  }
218  return nodes;
219  }
220  case FollowingSiblingAxis: {
221  if (context->nodeType() == Node::ATTRIBUTE_NODE ||
222  context->nodeType() == Node::XPATH_NAMESPACE_NODE) {
223  return nodes; // empty
224  }
225 
226  NodeImpl *n = context->nextSibling();
227  while (n) {
228  nodes->append(n);
229  n = n->nextSibling();
230  }
231  return nodes;
232  }
233  case PrecedingSiblingAxis: {
234  if (context->nodeType() == Node::ATTRIBUTE_NODE ||
235  context->nodeType() == Node::XPATH_NAMESPACE_NODE) {
236  return nodes; // empty
237  }
238 
239  NodeImpl *n = context->previousSibling();
240  while (n) {
241  nodes->append(n);
242  n = n->previousSibling();
243  }
244  return nodes;
245  }
246  case FollowingAxis: {
247  NodeImpl *p = context;
248  while (!isRootDomNode(p)) {
249  NodeImpl *n = nextSiblingForFollowing(p);
250  while (n) {
251  nodes->append(n);
252  collectChildrenRecursively(nodes, n);
253  n = n->nextSibling();
254  }
255  p = xpathParentNode(p);
256  }
257  return nodes;
258  }
259  case PrecedingAxis: {
260  NodeImpl *p = context;
261  while (!isRootDomNode(p)) {
262  NodeImpl *n = p->previousSibling();
263  while (n) {
264  collectChildrenReverse(nodes, n);
265  nodes->append(n);
266  n = n->previousSibling();
267  }
268  p = xpathParentNode(p);
269  }
270  return nodes;
271  }
272  case AttributeAxis: {
273  if (context->nodeType() != Node::ELEMENT_NODE) {
274  return nodes; // empty
275  }
276 
277  NamedAttrMapImpl *attrs = static_cast<ElementImpl *>(context)->attributes(true /*read-only*/);
278  if (!attrs) {
279  return nodes;
280  }
281 
282  for (unsigned long i = 0; i < attrs->length(); ++i) {
283  nodes->append(attrs->item(i));
284  }
285  return nodes;
286  }
287  case NamespaceAxis: {
288  // ### TODO: Need to implement this, but not a priority, since
289  // other impls don't.
290 #if 0
291  if (context->nodeType() != Node::ELEMENT_NODE) {
292  return nodes;
293  }
294 
295  bool foundXmlNsNode = false;
296  NodeImpl *node = context;
297  while (node) {
298  NamedAttrMapImpl *attrs =
299  node->isElementNode() ? static_cast<ElementImpl *>(node)->attributes() : 0;
300  if (!attrs) {
301  node = xpathParentNode(node);
302  continue;
303  }
304 
305  for (unsigned long i = 0; i < attrs->length(); ++i) {
306  NodeImpl *n = attrs->item(i);
307  if (n->nodeName().string().startsWith("xmlns:")) {
308  nodes->append(n);
309  } else if (n->nodeName() == "xmlns" &&
310  !foundXmlNsNode
311  ) {
312  foundXmlNsNode = true;
313  if (!n->nodeValue().string().isEmpty()) {
314  nodes->append(n);
315  }
316  }
317  }
318  node = xpathParentNode(node);
319  }
320 #endif
321  return nodes;
322  }
323  case SelfAxis:
324  nodes->append(context);
325  return nodes;
326  case DescendantOrSelfAxis:
327  nodes->append(context);
328  collectChildrenRecursively(nodes, context);
329  return nodes;
330  case AncestorOrSelfAxis: {
331  nodes->append(context);
332  NodeImpl *n = xpathParentNode(context);
333  while (n) {
334  nodes->append(n);
335  n = xpathParentNode(n);
336  }
337  return nodes;
338  }
339  default:
340  qCWarning(KHTML_LOG) << "Unknown axis " << axisAsString(m_axis) << " passed to Step::nodesInAxis";
341  }
342 
343  return nodes; // empty
344 }
345 
346 void Step::compileNodeTest(bool htmlCompat) const
347 {
348  m_compileState = htmlCompat ? CompiledForHTML : CompiledForXML;
349 
350  if (m_nodeTest == "*") {
351  m_nodeTestType = NT_Star;
352  } else if (m_nodeTest == "text()") {
353  m_nodeTestType = NT_Text;
354  } else if (m_nodeTest == "comment()") {
355  m_nodeTestType = NT_Comment;
356  } else if (m_nodeTest.startsWith("processing-instruction")) {
357  DOMString param;
358 
359  // ### is this right? (parens?)
360  const int space = m_nodeTest.find(' ');
361  if (space > -1) {
362  param = m_nodeTest.substring(space + 1);
363  }
364 
365  if (param.isEmpty()) {
366  m_nodeTestType = NT_PI;
367  } else {
368  m_nodeTestType = NT_PI_Lit;
369  m_piInfo = param;
370  }
371  } else if (m_nodeTest == "node()") {
372  m_nodeTestType = NT_AnyNode;
373  } else {
374  // Some sort of name combo.
375  PrefixName prefix;
376  LocalName localName;
377 
378  splitPrefixLocalName(m_nodeTest, prefix, localName, htmlCompat);
379 
380  if (prefix.id() == DOM::emptyPrefix) {
381  // localname only
382  m_nodeTestType = NT_LocalName;
383  m_localName = localName;
384  } else if (localName.toString() == "*") {
385  // namespace only
386  m_nodeTestType = NT_Namespace;
387  m_namespace = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest));
388  } else {
389  // Both parts.
390  m_nodeTestType = NT_QName;
391  m_localName = localName;
392  m_namespace = NamespaceName::fromString(namespaceFromNodetest(m_nodeTest));
393  }
394  }
395 }
396 
397 DomNodeList Step::nodeTestMatches(NodeImpl *ctx, const DomNodeList &nodes) const
398 {
399  CompileState desired = ctx->htmlCompat() ? CompiledForHTML : CompiledForXML;
400  if (m_compileState != desired) {
401  compileNodeTest(ctx->htmlCompat());
402  }
403 
404  if (m_nodeTestType == NT_AnyNode) { // no need for a new set
405  return nodes;
406  }
407 
408  DomNodeList matches = new StaticNodeListImpl;
409 
410  // A lot of things can be handled by just checking the type,
411  // and maybe a hair more special handling
412  int matchType;
413  switch (m_nodeTestType) {
414  case NT_Star:
415  matchType = primaryNodeType(m_axis);
416  break;
417  case NT_Comment:
418  matchType = Node::COMMENT_NODE;
419  break;
420  case NT_Text:
421  matchType = Node::TEXT_NODE;
422  break;
423  case NT_PI:
424  case NT_PI_Lit:
425  matchType = Node::PROCESSING_INSTRUCTION_NODE;
426  break;
427  default:
428  matchType = -1;
429  }
430 
431  if (matchType != -1) {
432  for (unsigned long n = 0; n < nodes->length(); ++n) {
433  NodeImpl *node = nodes->item(n);
434  int nodeType = node->nodeType();
435  if (nodeType == matchType) {
436  if (m_nodeTestType == NT_PI_Lit && node->nodeName() != m_piInfo) {
437  continue;
438  }
439 
440  matches->append(node);
441  }
442 
443  if (matchType == NT_Text && nodeType == Node::CDATA_SECTION_NODE) {
444  matches->append(node);
445  }
446  }
447  return matches;
448  }
449 
450  // Now we are checking by name. We always want to filter out
451  // the primary axis here
452  // ### CHECK: this used to have special handling for some axes.
453  matchType = primaryNodeType(m_axis);
454  for (unsigned long n = 0; n < nodes->length(); ++n) {
455  NodeImpl *node = nodes->item(n);
456  if (node->nodeType() != matchType) {
457  continue;
458  }
459 
460  if (m_nodeTestType == NT_LocalName) {
461  if (localNamePart(node->id()) == m_localName.id()) {
462  matches->append(node);
463  }
464  } else if (m_nodeTestType == NT_Namespace) {
465  if (namespacePart(node->id()) == m_namespace.id()) {
466  matches->append(node);
467  }
468  } else {
469  // NT_QName
470  if (node->id() == makeId(m_namespace.id(), m_localName.id())) {
471  matches->append(node);
472  }
473  }
474  }
475  return matches;
476 }
477 
478 void Step::optimize()
479 {
480  foreach (Predicate *predicate, m_predicates) {
481  predicate->optimize();
482  }
483 }
484 
485 QString Step::dump() const
486 {
487  QString s = QString("<step axis=\"%1\" nodetest=\"%2\">").arg(axisAsString(m_axis)).arg(m_nodeTest.string());
488  foreach (Predicate *predicate, m_predicates) {
489  s += predicate->dump();
490  }
491  s += "</step>";
492  return s;
493 }
494 
495 DOMString Step::namespaceFromNodetest(const DOMString &nodeTest) const
496 {
497  int i = nodeTest.find(':');
498  if (i == -1) {
499  return DOMString();
500  }
501 
502  DOMString prefix(nodeTest.substring(0, i));
503  XPathNSResolverImpl *resolver = Expression::evaluationContext().resolver;
504 
505  DOM::DOMString ns;
506  if (resolver) {
507  ns = resolver->lookupNamespaceURI(prefix);
508  }
509 
510  if (ns.isNull()) {
511  Expression::reportNamespaceErr();
512  }
513 
514  return ns;
515 }
516 
517 unsigned int Step::primaryNodeType(AxisType axis) const
518 {
519  switch (axis) {
520  case AttributeAxis:
521  return Node::ATTRIBUTE_NODE;
522  case NamespaceAxis:
523  return Node::XPATH_NAMESPACE_NODE;
524  default:
525  return Node::ELEMENT_NODE;
526  }
527 }
528 
QString & append(QChar ch)
This file is part of the HTML rendering engine for KDE.
MESSAGECORE_EXPORT KMime::Content * next(KMime::Content *node, bool allowChildren=true)
This class implements the basic string we use in the DOM.
Definition: dom_string.h:44
This library provides a full-featured HTML parser and widget.
QString arg(qlonglong a, int fieldWidth, int base, QChar fillChar) const const
int length() const const
QChar * data()
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Mon Oct 25 2021 22:48:22 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.