KItinerary

htmldocument.h
1/*
2 SPDX-FileCopyrightText: 2018 Volker Krause <vkrause@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5*/
6
7#pragma once
8
9#include "kitinerary_export.h"
10
11#include <QObject>
12
13#include <memory>
14
15struct _xmlNode; // Opaque forward declaration of the DOM node type wrapped by HtmlElement; presumably libxml2's node struct (TODO confirm).
16
17namespace KItinerary {
18
19class HtmlDocument; // Forward declaration; HtmlElement names this class as a friend.
20class HtmlDocumentPrivate; // Forward declaration of the implementation type that HtmlDocument holds through a std::unique_ptr.
21
22/** HTML document element: a Q_GADGET handle wrapping a pointer to a single DOM node, with its accessors exposed to the Qt meta-object system as properties and invokable methods. */
23class KITINERARY_EXPORT HtmlElement
24{
25 Q_GADGET
26 Q_PROPERTY(bool isNull READ isNull)
27 Q_PROPERTY(QString name READ name)
28 Q_PROPERTY(KItinerary::HtmlElement parent READ parent)
29 Q_PROPERTY(KItinerary::HtmlElement firstChild READ firstChild)
30 Q_PROPERTY(KItinerary::HtmlElement nextSibling READ nextSibling)
31 Q_PROPERTY(QString content READ content)
32 Q_PROPERTY(QString recursiveContent READ recursiveContent)
33public: // NOTE(review): the listing numbering jumps from 33 to 36 here, so two public declarations appear to be missing from this rendering.
36
37 /** Checks whether this element is null. NOTE(review): presumably true when no DOM node is wrapped; confirm in the implementation. */
38 bool isNull() const;
39 /** Returns the element name. */
40 QString name() const;
41 /** Returns the value of the attribute named @p attr. Invokable through the meta-object system. */
42 Q_INVOKABLE QString attribute(const QString &attr) const;
43 /** Returns the parent element of this node. */
44 HtmlElement parent() const;
45 /** Returns the first child element of this node. */
46 HtmlElement firstChild() const;
47 /** Returns the next sibling element of this node. */
48 HtmlElement nextSibling() const;
49 /** Returns the content of this element.
50 * That is, all text nodes that are immediate children of this element.
51 * Leading and trailing whitespace is trimmed from the content.
52 */
53 QString content() const;
54 /** Returns the content of this element and all its children. */
55 QString recursiveContent() const;
56 /** Checks whether an attribute with name @p attr exists. Unlike attribute(), this is not declared Q_INVOKABLE. */
57 bool hasAttribute(const QString &attr) const;
58 /** Returns the list of all attributes of this node. This is not declared Q_INVOKABLE or as a property. */
59 QStringList attributes() const;
60
61 /** Evaluates the XPath expression @p xpath relative to this node and returns the result as a QVariant. NOTE(review): the concrete variant contents are not visible from this header. */
62 Q_INVOKABLE QVariant eval(const QString &xpath) const;
63
64 /** Checks if two HtmlElement instances refer to the same DOM node. */
65 bool operator==(const HtmlElement &other) const;
66
67private:
68 friend class HtmlDocument;
69 HtmlElement(_xmlNode *dd); // Private node-wrapping constructor; reachable only from this class and its friend HtmlDocument.
70 _xmlNode *d; // The wrapped DOM node. NOTE(review): ownership is not visible in this header; presumably owned by the document (confirm).
71};
72
73/** HTML document for extraction.
74 * This is used as input for ExtractorEngine and the JS extractor scripts.
75 * @note This class is only functional if libxml is available as a dependency,
76 * otherwise all methods return empty values.
77 */
78class KITINERARY_EXPORT HtmlDocument : public QObject
79{
80 Q_OBJECT
81 Q_PROPERTY(KItinerary::HtmlElement root READ root)
82 Q_PROPERTY(QString rawData READ rawData CONSTANT)
83public: // NOTE(review): the listing numbering jumps from 83 to 85 here, so one public declaration appears to be missing from this rendering.
85
86 /** Creates an HtmlDocument from the given raw data.
87 * @returns @c nullptr if loading fails or libxml was not found.
88 */
89 static HtmlDocument* fromData(const QByteArray &data, QObject *parent = nullptr);
90 /** Creates an HtmlDocument from a given (unicode) string.
91 * @returns @c nullptr if loading fails or libxml was not found.
92 */
93 static HtmlDocument* fromString(const QString &data, QObject *parent = nullptr);
94
95 /** Returns the root element of the document. */
96 HtmlElement root() const;
97
98 /** Returns the raw textual HTML data. Exposed as a CONSTANT property. */
99 QString rawData() const;
100
101 /** Evaluates the XPath expression @p xpath relative to the document root and returns the result as a QVariant. */
102 Q_INVOKABLE QVariant eval(const QString &xpath) const;
103
104private:
105 explicit HtmlDocument(QObject *parent = nullptr); // Private constructor; the static fromData()/fromString() factories are the public way to obtain an instance.
106 std::unique_ptr<HtmlDocumentPrivate> d; // Implementation data; HtmlDocumentPrivate is only forward-declared in this header.
107};
108
109}
110
111Q_DECLARE_METATYPE(KItinerary::HtmlElement) // Makes HtmlElement known to Qt's meta-type system so that values can be carried in a QVariant.
112
HTML document for extraction.
HTML document element.
Classes for reservation/travel data models, data extraction and data augmentation.
Definition berelement.h:17
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:50:00 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.