7#include "extractordocumentnodefactory.h"
8#include "extractordocumentnode.h"
9#include "extractordocumentprocessor.h"
12#include "processors/binarydocumentprocessor.h"
13#include "processors/eradocumentprocessor.h"
14#include "processors/externalprocessor.h"
15#include "processors/htmldocumentprocessor.h"
16#include "processors/httpresponseprocessor.h"
17#include "processors/iatabcbpdocumentprocessor.h"
18#include "processors/icaldocumentprocessor.h"
19#include "processors/imagedocumentprocessor.h"
20#include "processors/jsonlddocumentprocessor.h"
21#include "processors/mimedocumentprocessor.h"
22#include "processors/pdfdocumentprocessor.h"
23#include "processors/pkpassdocumentprocessor.h"
24#include "processors/plistdocumentprocessor.h"
25#include "processors/textdocumentprocessor.h"
26#include "processors/uic9183documentprocessor.h"
27#include "processors/vdvdocumentprocessor.h"
30#include <QMimeDatabase>
// Upper bound for the size of an input document. The size check further down in this
// file compares data.size() against this value (together with MinDocumentSize) before
// any processor is consulted.
// NOTE(review): presumably a byte count (~10 MB); the type of `data` is not visible here.
36 MaxDocumentSize = 10000000,
// Registry of document processors. A single function-local static instance of this class
// is created in the ExtractorDocumentNodeFactory constructor.
// NOTE(review): presumably that instance is what ExtractorDocumentNodeFactoryPrivate::s
// points to; the assignment is not part of this excerpt.
40class ExtractorDocumentNodeFactoryStatic {
42 ExtractorDocumentNodeFactoryStatic();
// Registers `processor` and takes ownership of it (it is moved into processorPool).
//  - canonicalMimeType: entered into both the probe list and the MIME type lookup table.
//  - aliasMimeTypes:    additional MIME type names for the same processor.
//  - fallbackMimeType:  if non-empty, the processor is also added to the fallback probe
//                       list and to the MIME type lookup table under this type.
44 void registerProcessor(std::unique_ptr<ExtractorDocumentProcessor> &&processor,
QStringView canonicalMimeType,
45 std::initializer_list<QStringView> aliasMimeTypes = {},
QStringView fallbackMimeType = {});
// Convenience overload: constructs a processor of type T via std::make_unique and
// forwards all MIME type arguments to the overload above.
48 inline void registerProcessor(
QStringView canonicalMimeType, std::initializer_list<QStringView> aliasMimeTypes = {},
QStringView fallbackMimeType = {})
50 registerProcessor(std::make_unique<T>(), canonicalMimeType, aliasMimeTypes, fallbackMimeType);
// Registers the processors that ship with this library (see the definition below).
53 void registerBuiltIn();
// Registry entry. The members are not part of this excerpt; other code in this file
// brace-initializes it as { MIME type string, raw processor pointer } and reads
// `.mimeType` and `.processor`.
56 struct ProcessorData {
// Processors asked via canHandleData() first when creating a node from raw data.
60 std::vector<ProcessorData> m_probeProcessors;
// Processors asked via canHandleData() in a second pass; filled with push_back,
// i.e. kept in registration order.
61 std::vector<ProcessorData> m_fallbackProbeProcessors;
// Lookup table searched with std::lower_bound on mimeType, so it has to stay sorted
// by MIME type.
62 std::vector<ProcessorData> m_mimetypeProcessorMap;
// Owns all registered processors; the containers above only hold raw pointers
// obtained via processor.get().
66 std::vector<std::unique_ptr<ExtractorDocumentProcessor>> processorPool;
// Per-instance private data of ExtractorDocumentNodeFactory.
71class ExtractorDocumentNodeFactoryPrivate {
// Processor registry used for all lookups (accessed as d->s throughout this file).
73 ExtractorDocumentNodeFactoryStatic *s;
// Optional processor that is consulted before any registered processor when creating a
// node from raw data; null unless an ExternalProcessor has been installed.
74 std::unique_ptr<ExtractorDocumentProcessor> interceptProcessor;
// Constructor of the processor registry; its body is not part of this excerpt.
// NOTE(review): presumably this is where registerBuiltIn() gets called — confirm.
78ExtractorDocumentNodeFactoryStatic::ExtractorDocumentNodeFactoryStatic()
// Body fragment of the sorted-insert helper (called as insertProcessor(proc, mimeType,
// procMap) elsewhere in this file); the function header is not part of this excerpt.
// Binary search for the first entry whose MIME type is not less than `mimeType`.
89 const auto it = std::lower_bound(procMap.begin(), procMap.end(), mimeType, [](
const auto &proc,
auto mt) {
90 return proc.mimeType < mt;
// An exact match at the insertion point means this MIME type is already registered.
// NOTE(review): what follows the warning (likely an early return) is not visible here.
92 if (it != procMap.end() && (*it).mimeType == mimeType) {
93 qCWarning(
Log) <<
"Document processor already registered for mimetype:" <<
mimeType;
// Insert at the lower_bound position so procMap stays sorted by MIME type.
97 procMap.insert(it, {
mimeType.toString(), proc });
// Registers `processor` under the given MIME types and takes ownership of it.
// The registry containers only store the raw pointer from processor.get(); ownership is
// transferred to processorPool as the last step, after all raw pointers have been taken.
100void ExtractorDocumentNodeFactoryStatic::registerProcessor(std::unique_ptr<ExtractorDocumentProcessor> &&processor,
QStringView canonicalMimeType,
101 std::initializer_list<QStringView> aliasMimeTypes,
QStringView fallbackMimeType)
// The canonical type goes into both the probe list and the MIME type lookup table.
// NOTE(review): registerBuiltIn() passes an empty canonical type for some processors;
// whether empty types are filtered out is not visible in this excerpt.
103 insertProcessor(processor.get(), canonicalMimeType, m_probeProcessors);
104 insertProcessor(processor.get(), canonicalMimeType, m_mimetypeProcessorMap);
// Alias handling; the loop body is not part of this excerpt.
105 for (
const auto mt : aliasMimeTypes) {
// A fallback type is appended to the fallback probe list (registration order) and is
// also made available for direct lookup by MIME type.
108 if (!fallbackMimeType.
isEmpty()) {
109 m_fallbackProbeProcessors.push_back({ fallbackMimeType.
toString(), processor.get() });
111 insertProcessor(processor.get(), fallbackMimeType, m_mimetypeProcessorMap);
// Hand over ownership; the raw pointers stored above remain valid because the pool
// keeps the processor object alive.
112 processorPool.push_back(std::move(processor));
// Registers all processors provided by this library.
115void ExtractorDocumentNodeFactoryStatic::registerBuiltIn()
// Processors registered with a canonical MIME type (ImageDocumentProcessor additionally
// lists image/png, image/jpeg and image/gif as aliases). registerProcessor() enters these
// into the regular probe list and the MIME type lookup table.
117 registerProcessor<PdfDocumentProcessor>(u
"application/pdf");
118 registerProcessor<PkPassDocumentProcessor>(u
"application/vnd.apple.pkpass");
119 registerProcessor<IcalEventProcessor>(u
"internal/event");
120 registerProcessor<ImageDocumentProcessor>(u
"internal/qimage", {u
"image/png", u
"image/jpeg", u
"image/gif"});
121 registerProcessor<ElbDocumentProcessor>(u
"internal/era-elb");
122 registerProcessor<SsbDocumentProcessor>(u
"internal/era-ssb");
123 registerProcessor<IataBcbpDocumentProcessor>(u
"internal/iata-bcbp");
124 registerProcessor<Uic9183DocumentProcessor>(u
"internal/uic9183");
125 registerProcessor<VdvDocumentProcessor>(u
"internal/vdv");
126 registerProcessor<IcalCalendarProcessor>(u
"text/calendar");
127 registerProcessor<PListDocumentProcessor>(u
"application/x-plist");
128 registerProcessor<HttpResponseProcessor>(u
"internal/http-response");
129 registerProcessor<HarDocumentProcessor>(u
"internal/har-archive");
// Processors registered with an empty canonical type and a fallback type (third
// argument) instead. registerProcessor() appends fallback types to
// m_fallbackProbeProcessors in call order, and the fallback probe loop in this file
// comes after the loop over the regular probe processors, so the order of the calls
// below is the order in which these processors are probed.
133 registerProcessor<JsonLdDocumentProcessor>({}, {u
"application/json"}, u
"application/ld+json");
134 registerProcessor<MimeDocumentProcessor>({}, {u
"application/mbox"}, u
"message/rfc822");
135 registerProcessor<HtmlDocumentProcessor>({}, {u
"application/xhtml+xml"}, u
"text/html");
136 registerProcessor<TextDocumentProcessor>({}, {}, u
"text/plain");
137 registerProcessor<BinaryDocumentProcessor>({}, {}, u
"application/octet-stream");
// Fragment of the alias lookup (invoked as resolveAlias() elsewhere in this file): this
// branch is taken when the lookup in m_aliasMap found an entry. The function header, the
// lookup itself and both return paths are not part of this excerpt.
143 if (it != m_aliasMap.
end()) {
// Creates the per-instance private data. The processor registry is a function-local
// static, so it is constructed once, the first time a factory is created, and not per
// factory instance.
// NOTE(review): the line wiring d->s to s_factory is not part of this excerpt.
150ExtractorDocumentNodeFactory::ExtractorDocumentNodeFactory()
151 : d(std::make_unique<ExtractorDocumentNodeFactoryPrivate>())
153 static ExtractorDocumentNodeFactoryStatic s_factory;
// Defaulted out of line in this translation unit.
// NOTE(review): presumably because `d` is a std::unique_ptr to the private class, whose
// complete definition is only available in this file — confirm against the header.
157ExtractorDocumentNodeFactory::~ExtractorDocumentNodeFactory() =
default;
// Body fragment of the node creation from raw data with content probing; the function
// header, the MIME type auto-detection and all return statements are not part of this
// excerpt.
// Size guard: taken when the data is not larger than MinDocumentSize or is larger than
// MaxDocumentSize.
161 if (data.
size() <= MinDocumentSize || data.
size() > MaxDocumentSize) {
// An installed intercept processor gets the first chance at the data, ahead of all
// registered processors.
165 if (d->interceptProcessor && d->interceptProcessor->canHandleData(data, fileName)) {
166 auto node = d->interceptProcessor->createNodeFromData(data);
// Only supply a default MIME type if the intercept processor did not set one.
167 if (node.mimeType().isEmpty()) {
168 node.setMimeType(QStringLiteral(
"internal/external-process"));
170 node.setProcessor(d->interceptProcessor.get());
// Map the auto-detected MIME type through the alias table.
182 mimeType = d->s->resolveAlias(autoDetectedMimeType);
// First pass: a registered probe processor is tried if it claims the data, or if its
// MIME type equals the (non-empty) MIME type determined so far.
185 for (
const auto &p : d->s->m_probeProcessors) {
186 if (p.processor->canHandleData(data, fileName) || (!mimeType.
isEmpty() && p.mimeType == mimeType)) {
187 auto node = p.processor->createNodeFromData(data);
// A node with null content is treated specially (branch body not visible here;
// presumably this candidate is skipped).
188 if (node.content().isNull()) {
192 node.setMimeType(p.mimeType);
193 node.setProcessor(p.processor);
// Second pass: fallback processors are selected by canHandleData() only, the MIME type
// is not considered here.
199 for (
const auto &p : d->s->m_fallbackProbeProcessors) {
200 if (p.processor->canHandleData(data, fileName)) {
201 auto node = p.processor->createNodeFromData(data);
202 if (node.content().isNull()) {
206 node.setMimeType(p.mimeType);
207 node.setProcessor(p.processor);
// Body fragment of the node creation from raw data for an explicitly given MIME type;
// the function header and return statements are not part of this excerpt.
// Resolve aliases first so the lookup below uses the name the processor is registered under.
216 mimeType = d->s->resolveAlias(mimeType);
// Binary search in the lookup table, which is sorted by MIME type.
217 const auto it = std::lower_bound(d->s->m_mimetypeProcessorMap.begin(), d->s->m_mimetypeProcessorMap.end(), mimeType, [](
const auto &proc,
auto mt) {
218 return proc.mimeType < mt;
// lower_bound only yields an insertion position, so an exact match has to be verified.
220 if (it == d->s->m_mimetypeProcessorMap.end() || (*it).mimeType != mimeType) {
221 qCDebug(
Log) <<
"No document processor found for mimetype" << mimeType;
// Let the matching processor decode the raw data and tag the node with the registered
// MIME type and the processor that produced it.
225 auto node = (*it).processor->createNodeFromData(data);
226 node.setMimeType((*it).mimeType);
227 node.setProcessor((*it).processor);
// Body fragment of the node creation from already decoded content for an explicitly
// given MIME type; the function header and return statements are not part of this
// excerpt. Same lookup as for raw data, but the processor is invoked through
// createNodeFromContent() instead of createNodeFromData().
233 mimeType = d->s->resolveAlias(mimeType);
// Binary search in the lookup table, which is sorted by MIME type.
234 const auto it = std::lower_bound(d->s->m_mimetypeProcessorMap.begin(), d->s->m_mimetypeProcessorMap.end(), mimeType, [](
const auto &proc,
auto mt) {
235 return proc.mimeType < mt;
// lower_bound only yields an insertion position, so an exact match has to be verified.
237 if (it == d->s->m_mimetypeProcessorMap.end() || (*it).mimeType != mimeType) {
238 qCDebug(
Log) <<
"No document processor found for mimetype" << mimeType;
// Wrap the decoded content and tag the node with the registered MIME type and the
// processor responsible for it.
242 auto node = (*it).processor->createNodeFromContent(decodedData);
243 node.setMimeType((*it).mimeType);
244 node.setProcessor((*it).processor);
// Tail of the registerProcessor() signature on the factory (its first line is not part
// of this excerpt). Forwards to the processor registry and transfers ownership of
// `processor`; no fallback MIME type is passed, so the registry's default (empty) applies.
249 std::initializer_list<QStringView> aliasMimeTypes)
251 d->s->registerProcessor(std::move(processor), mimeType, aliasMimeTypes);
// Body fragment of the setter controlling the intercept processor (function header not
// part of this excerpt). Enabling installs an ExternalProcessor unless one is already
// present; disabling destroys an existing one. Calls that do not change the state do nothing.
256 if (separateProcess && !d->interceptProcessor) {
257 d->interceptProcessor = std::make_unique<ExternalProcessor>();
258 }
else if (!separateProcess && d->interceptProcessor) {
259 d->interceptProcessor.reset();
KCALUTILS_EXPORT QString mimeType()
Classes for reservation/travel data models, data extraction and data augmentation.
qsizetype size() const
iterator find(const Key &key)
iterator insert(const Key &key, const T &value)
QMimeType mimeTypeForData(QIODevice *device) const
QMimeType mimeTypeForFileNameAndData(const QString &fileName, QIODevice *device) const
bool isEmpty() const
bool isEmpty() const
QString toString() const