KFileMetaData

plaintextextractor.cpp
1/*
2 SPDX-FileCopyrightText: 2012 Vishesh Handa <me@vhanda.in>
3
4 SPDX-License-Identifier: LGPL-2.1-or-later
5*/
6
7
8#include "plaintextextractor.h"
9
10#include <QDebug>
11#include <QStringDecoder>
12#include <QFile>
13
14#include <fstream>
15
16#if defined(Q_OS_LINUX) || defined(__GLIBC__)
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <fcntl.h>
20 #include <unistd.h>
21#endif
22
23using namespace KFileMetaData;
24
25PlainTextExtractor::PlainTextExtractor(QObject* parent)
26 : ExtractorPlugin(parent)
27{
28
29}
30
31const QStringList supportedMimeTypes = {
32 QStringLiteral("text/plain"),
33};
34
35QStringList PlainTextExtractor::mimetypes() const
36{
37 return supportedMimeTypes;
38}
39
40void PlainTextExtractor::extract(ExtractionResult* result)
41{
42#if defined(Q_OS_LINUX) || defined(__GLIBC__)
43 QByteArray filePath = QFile::encodeName(result->inputUrl());
44
45#ifdef O_NOATIME
46 int fd = open(filePath.constData(), O_RDONLY | O_NOATIME);
47 if (fd < 0)
48#else
49 int fd;
50#endif
51 {
52 fd = open(filePath.constData(), O_RDONLY);
53 }
54
55 if (fd < 0) {
56 return;
57 }
58
59 result->addType(Type::Text);
60 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
61 close(fd);
62 return;
63 }
64
65 QStringDecoder codec(QStringConverter::System);
66
67 char* line = nullptr;
68 size_t len = 0;
69 int lines = 0;
70 int r = 0;
71
72 FILE* fp = fdopen(fd, "r");
73
74 while ( (r = getline(&line, &len, fp)) != -1) {
75 QString text = codec.decode(QByteArrayView(line, r - 1));
76
77 if (codec.hasError()) {
78 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
79 free(line);
80 close(fd);
81 return;
82 }
83 result->append(text);
84
85 lines += 1;
86 }
87 if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
88 result->add(Property::LineCount, lines);
89 }
90
91 free(line);
92 close(fd);
93
94#else
95 std::string line;
96 int lines = 0;
97
98 std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
99 if (!fstream.is_open()) {
100 return;
101 }
102
103 result->addType(Type::Text);
104 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
105 return;
106 }
107
108 QStringDecoder codec(QStringConverter::System);
109 while (std::getline(fstream, line)) {
110 QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
111
112 QString text = codec.decode(arr);
113
114 if (codec.hasError()) {
115 qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
116 return;
117 }
118 result->append(text);
119
120 lines += 1;
121 }
122
123 result->add(Property::LineCount, lines);
124#endif
125}
126
127#include "moc_plaintextextractor.cpp"
The ExtractionResult class is where all the data extracted by the indexer is saved.
QString inputUrl() const
The input url which the plugins will use to locate the file.
virtual void addType(Type::Type type)=0
This function is called by the plugins.
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
The ExtractorPlugin is the base class for all file metadata extractors.
const QList< QKeySequence > & close()
const QList< QKeySequence > & open()
const char * constData() const const
QByteArray fromRawData(const char *data, int size)
QByteArray encodeName(const QString &fileName)
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sun Feb 25 2024 18:44:24 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.