KFileMetaData

plaintextextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2012 Vishesh Handa <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 
8 #include "plaintextextractor.h"
9 
10 #include <QFile>
11 #include <QTextCodec>
12 #include <QDebug>
13 
14 #include <fstream>
15 
16 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
17  #include <sys/types.h>
18  #include <sys/stat.h>
19  #include <fcntl.h>
20  #include <unistd.h>
21 #endif
22 
23 using namespace KFileMetaData;
24 
25 PlainTextExtractor::PlainTextExtractor(QObject* parent)
26  : ExtractorPlugin(parent)
27 {
28 
29 }
30 
31 const QStringList supportedMimeTypes = {
32  QStringLiteral("text/plain"),
33 };
34 
35 QStringList PlainTextExtractor::mimetypes() const
36 {
37  return supportedMimeTypes;
38 }
39 
40 void PlainTextExtractor::extract(ExtractionResult* result)
41 {
42 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
43  QByteArray filePath = QFile::encodeName(result->inputUrl());
44 
45 #ifdef O_NOATIME
46  int fd = open(filePath.constData(), O_RDONLY | O_NOATIME);
47  if (fd < 0)
48 #else
49  int fd;
50 #endif
51  {
52  fd = open(filePath.constData(), O_RDONLY);
53  }
54 
55  if (fd < 0) {
56  return;
57  }
58 
59  result->addType(Type::Text);
60  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
61  close(fd);
62  return;
63  }
64 
66 
67  char* line = nullptr;
68  size_t len = 0;
69  int lines = 0;
70  int r = 0;
71 
72  FILE* fp = fdopen(fd, "r");
73 
74  while ( (r = getline(&line, &len, fp)) != -1) {
76  QString text = codec->toUnicode(line, r - 1, &state);
77 
78  if (state.invalidChars > 0) {
79  qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
80  free(line);
81  close(fd);
82  return;
83  }
84  result->append(text);
85 
86  lines += 1;
87  }
88  if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
89  result->add(Property::LineCount, lines);
90  }
91 
92  free(line);
93  close(fd);
94 
95 #else
96  std::string line;
97  int lines = 0;
98 
99  std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
100  if (!fstream.is_open()) {
101  return;
102  }
103 
104  result->addType(Type::Text);
105  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
106  return;
107  }
108 
110  while (std::getline(fstream, line)) {
111  QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
112 
114  QString text = codec->toUnicode(arr.constData(), arr.size(), &state);
115 
116  if (state.invalidChars > 0) {
117  qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
118  return;
119  }
120  result->append(text);
121 
122  lines += 1;
123  }
124 
125  result->add(Property::LineCount, lines);
126 #endif
127 }
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors.
QByteArray fromRawData(const char *data, int size)
const QList< QKeySequence > & close()
QTextCodec * codecForLocale()
virtual void addType(Type::Type type)=0
This function is called by the plugins.
const char * constData() const const
KIOCORE_EXPORT FileJob * open(const QUrl &url, QIODevice::OpenMode mode)
int size() const const
The ExtractionResult class is where all the data extracted by the indexer is saved.
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
QString toUnicode(const QByteArray &a) const const
QByteArray encodeName(const QString &fileName)
QString inputUrl() const
The input url which the plugins will use to locate the file.
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sat Jul 4 2020 22:54:24 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.