KFileMetaData

plaintextextractor.cpp
1 /*
2  SPDX-FileCopyrightText: 2012 Vishesh Handa <[email protected]>
3 
4  SPDX-License-Identifier: LGPL-2.1-or-later
5 */
6 
7 
8 #include "plaintextextractor.h"
9 
10 #include <QFile>
11 #include <QTextCodec>
12 #include <QDebug>
13 
14 #include <fstream>
15 
16 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
17  #include <sys/types.h>
18  #include <sys/stat.h>
19  #include <fcntl.h>
20  #include <unistd.h>
21 #endif
22 
23 using namespace KFileMetaData;
24 
25 PlainTextExtractor::PlainTextExtractor(QObject* parent)
26  : ExtractorPlugin(parent)
27 {
28 
29 }
30 
31 const QStringList supportedMimeTypes = {
32  QStringLiteral("text/plain"),
33 };
34 
35 QStringList PlainTextExtractor::mimetypes() const
36 {
37  return supportedMimeTypes;
38 }
39 
40 void PlainTextExtractor::extract(ExtractionResult* result)
41 {
42 #if defined(Q_OS_LINUX) || defined(__GLIBC__)
43  QByteArray filePath = QFile::encodeName(result->inputUrl());
44 
45 #ifdef O_NOATIME
46  int fd = open(filePath.constData(), O_RDONLY | O_NOATIME);
47  if (fd < 0)
48 #else
49  int fd;
50 #endif
51  {
52  fd = open(filePath.constData(), O_RDONLY);
53  }
54 
55  if (fd < 0) {
56  return;
57  }
58 
59  result->addType(Type::Text);
60  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
61  close(fd);
62  return;
63  }
64 
66 
67  char* line = nullptr;
68  size_t len = 0;
69  int lines = 0;
70  int r = 0;
71 
72  FILE* fp = fdopen(fd, "r");
73 
74  while ( (r = getline(&line, &len, fp)) != -1) {
76  QString text = codec->toUnicode(line, r - 1, &state);
77 
78  if (state.invalidChars > 0) {
79  qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
80  free(line);
81  close(fd);
82  return;
83  }
84  result->append(text);
85 
86  lines += 1;
87  }
88  if (result->inputFlags() & ExtractionResult::ExtractMetaData) {
89  result->add(Property::LineCount, lines);
90  }
91 
92  free(line);
93  close(fd);
94 
95 #else
96  std::string line;
97  int lines = 0;
98 
99  std::ifstream fstream(QFile::encodeName(result->inputUrl()).constData());
100  if (!fstream.is_open()) {
101  return;
102  }
103 
104  result->addType(Type::Text);
105  if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
106  return;
107  }
108 
110  while (std::getline(fstream, line)) {
111  QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
112 
114  QString text = codec->toUnicode(arr.constData(), arr.size(), &state);
115 
116  if (state.invalidChars > 0) {
117  qDebug() << "Invalid encoding. Ignoring" << result->inputUrl();
118  return;
119  }
120  result->append(text);
121 
122  lines += 1;
123  }
124 
125  result->add(Property::LineCount, lines);
126 #endif
127 }
virtual void addType(Type::Type type)=0
This function is called by the plugins.
The ExtractionResult class is where all the data extracted by the indexer is saved....
QByteArray fromRawData(const char *data, int size)
QAction * open(const QObject *recvr, const char *slot, QObject *parent)
QByteArray encodeName(const QString &fileName)
QString inputUrl() const
The input url which the plugins will use to locate the file.
QTextCodec * codecForLocale()
Flags inputFlags() const
The flags which the extraction plugin should consider following when extracting metadata from the ...
QAction * close(const QObject *recvr, const char *slot, QObject *parent)
virtual void append(const QString &text)=0
This function is called by plugins when they wish for some plain text to be indexed without any prope...
const char * constData() const
virtual void add(Property::Property property, const QVariant &value)=0
This function is called by the plugins when they wish to add a key value pair which should be indexed...
The ExtractorPlugin is the base class for all file metadata extractors. It is responsible for extract...
int size() const
QString toUnicode(const QByteArray &a) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2022 The KDE developers.
Generated on Fri May 27 2022 03:47:54 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.