Baloo

basicindexingjob.cpp
1 /*
2  This file is part of the KDE Baloo Project
3  SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6 */
7 
8 #include "basicindexingjob.h"
9 #include "termgenerator.h"
10 #include "idutils.h"
11 
12 #include <QStringList>
13 #include <QFile>
14 
15 #include <KFileMetaData/Types>
16 #include <KFileMetaData/UserMetaData>
17 
18 using namespace Baloo;
19 
20 BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
21  IndexingLevel level)
22  : m_filePath(filePath)
23  , m_mimetype(mimetype)
24  , m_indexingLevel(level)
25 {
26  if (m_filePath.endsWith(QLatin1Char('/'))) {
27  m_filePath.chop(1);
28  }
29 }
30 
31 namespace {
32 
33 void indexXAttr(const QString& url, Document& doc)
34 {
35  KFileMetaData::UserMetaData userMetaData(url);
36 
37  using Attribute = KFileMetaData::UserMetaData::Attribute;
38  auto attributes = userMetaData.queryAttributes(Attribute::Tags |
39  Attribute::Rating | Attribute::Comment);
40  if (attributes == Attribute::None) {
41  return;
42  }
43 
44  TermGenerator tg(doc);
45 
46  const QStringList tags = userMetaData.tags();
47  for (const QString& tag : tags) {
48  tg.indexXattrText(tag, QByteArray("TA"));
49  doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8());
50  }
51 
52  int rating = userMetaData.rating();
53  if (rating) {
54  doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating));
55  }
56 
57  QString comment = userMetaData.userComment();
58  if (!comment.isEmpty()) {
59  tg.indexXattrText(comment, QByteArray("C"));
60  }
61 }
62 
63 QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
64 {
65  using namespace KFileMetaData;
67  types.reserve(2);
68 
69  // Basic types
70  if (mimeType.startsWith(QLatin1String("audio/"))) {
71  types << Type::Audio;
72  }
73  if (mimeType.startsWith(QLatin1String("video/"))) {
74  types << Type::Video;
75  }
76  if (mimeType.startsWith(QLatin1String("image/"))) {
77  types << Type::Image;
78  }
79  if (mimeType.startsWith(QLatin1String("text/"))) {
80  types << Type::Text;
81  }
82  if (mimeType.contains(QLatin1String("document"))) {
83  types << Type::Document;
84  }
85 
86  if (mimeType.contains(QLatin1String("powerpoint"))) {
87  types << Type::Presentation;
88  types << Type::Document;
89  }
90  if (mimeType.contains(QLatin1String("excel"))) {
91  types << Type::Spreadsheet;
92  types << Type::Document;
93  }
94  // Compressed tar archives: "application/x-<compression>-compressed-tar"
95  if ((mimeType.startsWith(QLatin1String("application/x-"))) &&
96  (mimeType.endsWith(QLatin1String("-compressed-tar")))) {
97  types << Type::Archive;
98  }
99 
100  static QMultiHash<QString, Type::Type> typeMapper {
101  {QStringLiteral("text/plain"), Type::Document},
102  // MS Office
103  {QStringLiteral("application/msword"), Type::Document},
104  {QStringLiteral("application/x-scribus"), Type::Document},
105  // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
106  // - application/vnd.ms-powerpoint
107  // - application/vnd.ms-excel
108  // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
109  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
110  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
111  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
112  // - application/vnd.oasis.opendocument.text
113  // - application/vnd.oasis.opendocument.spreadsheet
114  // - application/vnd.oasis.opendocument.presentation
115  // Office 2007
116  {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
117  {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
118  {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
119  {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
120  // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
121  {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
122  {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
123  {QStringLiteral("application/pdf"), Type::Document},
124  {QStringLiteral("application/postscript"), Type::Document},
125  {QStringLiteral("application/x-dvi"), Type::Document},
126  {QStringLiteral("application/rtf"), Type::Document},
127  // EBooks
128  {QStringLiteral("application/epub+zip"), Type::Document},
129  {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
130  {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
131  // Graphic EBooks
132  {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
133  {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
134  {QStringLiteral("application/x-cb7"), Type::Document},
135  {QStringLiteral("application/x-cbt"), Type::Document},
136  // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
137  {QStringLiteral("application/gzip"), Type::Archive},
138  {QStringLiteral("application/x-tar"), Type::Archive},
139  {QStringLiteral("application/x-tarz"), Type::Archive},
140  {QStringLiteral("application/x-arc"), Type::Archive},
141  {QStringLiteral("application/x-archive"), Type::Archive},
142  {QStringLiteral("application/x-bzip"), Type::Archive},
143  {QStringLiteral("application/x-cpio"), Type::Archive},
144  {QStringLiteral("application/x-lha"), Type::Archive},
145  {QStringLiteral("application/x-lhz"), Type::Archive},
146  {QStringLiteral("application/x-lrzip"), Type::Archive},
147  {QStringLiteral("application/x-lz4"), Type::Archive},
148  {QStringLiteral("application/x-lzip"), Type::Archive},
149  {QStringLiteral("application/x-lzma"), Type::Archive},
150  {QStringLiteral("application/x-lzop"), Type::Archive},
151  {QStringLiteral("application/x-7z-compressed"), Type::Archive},
152  {QStringLiteral("application/x-ace"), Type::Archive},
153  {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
154  {QStringLiteral("application/x-alz"), Type::Archive},
155  {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
156  {QStringLiteral("application/x-arj"), Type::Archive},
157  {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
158  {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
159  {QStringLiteral("application/x-dar"), Type::Archive},
160  {QStringLiteral("application/x-lzh"), Type::Archive},
161  {QStringLiteral("application/x-lzx"), Type::Archive},
162  {QStringLiteral("application/vnd.rar"), Type::Archive},
163  {QStringLiteral("application/x-stuffit"), Type::Archive},
164  {QStringLiteral("application/x-stuffitx"), Type::Archive},
165  {QStringLiteral("application/x-tzo"), Type::Archive},
166  {QStringLiteral("application/x-ustar"), Type::Archive},
167  {QStringLiteral("application/x-xar"), Type::Archive},
168  {QStringLiteral("application/x-xz"), Type::Archive},
169  {QStringLiteral("application/x-zoo"), Type::Archive},
170  {QStringLiteral("application/zip"), Type::Archive},
171  {QStringLiteral("application/zlib"), Type::Archive},
172  {QStringLiteral("application/zstd"), Type::Archive},
173  // WPS office
174  {QStringLiteral("application/wps-office.doc"), Type::Document},
175  {QStringLiteral("application/wps-office.xls"), Type::Document},
176  {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
177  {QStringLiteral("application/wps-office.pot"), Type::Document},
178  {QStringLiteral("application/wps-office.pot"), Type::Presentation},
179  {QStringLiteral("application/wps-office.wps"), Type::Document},
180  {QStringLiteral("application/wps-office.docx"), Type::Document},
181  {QStringLiteral("application/wps-office.xlsx"), Type::Document},
182  {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
183  {QStringLiteral("application/wps-office.pptx"), Type::Document},
184  {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
185  // Other
186  {QStringLiteral("text/markdown"), Type::Document},
187  {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
188  {QStringLiteral("application/x-lyx"), Type::Document}
189  };
190 
191  auto hashIt = typeMapper.find(mimeType);
192  while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
193  types.append(hashIt.value());
194  ++hashIt;
195  }
196 
197  return types;
198 }
199 } // namespace
200 
201 BasicIndexingJob::~BasicIndexingJob()
202 {
203 }
204 
205 bool BasicIndexingJob::index()
206 {
207  const QByteArray url = QFile::encodeName(m_filePath);
208  auto lastSlash = url.lastIndexOf('/');
209 
210  const QByteArray fileName = url.mid(lastSlash + 1);
211  const QByteArray filePath = url.left(lastSlash);
212 
213  QT_STATBUF statBuf;
214  if (filePathToStat(filePath, statBuf) != 0) {
215  return false;
216  }
217 
218  Document doc;
219  doc.setParentId(statBufToId(statBuf));
220 
221  if (filePathToStat(url, statBuf) != 0) {
222  return false;
223  }
224  doc.setId(statBufToId(statBuf));
225  doc.setUrl(url);
226 
227  TermGenerator tg(doc);
228  tg.indexFileNameText(QFile::decodeName(fileName));
229  if (statBuf.st_size == 0) {
230  tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M"));
231  } else {
232  tg.indexText(m_mimetype, QByteArray("M"));
233  }
234 
235  // (Content) Modification time, Metadata (e.g. XAttr) change time
236  doc.setMTime(statBuf.st_mtime);
237  doc.setCTime(statBuf.st_ctime);
238 
239  if (S_ISDIR(statBuf.st_mode)) {
240  static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
241  doc.addTerm(type);
242  // For folders we do not need to go through file indexing, so we do not set contentIndexing
243 
244  } else if (statBuf.st_size > 0) {
245  if (m_indexingLevel == MarkForContentIndexing) {
246  doc.setContentIndexing(true);
247  }
248  // Types
249  const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype);
250  for (KFileMetaData::Type::Type type : tList) {
251  QByteArray num = QByteArray::number(static_cast<int>(type));
252  doc.addTerm(QByteArray("T") + num);
253  }
254  }
255 
256  indexXAttr(m_filePath, doc);
257 
258  m_doc = doc;
259  return true;
260 }
void append(const T &value)
bool endsWith(const QString &s, Qt::CaseSensitivity cs) const
Type type(const QSqlDatabase &db)
QByteArray encodeName(const QString &fileName)
QByteArray number(int n, int base)
QStringView level(QStringView ifopt)
void setContentIndexing(bool val)
This flag is used to signify if the file needs its contents to be indexed.
Definition: document.cpp:92
KCALUTILS_EXPORT QString mimeType()
quint64 statBufToId(const QT_STATBUF &stBuf)
Convert the QT_STATBUF into a 64 bit unique identifier for the file.
Definition: idutils.h:37
KIOCORE_EXPORT MimetypeJob * mimetype(const QUrl &url, JobFlags flags=DefaultFlags)
QStringList types(Mode mode=Writing)
int lastIndexOf(char ch, int from) const
void reserve(int alloc)
Implements storage for docIds without any associated data. Instantiated for:
Definition: coding.cpp:11
QByteArray mid(int pos, int len) const
bool isEmpty() const
bool startsWith(const QString &s, Qt::CaseSensitivity cs) const
typename QHash< Key, T >::iterator find(const Key &key, const T &value)
QByteArray left(int len) const
bool contains(QChar ch, Qt::CaseSensitivity cs) const
A document represents an indexed file to be stored in the Baloo engine.
Definition: document.h:30
QString decodeName(const QByteArray &localFileName)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Wed Nov 29 2023 03:56:26 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.