Baloo

basicindexingjob.cpp
1/*
2 This file is part of the KDE Baloo Project
3 SPDX-FileCopyrightText: 2013-2015 Vishesh Handa <me@vhanda.in>
4
5 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
6*/
7
8#include "basicindexingjob.h"
9#include "termgenerator.h"
10#include "idutils.h"
11
12#include <QStringList>
13#include <QFile>
14
15#include <KFileMetaData/Types>
16#include <KFileMetaData/UserMetaData>
17
18using namespace Baloo;
19
20BasicIndexingJob::BasicIndexingJob(const QString& filePath, const QString& mimetype,
21 IndexingLevel level)
22 : m_filePath(filePath)
23 , m_mimetype(mimetype)
24 , m_indexingLevel(level)
25{
26 if (m_filePath.endsWith(QLatin1Char('/'))) {
27 m_filePath.chop(1);
28 }
29}
30
31namespace {
32
33void indexXAttr(const QString& url, Document& doc)
34{
35 KFileMetaData::UserMetaData userMetaData(url);
36
38 auto attributes = userMetaData.queryAttributes(Attribute::Tags |
39 Attribute::Rating | Attribute::Comment);
40 if (attributes == Attribute::None) {
41 return;
42 }
43
44 TermGenerator tg(doc);
45
46 const QStringList tags = userMetaData.tags();
47 for (const QString& tag : tags) {
48 tg.indexXattrText(tag, QByteArray("TA"));
49 doc.addXattrTerm(QByteArray("TAG-") + tag.toUtf8());
50 }
51
52 int rating = userMetaData.rating();
53 if (rating) {
54 doc.addXattrTerm(QByteArray("R") + QByteArray::number(rating));
55 }
56
57 QString comment = userMetaData.userComment();
58 if (!comment.isEmpty()) {
59 tg.indexXattrText(comment, QByteArray("C"));
60 }
61}
62
63QVector<KFileMetaData::Type::Type> typesForMimeType(const QString& mimeType)
64{
65 using namespace KFileMetaData;
67 types.reserve(2);
68
69 // Basic types
70 if (mimeType.startsWith(QLatin1String("audio/"))) {
71 types << Type::Audio;
72 }
73 if (mimeType.startsWith(QLatin1String("video/"))) {
74 types << Type::Video;
75 }
76 if (mimeType.startsWith(QLatin1String("image/"))) {
77 types << Type::Image;
78 }
79 if (mimeType.startsWith(QLatin1String("text/"))) {
80 types << Type::Text;
81 }
82 if (mimeType.contains(QLatin1String("document"))) {
83 types << Type::Document;
84 }
85
86 if (mimeType.contains(QLatin1String("powerpoint"))) {
87 types << Type::Presentation;
88 types << Type::Document;
89 }
90 if (mimeType.contains(QLatin1String("excel"))) {
91 types << Type::Spreadsheet;
92 types << Type::Document;
93 }
94 // Compressed tar archives: "application/x-<compression>-compressed-tar"
95 if ((mimeType.startsWith(QLatin1String("application/x-"))) &&
96 (mimeType.endsWith(QLatin1String("-compressed-tar")))) {
97 types << Type::Archive;
98 }
99
100 static QMultiHash<QString, Type::Type> typeMapper {
101 {QStringLiteral("text/plain"), Type::Document},
102 // MS Office
103 {QStringLiteral("application/msword"), Type::Document},
104 {QStringLiteral("application/x-scribus"), Type::Document},
105 // The old pre-XML MS Office formats are already covered by the excel/powerpoint "contains" above:
106 // - application/vnd.ms-powerpoint
107 // - application/vnd.ms-excel
108 // "openxmlformats-officedocument" and "opendocument" contain "document", i.e. already have Type::Document
109 // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
110 // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
111 // - application/vnd.openxmlformats-officedocument.presentationml.presentation
112 // - application/vnd.oasis.opendocument.text
113 // - application/vnd.oasis.opendocument.spreadsheet
114 // - application/vnd.oasis.opendocument.presentation
115 // Office 2007
116 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.presentation"), Type::Presentation},
117 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.slideshow"), Type::Presentation},
118 {QStringLiteral("application/vnd.openxmlformats-officedocument.presentationml.template"), Type::Presentation},
119 {QStringLiteral("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), Type::Spreadsheet},
120 // Open Document Formats - https://en.wikipedia.org/wiki/OpenDocument_technical_specification
121 {QStringLiteral("application/vnd.oasis.opendocument.presentation"), Type::Presentation},
122 {QStringLiteral("application/vnd.oasis.opendocument.spreadsheet"), Type::Spreadsheet},
123 {QStringLiteral("application/pdf"), Type::Document},
124 {QStringLiteral("application/postscript"), Type::Document},
125 {QStringLiteral("application/x-dvi"), Type::Document},
126 {QStringLiteral("application/rtf"), Type::Document},
127 // EBooks
128 {QStringLiteral("application/epub+zip"), Type::Document},
129 {QStringLiteral("application/vnd.amazon.mobi8-ebook"), Type::Document},
130 {QStringLiteral("application/x-mobipocket-ebook"), Type::Document},
131 // Graphic EBooks
132 {QStringLiteral("application/vnd.comicbook-rar"), Type::Document},
133 {QStringLiteral("application/vnd.comicbook+zip"), Type::Document},
134 {QStringLiteral("application/x-cb7"), Type::Document},
135 {QStringLiteral("application/x-cbt"), Type::Document},
136 // Archives - https://en.wikipedia.org/wiki/List_of_archive_formats
137 {QStringLiteral("application/gzip"), Type::Archive},
138 {QStringLiteral("application/x-tar"), Type::Archive},
139 {QStringLiteral("application/x-tarz"), Type::Archive},
140 {QStringLiteral("application/x-arc"), Type::Archive},
141 {QStringLiteral("application/x-archive"), Type::Archive},
142 {QStringLiteral("application/x-bzip"), Type::Archive},
143 {QStringLiteral("application/x-cpio"), Type::Archive},
144 {QStringLiteral("application/x-lha"), Type::Archive},
145 {QStringLiteral("application/x-lhz"), Type::Archive},
146 {QStringLiteral("application/x-lrzip"), Type::Archive},
147 {QStringLiteral("application/x-lz4"), Type::Archive},
148 {QStringLiteral("application/x-lzip"), Type::Archive},
149 {QStringLiteral("application/x-lzma"), Type::Archive},
150 {QStringLiteral("application/x-lzop"), Type::Archive},
151 {QStringLiteral("application/x-7z-compressed"), Type::Archive},
152 {QStringLiteral("application/x-ace"), Type::Archive},
153 {QStringLiteral("application/x-astrotite-afa"), Type::Archive},
154 {QStringLiteral("application/x-alz"), Type::Archive},
155 {QStringLiteral("application/vnd.android.package-archive"), Type::Archive},
156 {QStringLiteral("application/x-arj"), Type::Archive},
157 {QStringLiteral("application/vnd.ms-cab-compressed"), Type::Archive},
158 {QStringLiteral("application/x-cfs-compressed"), Type::Archive},
159 {QStringLiteral("application/x-dar"), Type::Archive},
160 {QStringLiteral("application/x-lzh"), Type::Archive},
161 {QStringLiteral("application/x-lzx"), Type::Archive},
162 {QStringLiteral("application/vnd.rar"), Type::Archive},
163 {QStringLiteral("application/x-stuffit"), Type::Archive},
164 {QStringLiteral("application/x-stuffitx"), Type::Archive},
165 {QStringLiteral("application/x-tzo"), Type::Archive},
166 {QStringLiteral("application/x-ustar"), Type::Archive},
167 {QStringLiteral("application/x-xar"), Type::Archive},
168 {QStringLiteral("application/x-xz"), Type::Archive},
169 {QStringLiteral("application/x-zoo"), Type::Archive},
170 {QStringLiteral("application/zip"), Type::Archive},
171 {QStringLiteral("application/zlib"), Type::Archive},
172 {QStringLiteral("application/zstd"), Type::Archive},
173 // WPS office
174 {QStringLiteral("application/wps-office.doc"), Type::Document},
175 {QStringLiteral("application/wps-office.xls"), Type::Document},
176 {QStringLiteral("application/wps-office.xls"), Type::Spreadsheet},
177 {QStringLiteral("application/wps-office.pot"), Type::Document},
178 {QStringLiteral("application/wps-office.pot"), Type::Presentation},
179 {QStringLiteral("application/wps-office.wps"), Type::Document},
180 {QStringLiteral("application/wps-office.docx"), Type::Document},
181 {QStringLiteral("application/wps-office.xlsx"), Type::Document},
182 {QStringLiteral("application/wps-office.xlsx"), Type::Spreadsheet},
183 {QStringLiteral("application/wps-office.pptx"), Type::Document},
184 {QStringLiteral("application/wps-office.pptx"), Type::Presentation},
185 // Other
186 {QStringLiteral("text/markdown"), Type::Document},
187 {QStringLiteral("image/vnd.djvu+multipage"), Type::Document},
188 {QStringLiteral("application/x-lyx"), Type::Document}
189 };
190
191 auto hashIt = typeMapper.find(mimeType);
192 while (hashIt != typeMapper.end() && hashIt.key() == mimeType) {
193 types.append(hashIt.value());
194 ++hashIt;
195 }
196
197 return types;
198}
199} // namespace
200
201BasicIndexingJob::~BasicIndexingJob()
202{
203}
204
205bool BasicIndexingJob::index()
206{
207 const QByteArray url = QFile::encodeName(m_filePath);
208 auto lastSlash = url.lastIndexOf('/');
209
210 const QByteArray fileName = url.mid(lastSlash + 1);
211 const QByteArray filePath = url.left(lastSlash);
212
213 QT_STATBUF statBuf;
214 if (filePathToStat(filePath, statBuf) != 0) {
215 return false;
216 }
217
218 Document doc;
219 doc.setParentId(statBufToId(statBuf));
220
221 if (filePathToStat(url, statBuf) != 0) {
222 return false;
223 }
224 doc.setId(statBufToId(statBuf));
225 doc.setUrl(url);
226
227 TermGenerator tg(doc);
228 tg.indexFileNameText(QFile::decodeName(fileName));
229 if (statBuf.st_size == 0) {
230 tg.indexText(QStringLiteral("application/x-zerosize"), QByteArray("M"));
231 } else {
232 tg.indexText(m_mimetype, QByteArray("M"));
233 }
234
235 // (Content) Modification time, Metadata (e.g. XAttr) change time
236 doc.setMTime(statBuf.st_mtime);
237 doc.setCTime(statBuf.st_ctime);
238
239 if (S_ISDIR(statBuf.st_mode)) {
240 static const QByteArray type = QByteArray("T") + QByteArray::number(static_cast<int>(KFileMetaData::Type::Folder));
241 doc.addTerm(type);
242 // For folders we do not need to go through file indexing, so we do not set contentIndexing
243
244 } else if (statBuf.st_size > 0) {
245 if (m_indexingLevel == MarkForContentIndexing) {
246 doc.setContentIndexing(true);
247 }
248 // Types
249 const QVector<KFileMetaData::Type::Type> tList = typesForMimeType(m_mimetype);
250 for (KFileMetaData::Type::Type type : tList) {
251 QByteArray num = QByteArray::number(static_cast<int>(type));
252 doc.addTerm(QByteArray("T") + num);
253 }
254 }
255
256 indexXAttr(m_filePath, doc);
257
258 m_doc = doc;
259 return true;
260}
Type type(const QSqlDatabase &db)
Implements storage for docIds without any associated data. Instantiated for:
Definition coding.cpp:11
quint64 statBufToId(const QT_STATBUF &stBuf)
Convert the QT_STATBUF into a 64 bit unique identifier for the file.
Definition idutils.h:37
KCALUTILS_EXPORT QString mimeType()
KIOCORE_EXPORT MimetypeJob * mimetype(const QUrl &url, JobFlags flags=DefaultFlags)
QStringView level(QStringView ifopt)
qsizetype lastIndexOf(QByteArrayView bv) const
QByteArray left(qsizetype len) const
QByteArray mid(qsizetype pos, qsizetype len) const
QByteArray number(double n, char format, int precision)
QString decodeName(const QByteArray &localFileName)
QByteArray encodeName(const QString &fileName)
void append(QList< T > &&value)
void reserve(qsizetype size)
iterator find(const Key &key, const T &value)
bool contains(QChar ch, Qt::CaseSensitivity cs) const
bool endsWith(QChar c, Qt::CaseSensitivity cs) const
bool isEmpty() const
bool startsWith(QChar c, Qt::CaseSensitivity cs) const
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:51:40 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.