Baloo

databasesanitizer.cpp
1/*
2 This file is part of the KDE Baloo project.
3 SPDX-FileCopyrightText: 2018 Michael Heidelbach <ottwolt@gmail.com>
4
5 SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
6*/
7
8#include "databasesanitizer.h"
9#include "documenturldb.h"
10#include "idutils.h"
11
12#include <sys/sysmacros.h>
13
14#include <KLocalizedString>
15#include <QFileInfo>
16#include <QStorageInfo>
17#include <QDebug>
18
19namespace Baloo
20{
21
22class DatabaseSanitizerImpl {
23public:
24 DatabaseSanitizerImpl(const Database& db, Transaction::TransactionType type)
25 : m_transaction(new Transaction(db, type))
26 {
27 }
28
29public:
30
31 /**
32 * \brief Basic info about database items
33 */
34 struct FileInfo {
35 quint32 deviceId = 0;
36 quint32 inode = 0;
37 quint64 id = 0;
38 bool isSymLink = false;
39 bool accessible = true;
40 QString url;
41 };
42
43 void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const
44 {
45 if (cur % step == 0) {
46 out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16);
47 out.flush();
48 }
49 cur++;
50 }
51
52 /**
53 * Summary of createList() actions
54 */
55 struct Summary {
56 quint64 total = 0; ///Count of all files
57 quint64 ignored = 0; ///Count of filtered out files
58 quint64 accessible = 0; ///Count of checked and accessible files
59 };
60 /**
61 * Create a list of \a FileInfo items.
62 *
63 * \p deviceIDs filter by device ids. If the vector is empty no filtering is done
64 * and every item is collected.
65 * Positive numbers are including filters collecting only the mentioned device ids.
66 * Negative numbers are excluding filters collecting everything but the mentioned device ids.
67 *
68 * \p accessFilter Flags to filter items by accessibility.
69 *
70 * \p urlFilter Filter result urls. Default is null = Collect everything.
71 */
72 QPair<QVector<FileInfo>, Summary> createList(
73 const QVector<qint64>& deviceIds,
74 const DatabaseSanitizer::ItemAccessFilters accessFilter,
76 ) const
77 {
78 Q_ASSERT(m_transaction);
79
80 const auto docUrlDb = DocumentUrlDB(m_transaction->m_dbis.idTreeDbi,
81 m_transaction->m_dbis.idFilenameDbi,
82 m_transaction->m_txn);
83 const auto map = docUrlDb.toTestMap();
84 const auto keys = map.keys();
85 QVector<FileInfo> result;
86 uint max = map.size();
87 uint i = 0;
88 result.reserve(max);
89 QVector<quint32> includeIds;
90 QVector<quint32> excludeIds;
91 for (qint64 deviceId : deviceIds) {
92 if (deviceId > 0) {
93 includeIds.append(deviceId);
94 } else if (deviceId < 0) {
95 excludeIds.append(-deviceId);
96 }
97 }
98 Summary summary;
99 summary.total = max;
100 summary.ignored = max;
101 QTextStream err(stderr);
102
103 for (auto it = map.constBegin(), end = map.constEnd(); it != end; it++) {
104 printProgress(err, i, max, 100);
105 const quint64 id = it.key();
106 const quint32 deviceId = idToDeviceId(id);
107 if (!includeIds.isEmpty() && !includeIds.contains(deviceId)) {
108 continue;
109 } else if (excludeIds.contains(deviceId)) {
110 continue;
111 } else if (urlFilter && !urlFilter->match(it.value()).hasMatch()) {
112 continue;
113 }
114
115 FileInfo info;
116 info.deviceId = deviceId;
117 info.inode = idToInode(id);
118 info.url = QFile::decodeName(it.value());
119 info.id = id;
120 QFileInfo fileInfo(info.url);
121 info.accessible = !info.url.isEmpty() && fileInfo.exists();
122
123 if (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) {
124 continue;
125 } else if (!info.accessible && (accessFilter & DatabaseSanitizer::IgnoreUnavailable)) {
126 continue;
127 }
128
129 info.isSymLink = fileInfo.isSymLink();
130
131 result.append(info);
132 summary.ignored--;
133 if (info.accessible) {
134 summary.accessible++;
135 }
136 }
137 return {result, summary};
138 }
139
140 QStorageInfo getStorageInfo(const quint32 id) {
141 static QMap<quint32, QStorageInfo> storageInfos = []() {
143 const auto volumes = QStorageInfo::mountedVolumes();
144 for (const auto& vol : volumes) {
145 const QByteArray rootPath = QFile::encodeName(vol.rootPath());
146 const auto id = filePathToId(rootPath);
147 const quint32 deviceId = idToDeviceId(id);
148 // qDebug() << vol;
149 result[deviceId] = vol;
150 }
151 return result;
152 }();
153
154 QStorageInfo info = storageInfos.value(id);
155 return info;
156 }
157
158 QMap<quint32, bool> deviceFilters(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter)
159 {
160 QMap<quint32, bool> result;
161 for (const auto& info : infos) {
162 result[info.deviceId] = false;
163 }
164
165 for (auto it = result.begin(), end = result.end(); it != end; it++) {
166 const auto storageInfo = getStorageInfo(it.key());
167 it.value() = isIgnored(storageInfo, accessFilter);
168 }
169 return result;
170 }
171
172 bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter)
173 {
174 const bool mounted = storageInfo.isValid();
175 if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) {
176 return true;
177 } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) {
178 return true;
179 }
180
181 if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) {
182 // Due to the volatility of device ids, an id known by baloo may
183 // appear as mounted, but is not what baloo expects.
184 // For example at indexing time 43 was the id of a smb share, but
185 // at runtime 43 is the id of /run/media/<uid> when other users are
186 // logged in. The latter have a type of 'tmpfs' and should be ignored.
187 return true;
188 }
189
190 return false;
191 }
192
193 void removeDocument(const quint64 id) {
194 m_transaction->removeDocument(id);
195 }
196
197 void commit() {
198 m_transaction->commit();
199 }
200
201 void abort() {
202 m_transaction->abort();
203 }
204
205private:
206 Transaction* m_transaction;
207};
208}
209
210using namespace Baloo;
211
212DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type)
213 : m_pimpl(new DatabaseSanitizerImpl(db, type))
214{
215}
216
217DatabaseSanitizer::DatabaseSanitizer(Database* db, Transaction::TransactionType type)
218 : DatabaseSanitizer(*db, type)
219{
220}
221
222DatabaseSanitizer::~DatabaseSanitizer()
223{
224 delete m_pimpl;
225 m_pimpl = nullptr;
226}
227
228/**
229* Create a list of \a FileInfo items and print it to stdout.
230*
231* \p deviceIDs filter by device ids. If the vector is empty no filtering is done
232* and everything is printed.
233* Positive numbers are including filters printing only the mentioned device ids.
234* Negative numbers are excluding filters printing everything but the mentioned device ids.
235*
236* \p missingOnly Simulate purging operation. Only inaccessible items are printed.
237*
238* \p urlFilter Filter result urls. Default is null = Print everything.
239*/
241 const QVector<qint64>& deviceIds,
242 const ItemAccessFilters accessFilter,
243 const QSharedPointer<QRegularExpression>& urlFilter)
244{
245 auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter);
246 const auto sep = QLatin1Char(' ');
247 QTextStream out(stdout);
248 QTextStream err(stderr);
249 for (const auto& info: listResult.first) {
250 out << QStringLiteral("%1").arg(info.accessible ? "+" : "!")
251 << sep << QStringLiteral("device: %1").arg(info.deviceId)
252 << sep << QStringLiteral("inode: %1").arg(info.inode)
253 << sep << QStringLiteral("url: %1").arg(info.url)
254 << endl;
255 }
256
257 const auto& summary = listResult.second;
258 if (accessFilter & IgnoreAvailable) {
259 err << i18n("Total: %1, Inaccessible: %2",
260 summary.total,
261 summary.total - (summary.ignored + summary.accessible)) << endl;
262 } else {
263 err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4",
264 summary.total,
265 summary.ignored,
266 summary.accessible,
267 summary.total - (summary.ignored + summary.accessible)) << endl;
268 }
269}
270
271void DatabaseSanitizer::printDevices(const QVector<qint64>& deviceIds, const ItemAccessFilters accessFilter)
272{
273 auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr);
274
275 QMap<quint32, quint64> useCount;
276 for (const auto& info : infos.first) {
277 useCount[info.deviceId]++;
278 }
279
280 const auto sep = QLatin1Char(' ');
281 QTextStream out(stdout);
282 QTextStream err(stderr);
283 int matchCount = 0;
284 for (auto it = useCount.cbegin(); it != useCount.cend(); it++) {
285 auto id = it.key();
286 auto info = m_pimpl->getStorageInfo(id);
287 auto mounted = info.isValid();
288 if (info.fileSystemType() == QLatin1String("tmpfs")) {
289 continue;
290 } else if (mounted && (accessFilter & IgnoreMounted)) {
291 continue;
292 } else if (!mounted && (accessFilter & IgnoreUnmounted)) {
293 continue;
294 }
295 matchCount++;
296 // TODO coloring would be nice, but "...|grep '^!'" does not work with it.
297 // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!")
298 // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263
299 out << QStringLiteral("%1").arg(mounted ? "+" : "!")
300 << sep << QStringLiteral("device:%1").arg(id)
301 << sep << QStringLiteral("[%1:%2]")
302 .arg(major(id), 4, 16, QLatin1Char('0'))
303 .arg(minor(id), 4, 16, QLatin1Char('0'))
304 << sep << QStringLiteral("indexed-items:%1").arg(it.value());
305
306 if (mounted) {
307 out
308 << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData())
309 << sep << QStringLiteral("device:%1").arg(info.device().constData())
310 << sep << QStringLiteral("path:%1").arg(info.rootPath())
311 ;
312 }
313 // TODO: see above
314 // out << QStringLiteral("\033[0m") << endl;
315 out << endl;
316 }
317
318 err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl;
319}
320
321void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds,
322 const DatabaseSanitizer::ItemAccessFilters accessFilter,
323 const bool dryRun,
324 const QSharedPointer<QRegularExpression>& urlFilter)
325{
326 auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter);
327
328 const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter);
329
330 const auto sep = QLatin1Char(' ');
331 auto& summary = listResult.second;
332 QTextStream out(stdout);
333 QTextStream err(stderr);
334 for (const auto& info: listResult.first) {
335 if (ignoredDevices[info.deviceId] == true) {
336 summary.ignored++;
337 } else {
338 if (info.isSymLink) {
339 out << i18n("IgnoredSymbolicLink:");
340 summary.ignored++;
341 } else {
342 m_pimpl->removeDocument(info.id);
343 out << i18n("Removing:");
344 }
345 out << sep << QStringLiteral("device: %1").arg(info.deviceId)
346 << sep << QStringLiteral("inode: %1").arg(info.inode)
347 << sep << QStringLiteral("url: %1").arg(info.url)
348 << endl;
349 }
350 }
351 if (dryRun) {
352 m_pimpl->abort();
353 } else {
354 m_pimpl->commit();
355 }
356 Q_ASSERT(summary.accessible == 0);
357 err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3",
358 summary.total - summary.ignored,
359 summary.total,
360 summary.ignored)
361 << endl;
362}
Provide methods to show database problems and sanitize them.
void printDevices(const QVector< qint64 > &deviceIds, const ItemAccessFilters accessFilter=IgnoreNone)
Print info about known devices to stdout.
void printList(const QVector< qint64 > &deviceIds, const ItemAccessFilters accessFilter=IgnoreNone, const QSharedPointer< QRegularExpression > &urlFilter=nullptr)
Print database content to stdout.
QString i18nc(const char *context, const char *text, const TYPE &arg...)
QString i18n(const char *text, const TYPE &arg...)
Type type(const QSqlDatabase &db)
Implements storage for docIds without any associated data. Instantiated for:
Definition coding.cpp:11
const char * constData() const const
QByteArray toPercentEncoding(const QByteArray &exclude, const QByteArray &include, char percent) const const
QString decodeName(const QByteArray &localFileName)
QByteArray encodeName(const QString &fileName)
void append(QList< T > &&value)
bool contains(const AT &value) const const
bool isEmpty() const const
void reserve(qsizetype size)
iterator begin()
const_iterator cbegin() const const
const_iterator cend() const const
iterator end()
size_type size() const const
T value(const Key &key, const T &defaultValue) const const
QByteArray device() const const
QByteArray fileSystemType() const const
bool isValid() const const
QList< QStorageInfo > mountedVolumes()
QString rootPath() const const
QTextStream & endl(QTextStream &stream)
QFuture< void > map(Iterator begin, Iterator end, MapFunctor &&function)
void flush()
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:51:40 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.