Baloo

databasesanitizer.cpp
1 /*
2  This file is part of the KDE Baloo project.
3  SPDX-FileCopyrightText: 2018 Michael Heidelbach <[email protected]>
4 
5  SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL
6 */
7 
8 #include "databasesanitizer.h"
9 #include "documenturldb.h"
10 #include "idutils.h"
11 
12 #include <sys/sysmacros.h>
13 
14 #include <KLocalizedString>
15 #include <QFileInfo>
16 #include <QStorageInfo>
17 #include <QDebug>
18 
19 namespace Baloo
20 {
21 
22 class DatabaseSanitizerImpl {
23 public:
24  DatabaseSanitizerImpl(const Database& db, Transaction::TransactionType type)
25  : m_transaction(new Transaction(db, type))
26  {
27  }
28 
29 public:
30 
31  /**
32  * \brief Basic info about database items
33  */
34  struct FileInfo {
35  quint32 deviceId = 0;
36  quint32 inode = 0;
37  quint64 id = 0;
38  bool isSymLink = false;
39  bool accessible = true;
40  QString url;
41  };
42 
43  void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const
44  {
45  if (cur % step == 0) {
46  out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16);
47  out.flush();
48  }
49  cur++;
50  }
51 
52  /**
53  * Summary of createList() actions
54  */
55  struct Summary {
56  quint64 total = 0; ///Count of all files
57  quint64 ignored = 0; ///Count of filtered out files
58  quint64 accessible = 0; ///Count of checked and accessible files
59  };
60  /**
61  * Create a list of \a FileInfo items.
62  *
63  * \p deviceIDs filter by device ids. If the vector is empty no filtering is done
64  * and every item is collected.
65  * Positive numbers are including filters collecting only the mentioned device ids.
66  * Negative numbers are excluding filters collecting everything but the mentioned device ids.
67  *
68  * \p accessFilter Flags to filter items by accessibility.
69  *
70  * \p urlFilter Filter result urls. Default is null = Collect everything.
71  */
72  QPair<QVector<FileInfo>, Summary> createList(
73  const QVector<qint64>& deviceIds,
74  const DatabaseSanitizer::ItemAccessFilters accessFilter,
75  const QSharedPointer<QRegularExpression>& urlFilter
76  ) const
77  {
78  Q_ASSERT(m_transaction);
79 
80  const auto docUrlDb = DocumentUrlDB(m_transaction->m_dbis.idTreeDbi,
81  m_transaction->m_dbis.idFilenameDbi,
82  m_transaction->m_txn);
83  const auto map = docUrlDb.toTestMap();
84  const auto keys = map.keys();
85  QVector<FileInfo> result;
86  uint max = map.size();
87  uint i = 0;
88  result.reserve(max);
89  QVector<quint32> includeIds;
90  QVector<quint32> excludeIds;
91  for (qint64 deviceId : deviceIds) {
92  if (deviceId > 0) {
93  includeIds.append(deviceId);
94  } else if (deviceId < 0) {
95  excludeIds.append(-deviceId);
96  }
97  }
98  Summary summary;
99  summary.total = max;
100  summary.ignored = max;
101  QTextStream err(stderr);
102 
103  for (auto it = map.constBegin(), end = map.constEnd(); it != end; it++) {
104  printProgress(err, i, max, 100);
105  const quint64 id = it.key();
106  const quint32 deviceId = idToDeviceId(id);
107  if (!includeIds.isEmpty() && !includeIds.contains(deviceId)) {
108  continue;
109  } else if (excludeIds.contains(deviceId)) {
110  continue;
111  } else if (urlFilter && !urlFilter->match(it.value()).hasMatch()) {
112  continue;
113  }
114 
115  FileInfo info;
116  info.deviceId = deviceId;
117  info.inode = idToInode(id);
118  info.url = QFile::decodeName(it.value());
119  info.id = id;
120  QFileInfo fileInfo(info.url);
121  info.accessible = !info.url.isEmpty() && fileInfo.exists();
122 
123  if (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) {
124  continue;
125  } else if (!info.accessible && (accessFilter & DatabaseSanitizer::IgnoreUnavailable)) {
126  continue;
127  }
128 
129  info.isSymLink = fileInfo.isSymLink();
130 
131  result.append(info);
132  summary.ignored--;
133  if (info.accessible) {
134  summary.accessible++;
135  }
136  }
137  return {result, summary};
138  }
139 
140  QStorageInfo getStorageInfo(const quint32 id) {
141  static QMap<quint32, QStorageInfo> storageInfos = []() {
143  const auto volumes = QStorageInfo::mountedVolumes();
144  for (const auto& vol : volumes) {
145  const QByteArray rootPath = QFile::encodeName(vol.rootPath());
146  const auto id = filePathToId(rootPath);
147  const quint32 deviceId = idToDeviceId(id);
148  // qDebug() << vol;
149  result[deviceId] = vol;
150  }
151  return result;
152  }();
153 
154  QStorageInfo info = storageInfos.value(id);
155  return info;
156  }
157 
158  QMap<quint32, bool> deviceFilters(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter)
159  {
160  QMap<quint32, bool> result;
161  for (const auto& info : infos) {
162  result[info.deviceId] = false;
163  }
164 
165  for (auto it = result.begin(), end = result.end(); it != end; it++) {
166  const auto storageInfo = getStorageInfo(it.key());
167  it.value() = isIgnored(storageInfo, accessFilter);
168  }
169  return result;
170  }
171 
172  bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter)
173  {
174  const bool mounted = storageInfo.isValid();
175  if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) {
176  return true;
177  } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) {
178  return true;
179  }
180 
181  if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) {
182  // Due to the volatility of device ids, an id known by baloo may
183  // appear as mounted, but is not what baloo expects.
184  // For example at indexing time 43 was the id of a smb share, but
185  // at runtime 43 is the id of /run/media/<uid> when other users are
186  // logged in. The latter have a type of 'tmpfs' and should be ignored.
187  return true;
188  }
189 
190  return false;
191  }
192 
193  void removeDocument(const quint64 id) {
194  m_transaction->removeDocument(id);
195  }
196 
197  void commit() {
198  m_transaction->commit();
199  }
200 
201  void abort() {
202  m_transaction->abort();
203  }
204 
205 private:
206  Transaction* m_transaction;
207 };
208 }
209 
210 using namespace Baloo;
211 
212 DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type)
213  : m_pimpl(new DatabaseSanitizerImpl(db, type))
214 {
215 }
216 
217 DatabaseSanitizer::DatabaseSanitizer(Database* db, Transaction::TransactionType type)
218  : DatabaseSanitizer(*db, type)
219 {
220 }
221 
222 DatabaseSanitizer::~DatabaseSanitizer()
223 {
224  delete m_pimpl;
225  m_pimpl = nullptr;
226 }
227 
228 /**
229 * Create a list of \a FileInfo items and print it to stdout.
230 *
231 * \p deviceIDs filter by device ids. If the vector is empty no filtering is done
232 * and everything is printed.
233 * Positive numbers are including filters printing only the mentioned device ids.
234 * Negative numbers are excluding filters printing everything but the mentioned device ids.
235 *
236 * \p missingOnly Simulate purging operation. Only inaccessible items are printed.
237 *
238 * \p urlFilter Filter result urls. Default is null = Print everything.
239 */
241  const QVector<qint64>& deviceIds,
242  const ItemAccessFilters accessFilter,
243  const QSharedPointer<QRegularExpression>& urlFilter)
244 {
245  auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter);
246  const auto sep = QLatin1Char(' ');
247  QTextStream out(stdout);
248  QTextStream err(stderr);
249  for (const auto& info: listResult.first) {
250  out << QStringLiteral("%1").arg(info.accessible ? "+" : "!")
251  << sep << QStringLiteral("device: %1").arg(info.deviceId)
252  << sep << QStringLiteral("inode: %1").arg(info.inode)
253  << sep << QStringLiteral("url: %1").arg(info.url)
254  << endl;
255  }
256 
257  const auto& summary = listResult.second;
258  if (accessFilter & IgnoreAvailable) {
259  err << i18n("Total: %1, Inaccessible: %2",
260  summary.total,
261  summary.total - (summary.ignored + summary.accessible)) << endl;
262  } else {
263  err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4",
264  summary.total,
265  summary.ignored,
266  summary.accessible,
267  summary.total - (summary.ignored + summary.accessible)) << endl;
268  }
269 }
270 
271 void DatabaseSanitizer::printDevices(const QVector<qint64>& deviceIds, const ItemAccessFilters accessFilter)
272 {
273  auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr);
274 
275  QMap<quint32, quint64> useCount;
276  for (const auto& info : infos.first) {
277  useCount[info.deviceId]++;
278  }
279 
280  const auto sep = QLatin1Char(' ');
281  QTextStream out(stdout);
282  QTextStream err(stderr);
283  int matchCount = 0;
284  for (auto it = useCount.cbegin(); it != useCount.cend(); it++) {
285  auto id = it.key();
286  auto info = m_pimpl->getStorageInfo(id);
287  auto mounted = info.isValid();
288  if (info.fileSystemType() == QLatin1String("tmpfs")) {
289  continue;
290  } else if (mounted && (accessFilter & IgnoreMounted)) {
291  continue;
292  } else if (!mounted && (accessFilter & IgnoreUnmounted)) {
293  continue;
294  }
295  matchCount++;
296  // TODO coloring would be nice, but "...|grep '^!'" does not work with it.
297  // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!")
298  // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263
299  out << QStringLiteral("%1").arg(mounted ? "+" : "!")
300  << sep << QStringLiteral("device:%1").arg(id)
301  << sep << QStringLiteral("[%1:%2]")
302  .arg(major(id), 4, 16, QLatin1Char('0'))
303  .arg(minor(id), 4, 16, QLatin1Char('0'))
304  << sep << QStringLiteral("indexed-items:%1").arg(it.value());
305 
306  if (mounted) {
307  out
308  << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData())
309  << sep << QStringLiteral("device:%1").arg(info.device().constData())
310  << sep << QStringLiteral("path:%1").arg(info.rootPath())
311  ;
312  }
313  // TODO: see above
314  // out << QStringLiteral("\033[0m") << endl;
315  out << endl;
316  }
317 
318  err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl;
319 }
320 
321 void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds,
322  const DatabaseSanitizer::ItemAccessFilters accessFilter,
323  const bool dryRun,
324  const QSharedPointer<QRegularExpression>& urlFilter)
325 {
326  auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter);
327 
328  const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter);
329 
330  const auto sep = QLatin1Char(' ');
331  auto& summary = listResult.second;
332  QTextStream out(stdout);
333  QTextStream err(stderr);
334  for (const auto& info: listResult.first) {
335  if (ignoredDevices[info.deviceId] == true) {
336  summary.ignored++;
337  } else {
338  if (info.isSymLink) {
339  out << i18n("IgnoredSymbolicLink:");
340  summary.ignored++;
341  } else {
342  m_pimpl->removeDocument(info.id);
343  out << i18n("Removing:");
344  }
345  out << sep << QStringLiteral("device: %1").arg(info.deviceId)
346  << sep << QStringLiteral("inode: %1").arg(info.inode)
347  << sep << QStringLiteral("url: %1").arg(info.url)
348  << endl;
349  }
350  }
351  if (dryRun) {
352  m_pimpl->abort();
353  } else {
354  m_pimpl->commit();
355  }
356  Q_ASSERT(summary.accessible == 0);
357  err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3",
358  summary.total - summary.ignored,
359  summary.total,
360  summary.ignored)
361  << endl;
362 }
bool isEmpty() const
QTextStream & endl(QTextStream &stream)
int size() const
Type type(const QSqlDatabase &db)
const T value(const Key &key, const T &defaultValue) const
QByteArray encodeName(const QString &fileName)
QMap::iterator begin()
void append(const T &value)
Provide methods to show database problems and sanitize them.
QMap::const_iterator cbegin() const
void flush()
QMap::iterator end()
QByteArray fileSystemType() const
QString i18n(const char *text, const TYPE &arg...)
QMap::const_iterator cend() const
Implements storage for docIds without any associated data Instantiated for:
Definition: coding.cpp:11
void printList(const QVector< qint64 > &deviceIds, const ItemAccessFilters accessFilter=IgnoreNone, const QSharedPointer< QRegularExpression > &urlFilter=nullptr)
Print database content to stdout.
void printDevices(const QVector< qint64 > &deviceIds, const ItemAccessFilters accessFilter=IgnoreNone)
Print info about known devices to stdout.
bool contains(const T &value) const
bool isValid() const
QList< QStorageInfo > mountedVolumes()
void reserve(int size)
QByteArray device() const
const char * constData() const
QByteArray toPercentEncoding(const QByteArray &exclude, const QByteArray &include, char percent) const
Q_SCRIPTABLE Q_NOREPLY void abort()
QString i18nc(const char *context, const char *text, const TYPE &arg...)
QString rootPath() const
QFuture< void > map(Sequence &sequence, MapFunctor function)
QString decodeName(const QByteArray &localFileName)
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Wed Nov 29 2023 03:56:26 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.