Baloo

writetransaction.cpp
1 /*
2  This file is part of the KDE Baloo project.
3  SPDX-FileCopyrightText: 2015 Vishesh Handa <[email protected]>
4 
5  SPDX-License-Identifier: LGPL-2.1-or-later
6 */
7 
8 #include "writetransaction.h"
9 #include "transaction.h"
10 
11 #include "postingdb.h"
12 #include "documentdb.h"
13 #include "documentiddb.h"
14 #include "positiondb.h"
15 #include "documenttimedb.h"
16 #include "documentdatadb.h"
17 #include "mtimedb.h"
18 #include "idutils.h"
19 
20 using namespace Baloo;
21 
22 void WriteTransaction::addDocument(const Document& doc)
23 {
24  quint64 id = doc.id();
25 
26  DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
27  DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
28  DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
29  DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
30  DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
31  DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
32  MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
33  DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
34 
35  Q_ASSERT(!documentTermsDB.contains(id));
36  Q_ASSERT(!documentXattrTermsDB.contains(id));
37  Q_ASSERT(!documentFileNameTermsDB.contains(id));
38  Q_ASSERT(!docTimeDB.contains(id));
39  Q_ASSERT(!docDataDB.contains(id));
40  Q_ASSERT(!contentIndexingDB.contains(id));
41  Q_ASSERT(doc.parentId());
42 
43  {
44  auto url = doc.url();
45  int pos = url.lastIndexOf('/');
46  auto filename = url.mid(pos + 1);
47  auto parentId = doc.parentId();
48 
49  if (pos > 0) {
50  docUrlDB.addPath(url.left(pos));
51  } else {
52  parentId = 0;
53  }
54 
55  if (!docUrlDB.put(id, parentId, filename)) {
56  return;
57  }
58  }
59 
60  QVector<QByteArray> docTerms = addTerms(id, doc.m_terms);
61  Q_ASSERT(!docTerms.empty());
62  documentTermsDB.put(id, docTerms);
63 
64  QVector<QByteArray> docXattrTerms = addTerms(id, doc.m_xattrTerms);
65  if (!docXattrTerms.isEmpty()) {
66  documentXattrTermsDB.put(id, docXattrTerms);
67  }
68 
69  QVector<QByteArray> docFileNameTerms = addTerms(id, doc.m_fileNameTerms);
70  if (!docFileNameTerms.isEmpty()) {
71  documentFileNameTermsDB.put(id, docFileNameTerms);
72  }
73 
74  if (doc.contentIndexing()) {
75  contentIndexingDB.put(doc.id());
76  }
77 
78  DocumentTimeDB::TimeInfo info;
79  info.mTime = doc.m_mTime;
80  info.cTime = doc.m_cTime;
81 
82  docTimeDB.put(id, info);
83  mtimeDB.put(doc.m_mTime, id);
84 
85  if (!doc.m_data.isEmpty()) {
86  docDataDB.put(id, doc.m_data);
87  }
88 }
89 
90 QVector<QByteArray> WriteTransaction::addTerms(quint64 id, const QMap<QByteArray, Document::TermData>& terms)
91 {
92  QVector<QByteArray> termList;
93  termList.reserve(terms.size());
94  m_pendingOperations.reserve(m_pendingOperations.size() + terms.size());
95 
96  for (auto it = terms.cbegin(), end = terms.cend(); it != end; ++it) {
97  const QByteArray& term = it.key();
98  termList.append(term);
99 
100  Operation op;
101  op.type = AddId;
102  op.data.docId = id;
103  op.data.positions = it.value().positions;
104 
105  m_pendingOperations[term].append(op);
106  }
107 
108  return termList;
109 }
110 
111 void WriteTransaction::removeDocument(quint64 id)
112 {
113  DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
114  DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
115  DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
116  DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
117  DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
118  DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
119  DocumentIdDB failedIndexingDB(m_dbis.failedIdDbi, m_txn);
120  MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
121  DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
122 
123  removeTerms(id, documentTermsDB.get(id));
124  removeTerms(id, documentFileNameTermsDB.get(id));
125  if (documentXattrTermsDB.contains(id)) {
126  removeTerms(id, documentXattrTermsDB.get(id));
127  }
128 
129  documentTermsDB.del(id);
130  documentXattrTermsDB.del(id);
131  documentFileNameTermsDB.del(id);
132 
133  docUrlDB.del(id);
134 
135  contentIndexingDB.del(id);
136  failedIndexingDB.del(id);
137 
138  DocumentTimeDB::TimeInfo info = docTimeDB.get(id);
139  docTimeDB.del(id);
140  mtimeDB.del(info.mTime, id);
141 
142  docDataDB.del(id);
143 }
144 
145 void WriteTransaction::removeTerms(quint64 id, const QVector<QByteArray>& terms)
146 {
147  for (const QByteArray& term : terms) {
148  Operation op;
149  op.type = RemoveId;
150  op.data.docId = id;
151 
152  m_pendingOperations[term].append(op);
153  }
154 }
155 
156 void WriteTransaction::removeRecursively(quint64 parentId)
157 {
158  DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
159 
160  const QVector<quint64> children = docUrlDB.getChildren(parentId);
161  for (quint64 id : children) {
162  if (id) {
163  removeRecursively(id);
164  }
165  }
166  removeDocument(parentId);
167 }
168 
169 bool WriteTransaction::removeRecursively(quint64 parentId, const std::function<bool(quint64)> &shouldDelete)
170 {
171  DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
172 
173  if (parentId && !shouldDelete(parentId)) {
174  return false;
175  }
176 
177  bool isEmpty = true;
178  const QVector<quint64> children = docUrlDB.getChildren(parentId);
179  for (quint64 id : children) {
180  isEmpty &= removeRecursively(id, shouldDelete);
181  }
182  // refetch
183  if (isEmpty && docUrlDB.getChildren(parentId).isEmpty()) {
184  removeDocument(parentId);
185  return true;
186  }
187  return false;
188 }
189 
190 void WriteTransaction::replaceDocument(const Document& doc, DocumentOperations operations)
191 {
192  DocumentDB documentTermsDB(m_dbis.docTermsDbi, m_txn);
193  DocumentDB documentXattrTermsDB(m_dbis.docXattrTermsDbi, m_txn);
194  DocumentDB documentFileNameTermsDB(m_dbis.docFilenameTermsDbi, m_txn);
195  DocumentTimeDB docTimeDB(m_dbis.docTimeDbi, m_txn);
196  DocumentDataDB docDataDB(m_dbis.docDataDbi, m_txn);
197  DocumentIdDB contentIndexingDB(m_dbis.contentIndexingDbi, m_txn);
198  MTimeDB mtimeDB(m_dbis.mtimeDbi, m_txn);
199  DocumentUrlDB docUrlDB(m_dbis.idTreeDbi, m_dbis.idFilenameDbi, m_txn);
200 
201  const quint64 id = doc.id();
202 
203  if (operations & DocumentTerms) {
204  Q_ASSERT(!doc.m_terms.isEmpty());
205  QVector<QByteArray> prevTerms = documentTermsDB.get(id);
206  QVector<QByteArray> docTerms = replaceTerms(id, prevTerms, doc.m_terms);
207 
208  if (docTerms != prevTerms) {
209  documentTermsDB.put(id, docTerms);
210  }
211  }
212 
213  if (operations & XAttrTerms) {
214  QVector<QByteArray> prevTerms = documentXattrTermsDB.get(id);
215  QVector<QByteArray> docXattrTerms = replaceTerms(id, prevTerms, doc.m_xattrTerms);
216 
217  if (docXattrTerms != prevTerms) {
218  if (!docXattrTerms.isEmpty()) {
219  documentXattrTermsDB.put(id, docXattrTerms);
220  } else {
221  documentXattrTermsDB.del(id);
222  }
223  }
224  }
225 
226  if (operations & FileNameTerms) {
227  QVector<QByteArray> prevTerms = documentFileNameTermsDB.get(id);
228  QVector<QByteArray> docFileNameTerms = replaceTerms(id, prevTerms, doc.m_fileNameTerms);
229 
230  if (docFileNameTerms != prevTerms) {
231  if (!docFileNameTerms.isEmpty()) {
232  documentFileNameTermsDB.put(id, docFileNameTerms);
233  } else {
234  documentFileNameTermsDB.del(id);
235  }
236  }
237  }
238 
239  if (doc.contentIndexing()) {
240  contentIndexingDB.put(doc.id());
241  }
242 
243  if (operations & DocumentTime) {
244  DocumentTimeDB::TimeInfo info = docTimeDB.get(id);
245  if (info.mTime != doc.m_mTime) {
246  mtimeDB.del(info.mTime, id);
247  mtimeDB.put(doc.m_mTime, id);
248  }
249 
250  info.mTime = doc.m_mTime;
251  info.cTime = doc.m_cTime;
252  docTimeDB.put(id, info);
253  }
254 
255  if (operations & DocumentData) {
256  if (!doc.m_data.isEmpty()) {
257  docDataDB.put(id, doc.m_data);
258  } else {
259  docDataDB.del(id);
260  }
261  }
262 
263  if (operations & DocumentUrl) {
264  auto url = doc.url();
265  int pos = url.lastIndexOf('/');
266  auto newname = url.mid(pos + 1);
267  docUrlDB.updateUrl(doc.id(), doc.parentId(), newname);
268  }
269 }
270 
271 QVector< QByteArray > WriteTransaction::replaceTerms(quint64 id, const QVector<QByteArray>& prevTerms,
273 {
274  m_pendingOperations.reserve(m_pendingOperations.size() + prevTerms.size() + terms.size());
275  for (const QByteArray& term : prevTerms) {
276  Operation op;
277  op.type = RemoveId;
278  op.data.docId = id;
279 
280  m_pendingOperations[term].append(op);
281  }
282 
283  return addTerms(id, terms);
284 }
285 
286 void WriteTransaction::commit()
287 {
288  PostingDB postingDB(m_dbis.postingDbi, m_txn);
289  PositionDB positionDB(m_dbis.positionDBi, m_txn);
290 
291  QHashIterator<QByteArray, QVector<Operation> > iter(m_pendingOperations);
292  while (iter.hasNext()) {
293  iter.next();
294 
295  const QByteArray& term = iter.key();
296  const QVector<Operation> operations = iter.value();
297 
298  PostingList list = postingDB.get(term);
299 
300  bool fetchedPositionList = false;
301  QVector<PositionInfo> positionList;
302 
303  for (const Operation& op : operations) {
304  quint64 id = op.data.docId;
305 
306  if (op.type == AddId) {
307  sortedIdInsert(list, id);
308 
309  if (!op.data.positions.isEmpty()) {
310  if (!fetchedPositionList) {
311  positionList = positionDB.get(term);
312  fetchedPositionList = true;
313  }
314  sortedIdInsert(positionList, op.data);
315  }
316  }
317  else {
318  sortedIdRemove(list, id);
319  if (!fetchedPositionList) {
320  positionList = positionDB.get(term);
321  fetchedPositionList = true;
322  }
323  sortedIdRemove(positionList, PositionInfo(id));
324  }
325  }
326 
327  if (!list.isEmpty()) {
328  postingDB.put(term, list);
329  } else {
330  postingDB.del(term);
331  }
332 
333  if (fetchedPositionList) {
334  if (!positionList.isEmpty()) {
335  positionDB.put(term, positionList);
336  } else {
337  positionDB.del(term);
338  }
339  }
340  }
341 
342  m_pendingOperations.clear();
343 }
bool isEmpty() const const
int size() const const
void append(const T &value)
KIOFILEWIDGETS_EXPORT QStringList list(const QString &fileClass)
T * data()
QMap::const_iterator cbegin() const const
int lastIndexOf(char ch, int from) const const
T value(int i) const const
The MTime DB maps the file mtime to its id.
Definition: mtimedb.h:24
QMap::const_iterator cend() const const
Implements storage for docIds without any associated data Instantiated for:
Definition: coding.cpp:11
bool isEmpty() const const
void reserve(int size)
Implements storage for a set of <term>s for the given docId. Instantiated for:
Definition: documentdb.h:25
bool isEmpty() const const
void sortedIdInsert(T &vec, const V &id)
Definition: idutils.h:98
int size() const const
The PostingDB is the main database that maps <term> -> <id1> <id2> <id3> ...
Definition: postingdb.h:27
A document represents an indexed file to be stored in the Baloo engine.
Definition: document.h:30
bool empty() const const
bool isEmpty() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon Dec 11 2023 03:53:57 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.