• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdesdk API Reference
  • KDE Home
  • Contact Us
 

lokalize

  • sources
  • kde-4.14
  • kdesdk
  • lokalize
  • src
  • common
diff.cpp
Go to the documentation of this file.
1 /* **************************************************************************
2  This file is part of Lokalize
3 
4  wordDiff algorithm adoption and further refinement:
5  2007 (C) Nick Shaforostoff <shafff@ukr.net>
6  (based on Markus Stengel's GPL implementation of LCS-Delta algorithm as it is described in "Introduction to Algorithms", MIT Press, 2001, Second Edition, written by Thomas H. Cormen et. al. It uses dynamic programming to solve the Longest Common Subsequence (LCS) problem. - http://www.markusstengel.de/text/en/i_4_1_5_3.html)
7 
8  This program is free software; you can redistribute it and/or
9  modify it under the terms of the GNU General Public License as
10  published by the Free Software Foundation; either version 2 of
11  the License or (at your option) version 3 or any later version
12  accepted by the membership of KDE e.V. (or its successor approved
13  by the membership of KDE e.V.), which shall act as a proxy
14  defined in Section 14 of version 3 of the license.
15 
16  This program is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 
24 ************************************************************************** */
25 
26 #include "diff.h"
27 
28 // #include "project.h"
29 #include "prefs_lokalize.h"
30 
31 #include <QVector>
32 #include <QStringList>
33 #include <QStringMatcher>
34 #include <QStringBuilder>
35 #include <QLinkedList>
36 
37 #include <kdebug.h>
38 
39 
40 typedef enum
41 {
42  NOTHING = 0,
43  ARROW_UP = 1,
44  ARROW_LEFT = 2,
45  ARROW_UP_LEFT = 3,
46  FINAL = 4
47 } LCSMarker;
48 
49 static QString addMarkerStart="<KBABELADD>";
50 static QString addMarkerEnd="</KBABELADD>";
51 static QString delMarkerStart="<KBABELDEL>";
52 static QString delMarkerEnd="</KBABELDEL>";
53 
54 QStringList calcLCS(const QStringList& s1Words,
55  const QStringList& s2Words,
56  const QStringList& s1Space,
57  const QStringList& s2Space
58  );
59 
60 
67  class LCSprinter
68 {
69  public:
70  LCSprinter(const QStringList &s_1,
71  const QStringList& s_2,
72  QVector<LCSMarker> *b_,
73  const uint nT_,
74  uint index,
75  const QStringList& s1Space_,
76  const QStringList& s2Space_
77  );
78  ~LCSprinter() {};
79  void printLCS(uint index);
80  inline QStringList operator()();
81 
82  private:
83  QStringList s1, s2;
84  QLinkedList<QString> resultString;
85  QStringList s1Space, s2Space;
86  QStringList::const_iterator it1, it2;
87  QStringList::const_iterator it1Space, it2Space;
88  uint nT:31;//we're using 1d vector as 2d
89  bool haveSpaces:1;//"word: sfdfs" space is ": "
90  QVector<LCSMarker> *b;
91  //QStringList::iterator it1Space, it2Space;
92 };
93 
94 inline
95 QStringList LCSprinter::operator()()
96 {
97  QStringList result;
98  foreach(const QString& str, resultString)
99  result<<str;
100 
101  return result;
102 }
103 
104 
105 inline
106 LCSprinter::LCSprinter(const QStringList& s_1,
107  const QStringList& s_2,
108  QVector<LCSMarker> *b_,
109  const uint nT_,
110  uint index,
111  const QStringList& s1Space_,
112  const QStringList& s2Space_
113  )
114  : s1(s_1)
115  , s2(s_2)
116  , s1Space(s1Space_)
117  , s2Space(s2Space_)
118  , it1(s1.constBegin())
119  , it2(s2.constBegin())
120  , it1Space(s1Space.constBegin())
121  , it2Space(s2Space.constBegin())
122  , nT(nT_)
123  , b(b_)
124 
125 {
126  haveSpaces=!s1Space_.isEmpty();
127  printLCS(index);
128 }
129 
130 
131 static QStringList prepareForInternalDiff(const QString& str)
132 {
133  QStringList result;
134  int i=str.size();
135  while(--i>=0)
136  result.prepend(QString(str.at(i)));
137  result.prepend(QString());
138  return result;
139 }
140 
141 void LCSprinter::printLCS(uint index)
142 {
143  //fprintf(stderr,"%2d. %2d. %2d. %2d\n",(uint)(*b)[index],nT,index%nT, index);
144  if (index % nT == 0 || index < nT)
145  {
146  // original LCS algo does not have to deal with ins before first common
147  uint bound = index%nT;
148  for (index=0; index<bound; ++index)
149  {
150  resultString.append(addMarkerStart);
151  resultString.append(*it2);
152  ++it2;
153  if (haveSpaces)
154  {
155  resultString.append(*it2Space);
156  ++it2Space;
157  }
158  resultString.append(addMarkerEnd);
159  }
160 
161  return;
162  }
163 
164  if (ARROW_UP_LEFT == b->at(index))
165  {
166  printLCS(index-nT-1);
167  if (it1!=s1.constEnd())
168  {
169  //kWarning() << "upleft '" << *it1 <<"'";
170  //kWarning() << "upleft 1s" << *it1Space;
171  //kWarning() << "upleft 2s" << *it2Space;
172  if (haveSpaces)
173  {
174  if((*it1)==(*it2))//case and accels
175  resultString.append(*it1);
176  else
177  {
178  QStringList word1=prepareForInternalDiff(*it1);
179  QStringList word2=prepareForInternalDiff(*it2);
180 
181  QStringList empty;
182  resultString.append(calcLCS(word1,word2,empty,empty).join(QString()));
183  }
184 
185  if((*it1Space)==(*it2Space))
186  resultString.append(*it1Space);
187  else
188  {
189  QStringList word1=prepareForInternalDiff(*it1Space);
190  QStringList word2=prepareForInternalDiff(*it2Space);
191 
192  QStringList empty;
193  //empty=calcLCS(word1,word2,empty,empty);
194 //???this is not really good if we use diff result in autosubst
195 
196  empty=calcLCS(word2,word1,empty,empty);
197  empty.replaceInStrings("KBABELADD>","KBABELTMP>");
198  empty.replaceInStrings("KBABELDEL>","KBABELADD>");
199  empty.replaceInStrings("KBABELTMP>","KBABELDEL>");
200 
201  resultString.append(empty.join(QString()));
202  }
203  ++it1Space;
204  ++it2Space;
205  //kWarning() << " common " << *it1;
206  }
207  else
208  resultString.append(*it1);//we may guess that this is a batch job, i.e. TM search
209  ++it1;
210  ++it2;
211  }
212  }
213  else if (ARROW_UP == b->at(index))
214  {
215  printLCS(index-nT);
216 // if (it1!=s1.end())
217  {
218  //kWarning()<<"APPENDDEL "<<*it1;
219  //kWarning()<<"APPENDDEL "<<*it1Space;
220  resultString.append(delMarkerStart);
221  resultString.append(*it1);
222  ++it1;
223  if (haveSpaces)
224  {
225  resultString.append(*it1Space);
226  ++it1Space;
227  }
228  resultString.append(delMarkerEnd);
229  }
230  }
231  else
232  {
233  printLCS(index-1);
234  resultString.append(addMarkerStart);
235  resultString.append(*it2);
236  ++it2;
237  if (haveSpaces)
238  {
239  //kWarning() << "add2 " << *it2;
240  resultString.append(*it2Space);
241  ++it2Space;
242  }
243  resultString.append(addMarkerEnd);
244  }
245 }
246 
247 
248 
249 
250 // calculate the LCS
251 QStringList calcLCS(const QStringList& s1Words,
252  const QStringList& s2Words,
253  const QStringList& s1Space,
254  const QStringList& s2Space
255  )
256 {
257 
258  uint i;
259  uint j;
260 
261  uint mX = s1Words.count();
262  uint nY = s2Words.count();
263 
264  //create lowered lists for matching,
265  //and use original ones for printing (but only for non-batch)
266  QStringList s1(s1Words);
267  QStringList s2(s2Words);
268 
269  if (!s1Space.isEmpty())
270  {
271  //accels are only removed by batch jobs
272  //and this is not the one
273  //also, lower things a bit :)
274 
275  for (i=0;i<mX;++i)
276  s1[i]=s1.at(i).toLower();
277  for (i=0;i<nY;++i)
278  s2[i]=s2.at(i).toLower();
279 #if 0 //i'm too lazy...
280  QString accel(Project::instance()->accel());
281  i=mX;
282  while(--i>0)
283  {
284  if ((s1Space.at(i)==accel))
285  {
286  s1[i]+=s1[i+1];
287  s1.removeAt(i+1);
288  s1Space.removeAt(i);
289  s1Words[i]+=s1[i+1];
290  s1Words.removeAt(i+1);
291  --mX;
292  --nY;
293  }
294  }
295 #endif
296  }
297 
298 
299  uint mT = mX+1;
300  uint nT = nY+1;
301 
302  QVector<LCSMarker> b(mT*nT, NOTHING);
303  QVector<uint> c(mT*nT, 0);
304 
305 
306 
307  b[0] = FINAL;
308  uint index_cache = 0;
309  QStringList::const_iterator it1, it2;
310 
311  for (i=1, it1 = s1.constBegin(); i<mT; ++i, ++it1)
312  {
313  for (j=1, it2 = s2.constBegin(); j<nT; ++j, ++it2)
314  {
315  index_cache = i*nT+j;
316  if ((*it1)==(*it2))
317  {
318  c[index_cache] = c.at(index_cache-nT-1) + 1;
319  b[index_cache] = ARROW_UP_LEFT;
320  }
321  else if (c.at(index_cache-nT) >= c.at(index_cache-1))
322  {
323  c[index_cache] = c.at(index_cache-nT);
324  b[index_cache] = ARROW_UP;
325  }
326  else
327  {
328  c[index_cache] = c.at(index_cache-1);
329  b[index_cache] = ARROW_LEFT;
330  }
331  }
332  }
333 
334  c.clear();
335 
336  LCSprinter printer(s1Words, s2Words, &b, nT, index_cache, s1Space, s2Space);
337 
338  return printer();
339 
340 }
341 
342 QString wordDiff(QStringList s1, QStringList s2)
343 {
344  static QString space(" ");
345  s1.prepend(space);
346  s2.prepend(space);
347 
348  static QStringList empty;
349  QStringList list=calcLCS(s1,s2,empty,empty);
350  bool r=list.first()==" ";
351  if (r)
352  list.removeFirst();
353  else
354  qDebug()<<"first ' ' assumption is wrong"<<list.first();
355 
356  QString result=list.join(QString());
357 
358 
359  if (!r)
360  result.remove(0,1);
361  result.remove("</KBABELADD><KBABELADD>");
362  result.remove("</KBABELDEL><KBABELDEL>");
363 
364  return result;
365 }
366 
367 
368 //this also separates punctuation marks etc from words as _only_ they may have changed
369 static void prepareLists(QString str, QStringList& main, QStringList& space, const QString& accel, QString markup)
370 {
371  Q_UNUSED(accel);
372  int pos=0;
373 
374  //accels are only removed by batch jobs
375  //and this is not the one
376 #if 0
377  QRegExp rxAccelInWord("[^\\W|\\d]"+accel+"[^\\W|\\d]");
378  int accelLen=accel.size();
379  while ((pos=rxAccelInWord.indexIn(str,pos))!=-1)
380  {
381  str.remove(rxAccelInWord.pos()+1,accelLen);
382  pos+=2;//two letters
383  }
384 #endif
385 
386  //QRegExp rxSplit("\\W+|\\d+");
387  //i tried that but it failed:
388  if (!markup.isEmpty())
389  markup+='|';
390  QRegExp rxSplit('('%markup%"\\W+|\\d+)+");
391 
392  main=str.split(rxSplit,QString::SkipEmptyParts);
393  main.prepend("\t");//little hack
394 
395 
396  //ensure the string always begins with the space part
397  str.prepend('\b');
398  pos=0;
399  while ((pos=rxSplit.indexIn(str,pos))!=-1)
400  {
401  space.append(rxSplit.cap(0));
402  pos+=rxSplit.matchedLength();
403  }
404  space.append(QString());//so we don't have to worry about list boundaries
405  space.append(QString());//so we don't have to worry about list boundaries
406 }
407 
408 QString userVisibleWordDiff(const QString& str1ForMatching,
409  const QString& str2ForMatching,
410  const QString& accel,
411  const QString& markup,
412  int options)
413 {
414  QStringList s1, s2;
415  QStringList s1Space, s2Space;
416 
417 
418  prepareLists(str1ForMatching, s1, s1Space, accel, markup);
419  prepareLists(str2ForMatching, s2, s2Space, accel, markup);
420 
421  //QRegExp rxSpace("[^(\\W+|\\d+)]");
422  //i tried that but it failed:
423  //QRegExp rxSpace("[^("+Project::instance()->markup()+"|\\W+|\\d+)]");
424  //QStringList s1Space(str1ForMatching.split(rxSpace,QString::SkipEmptyParts));
425  //QStringList s2Space(str2ForMatching.split(rxSpace,QString::SkipEmptyParts));
426 
427 
428  QStringList result(calcLCS(s1,s2,s1Space,s2Space));
429  result.removeFirst();//\t
430  result.first().remove(0,1);//\b
431 // kWarning()<<"wordDiff 1 '" <<result<<"'";
432  result.replaceInStrings("<KBABELDEL></KBABELDEL>","");
433  result.replaceInStrings("<KBABELADD></KBABELADD>","");
434 
435  result.replaceInStrings("<KBABELADD>","{KBABELADD}");
436  result.replaceInStrings("</KBABELADD>","{/KBABELADD}");
437  result.replaceInStrings("<KBABELDEL>","{KBABELDEL}");
438  result.replaceInStrings("</KBABELDEL>","{/KBABELDEL}");
439 
440  if (options&Html)
441  {
442  result.replaceInStrings("&","&amp;");
443  result.replaceInStrings("<","&lt;");
444  result.replaceInStrings(">","&gt;");
445  }
446 
447  //result.last().chop(1);//\b
448  //kWarning()<<"DIFF RESULT '" <<result<<"' '"<<result<<"'";
449 
450  QString res(result.join(QString()));
451  res.remove("{/KBABELADD}{KBABELADD}");
452  res.remove("{/KBABELDEL}{KBABELDEL}");
453 
454  if (options&Html)
455  {
456  res.replace("{KBABELADD}","<font style=\"background-color:"%Settings::addColor().name()%";color:black\">");
457  res.replace("{/KBABELADD}","</font>");
458  res.replace("{KBABELDEL}","<font style=\"background-color:"%Settings::delColor().name()%";color:black\">");
459  res.replace("{/KBABELDEL}","</font>");
460  res.replace("\\n","\\n<br>");
461  }
462 
463  return res;
464 }
465 
QRegExp::pos
int pos(int nth) const
Html
Definition: diff.h:45
QRegExp::cap
QString cap(int nth) const
calcLCS
QStringList calcLCS(const QStringList &s1Words, const QStringList &s2Words, const QStringList &s1Space, const QStringList &s2Space)
Definition: diff.cpp:251
wordDiff
QString wordDiff(QStringList s1, QStringList s2)
This is low-level wrapper used for evaluating translation memory search results.
Definition: diff.cpp:342
ARROW_UP
Definition: diff.cpp:43
QString::split
QStringList split(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
QString::prepend
QString & prepend(QChar ch)
main
int main(int argc, char **argv)
Definition: main.cpp:58
QList::removeFirst
void removeFirst()
userVisibleWordDiff
QString userVisibleWordDiff(const QString &str1ForMatching, const QString &str2ForMatching, const QString &accel, const QString &markup, int options)
Definition: diff.cpp:408
QList::at
const T & at(int i) const
Project::instance
static Project * instance()
Definition: project.cpp:67
QString::size
int size() const
QList::removeAt
void removeAt(int i)
delMarkerEnd
static QString delMarkerEnd
Definition: diff.cpp:52
QStringList::join
QString join(const QString &separator) const
QString::remove
QString & remove(int position, int n)
Settings::delColor
static QColor delColor()
Get DelColor.
Definition: prefs_lokalize.h:133
QList::const_iterator
addMarkerStart
static QString addMarkerStart
Definition: diff.cpp:49
QRegExp::matchedLength
int matchedLength() const
QRegExp::indexIn
int indexIn(const QString &str, int offset, CaretMode caretMode) const
QLinkedList< QString >
QRegExp
QVector::clear
void clear()
QList::count
int count(const T &value) const
QList::append
void append(const T &value)
Settings::addColor
static QColor addColor()
Get AddColor.
Definition: prefs_lokalize.h:115
QList::isEmpty
bool isEmpty() const
QString::isEmpty
bool isEmpty() const
QStringList::replaceInStrings
QStringList & replaceInStrings(const QString &before, const QString &after, Qt::CaseSensitivity cs)
QList::first
T & first()
QString
LCSMarker
LCSMarker
Definition: diff.cpp:40
diff.h
QStringList
ARROW_UP_LEFT
Definition: diff.cpp:45
addMarkerEnd
static QString addMarkerEnd
Definition: diff.cpp:50
ARROW_LEFT
Definition: diff.cpp:44
QString::replace
QString & replace(int position, int n, QChar after)
FINAL
Definition: diff.cpp:46
QVector::at
const T & at(int i) const
QVector< LCSMarker >
prefs_lokalize.h
prepareForInternalDiff
static QStringList prepareForInternalDiff(const QString &str)
Definition: diff.cpp:131
QString::at
const QChar at(int position) const
NOTHING
Definition: diff.cpp:42
QList::prepend
void prepend(const T &value)
delMarkerStart
static QString delMarkerStart
Definition: diff.cpp:51
QList::constBegin
const_iterator constBegin() const
prepareLists
static void prepareLists(QString str, QStringList &main, QStringList &space, const QString &accel, QString markup)
Definition: diff.cpp:369
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:40:06 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

lokalize

Skip menu "lokalize"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members

kdesdk API Reference

Skip menu "kdesdk API Reference"
  • kapptemplate
  • kcachegrind
  • kompare
  • lokalize
  • umbrello
  •   umbrello

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal