• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.14
  • kdelibs
  • kdecore
  • text
kstringhandler.cpp
Go to the documentation of this file.
1 /* This file is part of the KDE libraries
2  Copyright (C) 1999 Ian Zepp (icszepp@islc.net)
3  Copyright (C) 2006 by Dominic Battre <dominic@battre.de>
4  Copyright (C) 2006 by Martin Pool <mbp@canonical.com>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Library General Public
8  License as published by the Free Software Foundation; either
9  version 2 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Library General Public License for more details.
15 
16  You should have received a copy of the GNU Library General Public License
17  along with this library; see the file COPYING.LIB. If not, write to
18  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  Boston, MA 02110-1301, USA.
20 */
21 
22 #include "kstringhandler.h"
23 
24 #include <stdlib.h> // random()
25 
26 #include <kglobal.h>
27 
28 #include <QtCore/QRegExp> // for the word ranges
29 #include <QtCore/QCharRef>
30 #include <QtCore/QMutableStringListIterator>
31 
32 
33 
34 //
35 // Capitalization routines
36 //
37 QString KStringHandler::capwords( const QString &text )
38 {
39  if ( text.isEmpty() ) {
40  return text;
41  }
42 
43  const QString strippedText = text.trimmed();
44  const QString space = QString(QLatin1Char(' '));
45  const QStringList words = capwords(strippedText.split(space));
46 
47  QString result = text;
48  result.replace(strippedText, words.join(space));
49  return result;
50 }
51 
52 QStringList KStringHandler::capwords( const QStringList &list )
53 {
54  QStringList tmp = list;
55  for ( QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it ) {
56  *it = ( *it )[ 0 ].toUpper() + ( *it ).mid( 1 );
57  }
58  return tmp;
59 }
60 
61 
62 QString KStringHandler::lsqueeze( const QString & str, int maxlen )
63 {
64  if (str.length() > maxlen) {
65  int part = maxlen-3;
66  return QString::fromLatin1("...") + str.right(part);
67  }
68  else return str;
69 }
70 
71 QString KStringHandler::csqueeze( const QString & str, int maxlen )
72 {
73  if (str.length() > maxlen && maxlen > 3) {
74  const int part = (maxlen-3)/2;
75  return str.left(part) + QLatin1String("...") + str.right(part);
76  }
77  else return str;
78 }
79 
80 QString KStringHandler::rsqueeze( const QString & str, int maxlen )
81 {
82  if (str.length() > maxlen) {
83  int part = maxlen-3;
84  return str.left(part) + QLatin1String("...");
85  }
86  else return str;
87 }
88 
89 QStringList KStringHandler::perlSplit(const QString & sep, const QString & s, int max)
90 {
91  bool ignoreMax = 0 == max;
92 
93  QStringList l;
94 
95  int searchStart = 0;
96 
97  int tokenStart = s.indexOf(sep, searchStart);
98 
99  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
100  {
101  if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
102  l << s.mid(searchStart, tokenStart - searchStart);
103 
104  searchStart = tokenStart + sep.length();
105  tokenStart = s.indexOf(sep, searchStart);
106  }
107 
108  if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
109  l << s.mid(searchStart, s.length() - searchStart);
110 
111  return l;
112 }
113 
114 QStringList KStringHandler::perlSplit(const QChar & sep, const QString & s, int max)
115 {
116  bool ignoreMax = 0 == max;
117 
118  QStringList l;
119 
120  int searchStart = 0;
121 
122  int tokenStart = s.indexOf(sep, searchStart);
123 
124  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
125  {
126  if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
127  l << s.mid(searchStart, tokenStart - searchStart);
128 
129  searchStart = tokenStart + 1;
130  tokenStart = s.indexOf(sep, searchStart);
131  }
132 
133  if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
134  l << s.mid(searchStart, s.length() - searchStart);
135 
136  return l;
137 }
138 
139 QStringList KStringHandler::perlSplit(const QRegExp & sep, const QString & s, int max)
140 {
141  bool ignoreMax = 0 == max;
142 
143  QStringList l;
144 
145  int searchStart = 0;
146  int tokenStart = sep.indexIn(s, searchStart);
147  int len = sep.matchedLength();
148 
149  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
150  {
151  if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
152  l << s.mid(searchStart, tokenStart - searchStart);
153 
154  searchStart = tokenStart + len;
155  tokenStart = sep.indexIn(s, searchStart);
156  len = sep.matchedLength();
157  }
158 
159  if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
160  l << s.mid(searchStart, s.length() - searchStart);
161 
162  return l;
163 }
164 
165 QString KStringHandler::tagUrls( const QString& text )
166 {
167  /*static*/ QRegExp urlEx(QLatin1String("(www\\.(?!\\.)|(fish|(f|ht)tp(|s))://)[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$]+[\\d\\w/]"));
168 
169  QString richText( text );
170  int urlPos = 0, urlLen;
171  while ((urlPos = urlEx.indexIn(richText, urlPos)) >= 0)
172  {
173  urlLen = urlEx.matchedLength();
174  QString href = richText.mid( urlPos, urlLen );
175  // Qt doesn't support (?<=pattern) so we do it here
176  if((urlPos > 0) && richText[urlPos-1].isLetterOrNumber()){
177  urlPos++;
178  continue;
179  }
180  // Don't use QString::arg since %01, %20, etc could be in the string
181  QString anchor = QString::fromLatin1("<a href=\"") + href + QLatin1String("\">") + href + QLatin1String("</a>");
182  richText.replace( urlPos, urlLen, anchor );
183 
184 
185  urlPos += anchor.length();
186  }
187  return richText;
188 }
189 
190 QString KStringHandler::obscure( const QString &str )
191 {
192  QString result;
193  const QChar *unicode = str.unicode();
194  for ( int i = 0; i < str.length(); ++i )
195  // yes, no typo. can't encode ' ' or '!' because
196  // they're the unicode BOM. stupid scrambling. stupid.
197  result += ( unicode[ i ].unicode() <= 0x21 ) ? unicode[ i ] :
198  QChar( 0x1001F - unicode[ i ].unicode() );
199 
200  return result;
201 }
202 
203 
204 bool KStringHandler::isUtf8( const char *buf )
205 {
206  int i, n;
207  register unsigned char c;
208  bool gotone = false;
209 
210  if (!buf)
211  return true; // whatever, just don't crash
212 
213 #define F 0 /* character never appears in text */
214 #define T 1 /* character appears in plain ASCII text */
215 #define I 2 /* character appears in ISO-8859 text */
216 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
217 
218  static const unsigned char text_chars[256] = {
219  /* BEL BS HT LF FF CR */
220  F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
221  /* ESC */
222  F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
223  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
224  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
225  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
226  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
227  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
228  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
229  /* NEL */
230  X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
231  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
232  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
233  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
234  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
235  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
236  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
237  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
238  };
239 
240  /* *ulen = 0; */
241  for (i = 0; (c = buf[i]); ++i) {
242  if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
243  /*
244  * Even if the whole file is valid UTF-8 sequences,
245  * still reject it if it uses weird control characters.
246  */
247 
248  if (text_chars[c] != T)
249  return false;
250 
251  } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
252  return false;
253  } else { /* 11xxxxxx begins UTF-8 */
254  int following;
255 
256  if ((c & 0x20) == 0) { /* 110xxxxx */
257  following = 1;
258  } else if ((c & 0x10) == 0) { /* 1110xxxx */
259  following = 2;
260  } else if ((c & 0x08) == 0) { /* 11110xxx */
261  following = 3;
262  } else if ((c & 0x04) == 0) { /* 111110xx */
263  following = 4;
264  } else if ((c & 0x02) == 0) { /* 1111110x */
265  following = 5;
266  } else
267  return false;
268 
269  for (n = 0; n < following; ++n) {
270  i++;
271  if (!(c = buf[i]))
272  goto done;
273 
274  if ((c & 0x80) == 0 || (c & 0x40))
275  return false;
276  }
277  gotone = true;
278  }
279  }
280 done:
281  return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
282 }
283 
284 #undef F
285 #undef T
286 #undef I
287 #undef X
288 
289 QString KStringHandler::from8Bit( const char *str )
290 {
291  if (!str)
292  return QString();
293  if (!*str) {
294  static const QString &emptyString = KGlobal::staticQString("");
295  return emptyString;
296  }
297  return KStringHandler::isUtf8( str ) ?
298  QString::fromUtf8( str ) :
299  QString::fromLocal8Bit( str );
300 }
301 
302 int KStringHandler::naturalCompare(const QString &_a, const QString &_b, Qt::CaseSensitivity caseSensitivity)
303 {
304  // This method chops the input a and b into pieces of
305  // digits and non-digits (a1.05 becomes a | 1 | . | 05)
306  // and compares these pieces of a and b to each other
307  // (first with first, second with second, ...).
308  //
309  // This is based on the natural sort order code code by Martin Pool
310  // http://sourcefrog.net/projects/natsort/
311  // Martin Pool agreed to license this under LGPL or GPL.
312 
313  // FIXME: Using toLower() to implement case insensitive comparison is
314  // sub-optimal, but is needed because we compare strings with
315  // localeAwareCompare(), which does not know about case sensitivity.
316  // A task has been filled for this in Qt Task Tracker with ID 205990.
317  // http://trolltech.com/developer/task-tracker/index_html?method=entry&id=205990
318  QString a;
319  QString b;
320  if (caseSensitivity == Qt::CaseSensitive) {
321  a = _a;
322  b = _b;
323  } else {
324  a = _a.toLower();
325  b = _b.toLower();
326  }
327 
328  const QChar* currA = a.unicode(); // iterator over a
329  const QChar* currB = b.unicode(); // iterator over b
330 
331  if (currA == currB) {
332  return 0;
333  }
334 
335  while (!currA->isNull() && !currB->isNull()) {
336  const QChar* begSeqA = currA; // beginning of a new character sequence of a
337  const QChar* begSeqB = currB;
338  if (currA->unicode() == QChar::ObjectReplacementCharacter) {
339  return 1;
340  }
341 
342  if (currB->unicode() == QChar::ObjectReplacementCharacter) {
343  return -1;
344  }
345 
346  if (currA->unicode() == QChar::ReplacementCharacter) {
347  return 1;
348  }
349 
350  if (currB->unicode() == QChar::ReplacementCharacter) {
351  return -1;
352  }
353 
354  // find sequence of characters ending at the first non-character
355  while (!currA->isNull() && !currA->isDigit() && !currA->isPunct() && !currA->isSpace()) {
356  ++currA;
357  }
358 
359  while (!currB->isNull() && !currB->isDigit() && !currB->isPunct() && !currB->isSpace()) {
360  ++currB;
361  }
362 
363  // compare these sequences
364  const QStringRef& subA(a.midRef(begSeqA - a.unicode(), currA - begSeqA));
365  const QStringRef& subB(b.midRef(begSeqB - b.unicode(), currB - begSeqB));
366  const int cmp = QStringRef::localeAwareCompare(subA, subB);
367  if (cmp != 0) {
368  return cmp < 0 ? -1 : +1;
369  }
370 
371  if (currA->isNull() || currB->isNull()) {
372  break;
373  }
374 
375  // find sequence of characters ending at the first non-character
376  while ((currA->isPunct() || currA->isSpace()) && (currB->isPunct() || currB->isSpace())) {
377  if (*currA != *currB) {
378  return (*currA < *currB) ? -1 : +1;
379  }
380  ++currA;
381  ++currB;
382  if (currA->isNull() || currB->isNull()) {
383  break;
384  }
385  }
386 
387  // now some digits follow...
388  if ((*currA == QLatin1Char('0')) || (*currB == QLatin1Char('0'))) {
389  // one digit-sequence starts with 0 -> assume we are in a fraction part
390  // do left aligned comparison (numbers are considered left aligned)
391  while (1) {
392  if (!currA->isDigit() && !currB->isDigit()) {
393  break;
394  } else if (!currA->isDigit()) {
395  return +1;
396  } else if (!currB->isDigit()) {
397  return -1;
398  } else if (*currA < *currB) {
399  return -1;
400  } else if (*currA > *currB) {
401  return + 1;
402  }
403  ++currA;
404  ++currB;
405  }
406  } else {
407  // No digit-sequence starts with 0 -> assume we are looking at some integer
408  // do right aligned comparison.
409  //
410  // The longest run of digits wins. That aside, the greatest
411  // value wins, but we can't know that it will until we've scanned
412  // both numbers to know that they have the same magnitude.
413 
414  bool isFirstRun = true;
415  int weight = 0;
416  while (1) {
417  if (!currA->isDigit() && !currB->isDigit()) {
418  if (weight != 0) {
419  return weight;
420  }
421  break;
422  } else if (!currA->isDigit()) {
423  if (isFirstRun) {
424  return *currA < *currB ? -1 : +1;
425  } else {
426  return -1;
427  }
428  } else if (!currB->isDigit()) {
429  if (isFirstRun) {
430  return *currA < *currB ? -1 : +1;
431  } else {
432  return +1;
433  }
434  } else if ((*currA < *currB) && (weight == 0)) {
435  weight = -1;
436  } else if ((*currA > *currB) && (weight == 0)) {
437  weight = + 1;
438  }
439  ++currA;
440  ++currB;
441  isFirstRun = false;
442  }
443  }
444  }
445 
446  if (currA->isNull() && currB->isNull()) {
447  return 0;
448  }
449 
450  return currA->isNull() ? -1 : + 1;
451 }
452 
453 QString KStringHandler::preProcessWrap(const QString &text)
454 {
455  const QChar zwsp(0x200b);
456 
457  QString result;
458  result.reserve(text.length());
459 
460  for (int i = 0; i < text.length(); i++) {
461  const QChar c = text[i];
462  bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
463  bool singleQuote = (c == QLatin1Char('\'') );
464  bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
465  bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
466  bool nextIsSpace = (i == (text.length() - 1) || text[i + 1].isSpace());
467  bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
468 
469  // Provide a breaking opportunity before opening parenthesis
470  if (openingParens && !prevIsSpace)
471  result += zwsp;
472 
473  // Provide a word joiner before the single quote
474  if (singleQuote && !prevIsSpace)
475  result += QChar(0x2060);
476 
477  result += c;
478 
479  if (breakAfter && !openingParens && !nextIsSpace && !singleQuote)
480  result += zwsp;
481  }
482 
483  return result;
484 }
485 
QString::indexOf
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const
KStringHandler::from8Bit
QString from8Bit(const char *str)
Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded.
Definition: kstringhandler.cpp:289
KStringHandler::naturalCompare
int naturalCompare(const QString &a, const QString &b, Qt::CaseSensitivity caseSensitivity=Qt::CaseSensitive)
Does a natural comparing of the strings.
Definition: kstringhandler.cpp:302
KStringHandler::rsqueeze
QString rsqueeze(const QString &str, int maxlen=40)
Substitute characters at the end of a string by "...".
Definition: kstringhandler.cpp:80
T
#define T
QChar
QString::split
QStringList split(const QString &sep, SplitBehavior behavior, Qt::CaseSensitivity cs) const
QChar::isDigit
bool isDigit() const
QStringList::join
QString join(const QString &separator) const
QStringRef
QRegExp::matchedLength
int matchedLength() const
QRegExp::indexIn
int indexIn(const QString &str, int offset, CaretMode caretMode) const
QRegExp
kglobal.h
QList::count
int count(const T &value) const
QString::fromLocal8Bit
QString fromLocal8Bit(const char *str, int size)
QString::fromUtf8
QString fromUtf8(const char *str, int size)
QChar::isSpace
bool isSpace() const
F
#define F
KStringHandler::preProcessWrap
QString preProcessWrap(const QString &text)
Preprocesses the given string in order to provide additional line breaking opportunities for QTextLayout.
Definition: kstringhandler.cpp:453
KStringHandler::isUtf8
bool isUtf8(const char *str)
Guess whether a string is UTF8 encoded.
Definition: kstringhandler.cpp:204
QString::isEmpty
bool isEmpty() const
QString::trimmed
QString trimmed() const
QList::Iterator
typedef Iterator
KStringHandler::csqueeze
QString csqueeze(const QString &str, int maxlen=40)
Substitute characters at the middle of a string by "...".
Definition: kstringhandler.cpp:71
QString
QChar::unicode
ushort unicode() const
QChar::isPunct
bool isPunct() const
I
#define I
QStringList
QString::right
QString right(int n) const
QChar::isNull
bool isNull() const
QList::end
iterator end()
QString::toLower
QString toLower() const
QLatin1Char
QString::midRef
QStringRef midRef(int position, int n) const
KStringHandler::perlSplit
QStringList perlSplit(const QString &sep, const QString &s, int max=0)
Split a QString into a QStringList in a similar fashion to the static QStringList function in Qt...
Definition: kstringhandler.cpp:89
kstringhandler.h
QString::replace
QString & replace(int position, int n, QChar after)
QString::unicode
const QChar * unicode() const
QString::mid
QString mid(int position, int n) const
QLatin1String
KStringHandler::capwords
QString capwords(const QString &text)
Capitalizes each word in the string: "hello there" becomes "Hello There".
Definition: kstringhandler.cpp:37
QList::mid
QList< T > mid(int pos, int length) const
QString::length
int length() const
QString::reserve
void reserve(int size)
QString::left
QString left(int n) const
QString::fromLatin1
QString fromLatin1(const char *str, int size)
X
#define X
KGlobal::staticQString
const QString & staticQString(const char *str)
Creates a static QString.
Definition: kglobal.cpp:271
KStringHandler::lsqueeze
QString lsqueeze(const QString &str, int maxlen=40)
Substitute characters at the beginning of a string by "...".
Definition: kstringhandler.cpp:62
KStringHandler::obscure
QString obscure(const QString &str)
Obscure string by using a simple symmetric encryption.
Definition: kstringhandler.cpp:190
QList::begin
iterator begin()
QChar::isSymbol
bool isSymbol() const
QStringRef::localeAwareCompare
int localeAwareCompare(const QString &other) const
KStringHandler::tagUrls
QString tagUrls(const QString &text)
This method auto-detects URLs in strings, and adds HTML markup to them so that richtext or HTML-enabl...
Definition: kstringhandler.cpp:165
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:22:11 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal