KCoreAddons

kstringhandler.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 1999 Ian Zepp <[email protected]>
5  SPDX-FileCopyrightText: 2006 Dominic Battre <[email protected]>
6  SPDX-FileCopyrightText: 2006 Martin Pool <[email protected]>
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "kstringhandler.h"
12 
13 #include <stdlib.h> // random()
14 
15 #include <QRegExp> // for the word ranges
16 #include <QRegularExpression>
17 #include <QCharRef>
18 #include <QStringList>
19 #include <QVector>
20 
21 //
22 // Capitalization routines
23 //
25 {
26  if (text.isEmpty()) {
27  return text;
28  }
29 
30  const QString strippedText = text.trimmed();
31  const QString space = QString(QLatin1Char(' '));
32  const QStringList words = capwords(strippedText.split(space));
33 
34  QString result = text;
35  result.replace(strippedText, words.join(space));
36  return result;
37 }
38 
40 {
41  QStringList tmp = list;
42  for (QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it) {
43  *it = (*it)[ 0 ].toUpper() + (*it).midRef(1);
44  }
45  return tmp;
46 }
47 
48 QString KStringHandler::lsqueeze(const QString &str, int maxlen)
49 {
50  if (str.length() > maxlen) {
51  int part = maxlen - 3;
52  return QLatin1String("...") + str.rightRef(part);
53  } else {
54  return str;
55  }
56 }
57 
58 QString KStringHandler::csqueeze(const QString &str, int maxlen)
59 {
60  if (str.length() > maxlen && maxlen > 3) {
61  const int part = (maxlen - 3) / 2;
62  return str.leftRef(part) + QLatin1String("...") + str.rightRef(part);
63  } else {
64  return str;
65  }
66 }
67 
68 QString KStringHandler::rsqueeze(const QString &str, int maxlen)
69 {
70  if (str.length() > maxlen) {
71  int part = maxlen - 3;
72  return str.leftRef(part) + QLatin1String("...");
73  } else {
74  return str;
75  }
76 }
77 
78 QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max)
79 {
80  bool ignoreMax = 0 == max;
81 
82  QStringList l;
83 
84  int searchStart = 0;
85 
86  int tokenStart = s.indexOf(sep, searchStart);
87 
88  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
89  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
90  l << s.mid(searchStart, tokenStart - searchStart);
91  }
92 
93  searchStart = tokenStart + sep.length();
94  tokenStart = s.indexOf(sep, searchStart);
95  }
96 
97  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
98  l << s.mid(searchStart, s.length() - searchStart);
99  }
100 
101  return l;
102 }
103 
104 QStringList KStringHandler::perlSplit(const QChar &sep, const QString &s, int max)
105 {
106  bool ignoreMax = 0 == max;
107 
108  QStringList l;
109 
110  int searchStart = 0;
111 
112  int tokenStart = s.indexOf(sep, searchStart);
113 
114  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
115  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
116  l << s.mid(searchStart, tokenStart - searchStart);
117  }
118 
119  searchStart = tokenStart + 1;
120  tokenStart = s.indexOf(sep, searchStart);
121  }
122 
123  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
124  l << s.mid(searchStart, s.length() - searchStart);
125  }
126 
127  return l;
128 }
129 
130 #if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67)
131 QStringList KStringHandler::perlSplit(const QRegExp &sep, const QString &s, int max)
132 {
133  // nothing to split
134  if (s.isEmpty()) {
135  return QStringList();
136  }
137 
138  bool ignoreMax = 0 == max;
139 
140  QStringList l;
141 
142  int searchStart = 0;
143  int tokenStart = sep.indexIn(s, searchStart);
144  int len = sep.matchedLength();
145 
146  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
147  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
148  l << s.mid(searchStart, tokenStart - searchStart);
149  }
150 
151  searchStart = tokenStart + len;
152  tokenStart = sep.indexIn(s, searchStart);
153  len = sep.matchedLength();
154  }
155 
156  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
157  l << s.mid(searchStart, s.length() - searchStart);
158  }
159 
160  return l;
161 }
162 #endif
163 
165 {
166  // nothing to split
167  if (s.isEmpty()) {
168  return QStringList();
169  }
170 
171  bool ignoreMax = max == 0;
172 
173  QStringList list;
174 
175  int start = 0;
178  QString chunk;
179  while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) {
180  match = iter.next();
181  chunk = s.mid(start, match.capturedStart() - start);
182  if (!chunk.isEmpty()) {
183  list.append(chunk);
184  }
185  start = match.capturedEnd();
186  }
187 
188  // catch the remainder
189  chunk = s.mid(start, s.size() - start);
190  if (!chunk.isEmpty()) {
191  list.append(chunk);
192  }
193 
194  return list;
195 }
196 
198 {
199  QString richText(text);
200  const QRegularExpression urlEx(QStringLiteral("(www\\.(?!\\.)|(fish|ftp|http|https)://[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$\\(\\)]+)"));
201  // the reference \1 is going to be replaced by the matched url
202  const QLatin1String regexBackRef(QLatin1String("\\1"));
203  const QString anchor = QLatin1String("<a href=\"") + regexBackRef + QLatin1String("\">") + regexBackRef + QLatin1String("</a>");
204  richText.replace(urlEx, anchor);
205  return richText;
206 }
207 
209 {
210  QString result;
211  const QChar *unicode = str.unicode();
212  for (int i = 0; i < str.length(); ++i)
213  // yes, no typo. can't encode ' ' or '!' because
214  // they're the unicode BOM. stupid scrambling. stupid.
215  result += (unicode[ i ].unicode() <= 0x21) ? unicode[ i ] :
216  QChar(0x1001F - unicode[ i ].unicode());
217 
218  return result;
219 }
220 
// Guesses whether the NUL-terminated byte string buf is UTF-8 encoded text.
//
// Returns true when buf is a null pointer, or when every byte sequence in buf
// is well-formed and at least one complete multi-byte sequence was seen.
// Returns false for pure 7-bit input, for ASCII control characters that are
// not classified as text, for a continuation byte in lead position, and for a
// lead byte that is not followed by the announced continuation bytes.
// If the string ends in the middle of a multi-byte sequence, the answer is
// whether an earlier complete multi-byte sequence had been seen.
bool KStringHandler::isUtf8(const char *buf)
{
    if (!buf) {
        return true; // whatever, just don't crash
    }

    // Byte classes used by the lookup table below.
    enum ByteClass : unsigned char {
        F = 0, /* character never appears in text */
        T = 1, /* character appears in plain ASCII text */
        I = 2, /* character appears in ISO-8859 text */
        X = 3, /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
    };

    static const unsigned char text_chars[256] = {
        /*                  BEL BS HT LF    FF CR    */
        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
        /*                              ESC          */
        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
        /*            NEL                            */
        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I  /* 0xfX */
    };

    const unsigned char *cursor = reinterpret_cast<const unsigned char *>(buf);
    bool sawMultiByte = false;

    while (*cursor) {
        const unsigned char lead = *cursor++;

        if (lead < 0x80) { /* 0xxxxxxx is plain ASCII */
            // Even if everything else is valid UTF-8, still reject the input
            // if it uses weird control characters.
            if (text_chars[lead] != T) {
                return false;
            }
            continue;
        }

        if (lead < 0xC0) { /* 10xxxxxx never 1st byte */
            return false;
        }

        /* 11xxxxxx begins UTF-8: the lead byte announces the sequence length */
        int pending;
        if (lead < 0xE0) { /* 110xxxxx */
            pending = 1;
        } else if (lead < 0xF0) { /* 1110xxxx */
            pending = 2;
        } else if (lead < 0xF8) { /* 11110xxx */
            pending = 3;
        } else if (lead < 0xFC) { /* 111110xx */
            pending = 4;
        } else if (lead < 0xFE) { /* 1111110x */
            pending = 5;
        } else {
            return false;
        }

        for (; pending > 0; --pending) {
            const unsigned char cont = *cursor++;
            if (cont == 0) {
                // The string ended inside a sequence: answer with what has
                // been established so far.
                return sawMultiByte;
            }
            if ((cont & 0xC0) != 0x80) { /* continuation must be 10xxxxxx */
                return false;
            }
        }
        sawMultiByte = true;
    }

    return sawMultiByte; /* don't claim it's UTF-8 if it's all 7-bit */
}
310 
312 {
313  if (!str) {
314  return QString();
315  }
316  if (!*str) {
317  static const QLatin1String emptyString("");
318  return emptyString;
319  }
320  return KStringHandler::isUtf8(str) ?
321  QString::fromUtf8(str) :
323 }
324 
326 {
327  const QChar zwsp(0x200b);
328 
329  QString result;
330  result.reserve(text.length());
331 
332  for (int i = 0; i < text.length(); i++) {
333  const QChar c = text[i];
334  bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
335  bool singleQuote = (c == QLatin1Char('\''));
336  bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
337  bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
338  bool nextIsSpace = (i == (text.length() - 1) || text[i + 1].isSpace());
339  bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
340 
341  // Provide a breaking opportunity before opening parenthesis
342  if (openingParens && !prevIsSpace) {
343  result += zwsp;
344  }
345 
346  // Provide a word joiner before the single quote
347  if (singleQuote && !prevIsSpace) {
348  result += QChar(0x2060);
349  }
350 
351  result += c;
352 
353  if (breakAfter && !openingParens && !nextIsSpace && !singleQuote) {
354  result += zwsp;
355  }
356  }
357 
358  return result;
359 }
360 
362 {
363  int length = 0;
364  auto chrs = text.toUcs4();
365  for (auto chr : chrs) {
366  auto script = QChar::script(chr);
367  if (script == QChar::Script_Han ||
368  script == QChar::Script_Hangul ||
369  script == QChar::Script_Hiragana ||
370  script == QChar::Script_Katakana ||
371  script == QChar::Script_Yi ||
372  QChar::isHighSurrogate(chr)) {
373  length += 2;
374  } else {
375  length += 1;
376  }
377  }
378  return length;
379 }
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
KCOREADDONS_EXPORT int logicalLength(const QString &text)
Returns the length that reflects the density of information in the text.
KCOREADDONS_EXPORT QString from8Bit(const char *str)
Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded.
QChar::Script script() const const
KCOREADDONS_EXPORT QString rsqueeze(const QString &str, int maxlen=40)
Substitute characters at the end of a string by "...".
int capturedStart(int nth) const const
QRegularExpressionMatchIterator globalMatch(const QString &subject, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions) const const
int size() const const
QString join(const QString &separator) const const
bool isHighSurrogate() const const
int matchedLength() const const
int indexIn(const QString &str, int offset, QRegExp::CaretMode caretMode) const const
QRegularExpressionMatch next()
int count(const T &value) const const
QString fromLocal8Bit(const char *str, int size)
void append(const T &value)
QString fromUtf8(const char *str, int size)
KCOREADDONS_EXPORT QString preProcessWrap(const QString &text)
Preprocesses the given string in order to provide additional line breaking opportunities for QTextLay...
KCOREADDONS_EXPORT bool isUtf8(const char *str)
Guess whether a string is UTF8 encoded.
QStringRef leftRef(int n) const const
int capturedEnd(int nth) const const
bool isEmpty() const const
QString trimmed() const const
QStringList split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
typedef Iterator
QStringRef rightRef(int n) const const
KCOREADDONS_EXPORT QString csqueeze(const QString &str, int maxlen=40)
Substitute characters at the middle of a string by "...".
bool isPunct() const const
QList::iterator end()
QStringRef midRef(int position, int n) const const
KCOREADDONS_EXPORT QStringList perlSplit(const QString &sep, const QString &s, int max=0)
Split a QString into a QStringList in a similar fashion to the static QStringList function in Qt...
QString & replace(int position, int n, QChar after)
const QChar * unicode() const const
QString mid(int position, int n) const const
KCOREADDONS_EXPORT QString capwords(const QString &text)
Capitalizes each word in the string "hello there" becomes "Hello There" (string)
int length() const const
void reserve(int size)
bool isEmpty() const const
QVector< uint > toUcs4() const const
KCOREADDONS_EXPORT QString lsqueeze(const QString &str, int maxlen=40)
Substitute characters at the beginning of a string by "...".
KCOREADDONS_EXPORT QString obscure(const QString &str)
Obscure string by using a simple symmetric encryption.
QList::iterator begin()
bool isSymbol() const const
KCOREADDONS_EXPORT QString tagUrls(const QString &text)
This method auto-detects URLs in strings, and adds HTML markup to them so that richtext or HTML-enabl...
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Wed Jul 8 2020 23:00:38 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.