KCoreAddons

kstringhandler.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 1999 Ian Zepp <[email protected]>
5  SPDX-FileCopyrightText: 2006 Dominic Battre <[email protected]>
6  SPDX-FileCopyrightText: 2006 Martin Pool <[email protected]>
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "kstringhandler.h"
12 
13 #include <stdlib.h> // random()
14 
15 #if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67)
16 #include <QRegExp> // for the word ranges
17 #endif
18 #include <QRegularExpression>
19 #include <QStringList>
20 #include <QVector>
21 
22 //
23 // Capitalization routines
24 //
26 {
27  if (text.isEmpty()) {
28  return text;
29  }
30 
31  const QString strippedText = text.trimmed();
32  const QString space = QString(QLatin1Char(' '));
33  const QStringList words = capwords(strippedText.split(space));
34 
35  QString result = text;
36  result.replace(strippedText, words.join(space));
37  return result;
38 }
39 
41 {
42  QStringList tmp = list;
43  for (QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it) {
44  *it = (*it)[0].toUpper() + (*it).midRef(1);
45  }
46  return tmp;
47 }
48 
49 QString KStringHandler::lsqueeze(const QString &str, int maxlen)
50 {
51  if (str.length() > maxlen) {
52  int part = maxlen - 3;
53  return QLatin1String("...") + str.rightRef(part);
54  } else {
55  return str;
56  }
57 }
58 
59 QString KStringHandler::csqueeze(const QString &str, int maxlen)
60 {
61  if (str.length() > maxlen && maxlen > 3) {
62  const int part = (maxlen - 3) / 2;
63  return str.leftRef(part) + QLatin1String("...") + str.rightRef(part);
64  } else {
65  return str;
66  }
67 }
68 
69 QString KStringHandler::rsqueeze(const QString &str, int maxlen)
70 {
71  if (str.length() > maxlen) {
72  int part = maxlen - 3;
73  return str.leftRef(part) + QLatin1String("...");
74  } else {
75  return str;
76  }
77 }
78 
79 QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max)
80 {
81  bool ignoreMax = 0 == max;
82 
83  QStringList l;
84 
85  int searchStart = 0;
86 
87  int tokenStart = s.indexOf(sep, searchStart);
88 
89  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
90  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
91  l << s.mid(searchStart, tokenStart - searchStart);
92  }
93 
94  searchStart = tokenStart + sep.length();
95  tokenStart = s.indexOf(sep, searchStart);
96  }
97 
98  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
99  l << s.mid(searchStart, s.length() - searchStart);
100  }
101 
102  return l;
103 }
104 
105 QStringList KStringHandler::perlSplit(const QChar &sep, const QString &s, int max)
106 {
107  bool ignoreMax = 0 == max;
108 
109  QStringList l;
110 
111  int searchStart = 0;
112 
113  int tokenStart = s.indexOf(sep, searchStart);
114 
115  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
116  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
117  l << s.mid(searchStart, tokenStart - searchStart);
118  }
119 
120  searchStart = tokenStart + 1;
121  tokenStart = s.indexOf(sep, searchStart);
122  }
123 
124  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
125  l << s.mid(searchStart, s.length() - searchStart);
126  }
127 
128  return l;
129 }
130 
131 #if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67)
132 QStringList KStringHandler::perlSplit(const QRegExp &sep, const QString &s, int max)
133 {
134  // nothing to split
135  if (s.isEmpty()) {
136  return QStringList();
137  }
138 
139  bool ignoreMax = 0 == max;
140 
141  QStringList l;
142 
143  int searchStart = 0;
144  int tokenStart = sep.indexIn(s, searchStart);
145  int len = sep.matchedLength();
146 
147  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
148  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
149  l << s.mid(searchStart, tokenStart - searchStart);
150  }
151 
152  searchStart = tokenStart + len;
153  tokenStart = sep.indexIn(s, searchStart);
154  len = sep.matchedLength();
155  }
156 
157  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
158  l << s.mid(searchStart, s.length() - searchStart);
159  }
160 
161  return l;
162 }
163 #endif
164 
166 {
167  // nothing to split
168  if (s.isEmpty()) {
169  return QStringList();
170  }
171 
172  bool ignoreMax = max == 0;
173 
174  QStringList list;
175 
176  int start = 0;
177  QRegularExpression separator(sep);
179 
180  QRegularExpressionMatchIterator iter = separator.globalMatch(s);
182  QString chunk;
183  while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) {
184  match = iter.next();
185  chunk = s.mid(start, match.capturedStart() - start);
186  if (!chunk.isEmpty()) {
187  list.append(chunk);
188  }
189  start = match.capturedEnd();
190  }
191 
192  // catch the remainder
193  chunk = s.mid(start, s.size() - start);
194  if (!chunk.isEmpty()) {
195  list.append(chunk);
196  }
197 
198  return list;
199 }
200 
202 {
203  QString richText(text);
204  static const QRegularExpression urlEx(QStringLiteral("(www\\.(?!\\.)|(fish|ftp|http|https)://[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$\\(\\)]+)"),
206  // The reference \1 is going to be replaced by the matched url
207  richText.replace(urlEx, QStringLiteral("<a href=\"\\1\">\\1</a>"));
208  return richText;
209 }
210 
212 {
213  QString result;
214  const QChar *unicode = str.unicode();
215  for (int i = 0; i < str.length(); ++i)
216  // yes, no typo. can't encode ' ' or '!' because
217  // they're the unicode BOM. stupid scrambling. stupid.
218  result += (unicode[i].unicode() <= 0x21) ? unicode[i] : QChar(0x1001F - unicode[i].unicode());
219 
220  return result;
221 }
222 
// Heuristically decides whether the NUL-terminated buffer `buf` holds UTF-8
// text. Returns false for pure 7-bit input, for ASCII control characters that
// do not appear in plain text, and for malformed multi-byte sequences.
// A null pointer is reported as UTF-8.
bool KStringHandler::isUtf8(const char *buf)
{
    if (buf == nullptr) {
        return true; // whatever, just don't crash
    }

    enum : unsigned char {
        F = 0, /* character never appears in text */
        T = 1, /* character appears in plain ASCII text */
        I = 2, /* character appears in ISO-8859 text */
        X = 3  /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
    };
    /* clang-format off */
    static const unsigned char text_chars[256] = {
        /*                  BEL BS HT LF    FF CR    */
        F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
        /*                              ESC          */
        F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
        T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
        /*            NEL                            */
        X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
        X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
        I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I  /* 0xfX */
    };
    /* clang-format on */

    // Set once at least one complete multi-byte sequence has been seen.
    bool sawMultibyte = false;

    const unsigned char *cursor = reinterpret_cast<const unsigned char *>(buf);
    while (*cursor != 0) {
        const unsigned char lead = *cursor++;

        if (lead < 0x80) { /* 0xxxxxxx is plain ASCII */
            // Even if everything else is valid UTF-8, reject input that
            // uses weird control characters.
            if (text_chars[lead] != T) {
                return false;
            }
            continue;
        }

        if (lead < 0xC0) { /* 10xxxxxx never 1st byte */
            return false;
        }

        /* 11xxxxxx begins UTF-8: the lead byte tells how many bytes follow */
        int trailing;
        if (lead < 0xE0) { /* 110xxxxx */
            trailing = 1;
        } else if (lead < 0xF0) { /* 1110xxxx */
            trailing = 2;
        } else if (lead < 0xF8) { /* 11110xxx */
            trailing = 3;
        } else if (lead < 0xFC) { /* 111110xx */
            trailing = 4;
        } else if (lead < 0xFE) { /* 1111110x */
            trailing = 5;
        } else {
            return false;
        }

        for (; trailing > 0; --trailing) {
            const unsigned char continuation = *cursor++;
            if (continuation == 0) {
                // Buffer ends inside a sequence: judge by what came before.
                return sawMultibyte;
            }
            if ((continuation & 0xC0) != 0x80) { /* must be 10xxxxxx */
                return false;
            }
        }
        sawMultibyte = true;
    }

    return sawMultibyte; /* don't claim it's UTF-8 if it's all 7-bit */
}
308 
309 #undef F
310 #undef T
311 #undef I
312 #undef X
313 
315 {
316  if (!str) {
317  return QString();
318  }
319  if (!*str) {
320  static const QLatin1String emptyString("");
321  return emptyString;
322  }
324 }
325 
327 {
328  const QChar zwsp(0x200b);
329 
330  QString result;
331  result.reserve(text.length());
332 
333  for (int i = 0; i < text.length(); i++) {
334  const QChar c = text[i];
335  bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
336  bool singleQuote = (c == QLatin1Char('\''));
337  bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
338  bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
339  bool nextIsSpace = (i == (text.length() - 1) || text[i + 1].isSpace());
340  bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
341 
342  // Provide a breaking opportunity before opening parenthesis
343  if (openingParens && !prevIsSpace) {
344  result += zwsp;
345  }
346 
347  // Provide a word joiner before the single quote
348  if (singleQuote && !prevIsSpace) {
349  result += QChar(0x2060);
350  }
351 
352  result += c;
353 
354  if (breakAfter && !openingParens && !nextIsSpace && !singleQuote) {
355  result += zwsp;
356  }
357  }
358 
359  return result;
360 }
361 
363 {
364  int length = 0;
365  const auto chrs = text.toUcs4();
366  for (auto chr : chrs) {
367  auto script = QChar::script(chr);
368  /* clang-format off */
369  if (script == QChar::Script_Han
370  || script == QChar::Script_Hangul
371  || script == QChar::Script_Hiragana
372  || script == QChar::Script_Katakana
373  || script == QChar::Script_Yi
374  || QChar::isHighSurrogate(chr)) { /* clang-format on */
375  length += 2;
376  } else {
377  length += 1;
378  }
379  }
380  return length;
381 }
void setPatternOptions(QRegularExpression::PatternOptions options)
int indexOf(QChar ch, int from, Qt::CaseSensitivity cs) const const
KCOREADDONS_EXPORT int logicalLength(const QString &text)
Returns the length that reflects the density of information in the text.
KCOREADDONS_EXPORT QString from8Bit(const char *str)
Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded.
QChar::Script script() const const
KCOREADDONS_EXPORT QString rsqueeze(const QString &str, int maxlen=40)
Substitute characters at the end of a string by "...".
int capturedStart(int nth) const const
QRegularExpressionMatchIterator globalMatch(const QString &subject, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions) const const
int size() const const
QString join(const QString &separator) const const
bool isHighSurrogate() const const
int matchedLength() const const
int indexIn(const QString &str, int offset, QRegExp::CaretMode caretMode) const const
QRegularExpressionMatch next()
int count(const T &value) const const
QString fromLocal8Bit(const char *str, int size)
void append(const T &value)
QString fromUtf8(const char *str, int size)
KCOREADDONS_EXPORT QString preProcessWrap(const QString &text)
Preprocesses the given string in order to provide additional line breaking opportunities for QTextLay...
KCOREADDONS_EXPORT bool isUtf8(const char *str)
Guess whether a string is UTF8 encoded.
QStringRef leftRef(int n) const const
int capturedEnd(int nth) const const
bool isEmpty() const const
QString trimmed() const const
QStringList split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
typedef Iterator
QStringRef rightRef(int n) const const
KCOREADDONS_EXPORT QString csqueeze(const QString &str, int maxlen=40)
Substitute characters at the middle of a string by "...".
bool isPunct() const const
QList::iterator end()
QStringRef midRef(int position, int n) const const
KCOREADDONS_EXPORT QStringList perlSplit(const QString &sep, const QString &s, int max=0)
Split a QString into a QStringList in a similar fashion to the static QStringList function in Qt...
QString & replace(int position, int n, QChar after)
const QChar * unicode() const const
QString mid(int position, int n) const const
KCOREADDONS_EXPORT QString capwords(const QString &text)
Capitalizes each word in the string: "hello there" becomes "Hello There" (string overload).
int length() const const
void reserve(int size)
bool isEmpty() const const
QVector< uint > toUcs4() const const
KCOREADDONS_EXPORT QString lsqueeze(const QString &str, int maxlen=40)
Substitute characters at the beginning of a string by "...".
KCOREADDONS_EXPORT QString obscure(const QString &str)
Obscure string by using a simple symmetric encryption.
QList::iterator begin()
bool isSymbol() const const
KCOREADDONS_EXPORT QString tagUrls(const QString &text)
This method auto-detects URLs in strings, and adds HTML markup to them so that richtext or HTML-enabl...
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sun Apr 18 2021 23:02:02 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.