KCoreAddons

kstringhandler.cpp
1 /*
2  This file is part of the KDE libraries
3 
4  SPDX-FileCopyrightText: 1999 Ian Zepp <[email protected]>
5  SPDX-FileCopyrightText: 2006 Dominic Battre <[email protected]>
6  SPDX-FileCopyrightText: 2006 Martin Pool <[email protected]>
7 
8  SPDX-License-Identifier: LGPL-2.0-or-later
9 */
10 
11 #include "kstringhandler.h"
12 
13 #include <stdlib.h> // random()
14 
15 #if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67)
16 #include <QRegExp> // for the word ranges
17 #endif
18 #include <QRegularExpression>
19 #include <QVector>
20 
21 //
22 // Capitalization routines
23 //
25 {
26  if (text.isEmpty()) {
27  return text;
28  }
29 
30  const QString strippedText = text.trimmed();
31  const QString space = QString(QLatin1Char(' '));
32  const QStringList words = capwords(strippedText.split(space));
33 
34  QString result = text;
35  result.replace(strippedText, words.join(space));
36  return result;
37 }
38 
40 {
41  QStringList tmp = list;
42  for (auto &str : tmp) {
43  str[0] = str.at(0).toUpper();
44  }
45  return tmp;
46 }
47 
48 QString KStringHandler::lsqueeze(const QString &str, const int maxlen)
49 {
50  if (str.length() > maxlen) {
51  const int part = maxlen - 3;
52  return QLatin1String("...") + QStringView(str).right(part);
53  } else {
54  return str;
55  }
56 }
57 
58 QString KStringHandler::csqueeze(const QString &str, const int maxlen)
59 {
60  if (str.length() > maxlen && maxlen > 3) {
61  const int part = (maxlen - 3) / 2;
62  const QStringView strView{str};
63  return strView.left(part) + QLatin1String("...") + strView.right(part);
64  } else {
65  return str;
66  }
67 }
68 
69 QString KStringHandler::rsqueeze(const QString &str, const int maxlen)
70 {
71  if (str.length() > maxlen) {
72  const int part = maxlen - 3;
73  return QStringView(str).left(part) + QLatin1String("...");
74  } else {
75  return str;
76  }
77 }
78 
80 {
81  const bool ignoreMax = max == 0;
82 
83  const int sepLength = sep.size();
84 
85  QStringList list;
86  int searchStart = 0;
87  int sepIndex = str.indexOf(sep, searchStart);
88 
89  while (sepIndex != -1 && (ignoreMax || list.count() < max - 1)) {
90  const auto chunk = str.mid(searchStart, sepIndex - searchStart);
91  if (!chunk.isEmpty()) {
92  list.append(chunk.toString());
93  }
94 
95  searchStart = sepIndex + sepLength;
96  sepIndex = str.indexOf(sep, searchStart);
97  }
98 
99  const auto lastChunk = str.mid(searchStart, str.length() - searchStart);
100  if (!lastChunk.isEmpty()) {
101  list.append(lastChunk.toString());
102  }
103 
104  return list;
105 }
106 
107 QStringList KStringHandler::perlSplit(const QString &sep, const QString &s, int max)
108 {
109  return perlSplit(QStringView(sep), QStringView(s), max);
110 }
111 
112 QStringList KStringHandler::perlSplit(const QChar &sep, const QString &str, int max)
113 {
114  return perlSplit(QStringView(&sep, 1), QStringView(str), max);
115 }
116 
117 #if KCOREADDONS_BUILD_DEPRECATED_SINCE(5, 67)
118 QStringList KStringHandler::perlSplit(const QRegExp &sep, const QString &s, const int max)
119 {
120  // nothing to split
121  if (s.isEmpty()) {
122  return QStringList();
123  }
124 
125  const bool ignoreMax = 0 == max;
126 
127  QStringList l;
128 
129  int searchStart = 0;
130  int tokenStart = sep.indexIn(s, searchStart);
131  int len = sep.matchedLength();
132 
133  while (-1 != tokenStart && (ignoreMax || l.count() < max - 1)) {
134  if (!s.midRef(searchStart, tokenStart - searchStart).isEmpty()) {
135  l << s.mid(searchStart, tokenStart - searchStart);
136  }
137 
138  searchStart = tokenStart + len;
139  tokenStart = sep.indexIn(s, searchStart);
140  len = sep.matchedLength();
141  }
142 
143  if (!s.midRef(searchStart, s.length() - searchStart).isEmpty()) {
144  l << s.mid(searchStart, s.length() - searchStart);
145  }
146 
147  return l;
148 }
149 #endif
150 
152 {
153  // nothing to split
154  if (str.isEmpty()) {
155  return QStringList();
156  }
157 
158  const bool ignoreMax = max == 0;
159 
160  QStringList list;
161 
162  int start = 0;
163 
164  const QStringView strView(str);
165 
166  QRegularExpression separator(sep);
168 
169  QRegularExpressionMatchIterator iter = separator.globalMatch(strView);
171  while (iter.hasNext() && (ignoreMax || list.count() < max - 1)) {
172  match = iter.next();
173  const QStringView chunk = strView.mid(start, match.capturedStart() - start);
174  if (!chunk.isEmpty()) {
175  list.append(chunk.toString());
176  }
177 
178  start = match.capturedEnd();
179  }
180 
181  // catch the remainder
182  const QStringView lastChunk = strView.mid(start, strView.size() - start);
183  if (!lastChunk.isEmpty()) {
184  list.append(lastChunk.toString());
185  }
186 
187  return list;
188 }
189 
191 {
192  QString richText(text);
193 
194  static const QRegularExpression urlEx(QStringLiteral(R"((www\.(?!\.)|(fish|ftp|http|https)://[\d\w./,:_~?=&;#@\-+%$()]+))"),
196  // The reference \1 is going to be replaced by the matched url
197  richText.replace(urlEx, QStringLiteral("<a href=\"\\1\">\\1</a>"));
198  return richText;
199 }
200 
202 {
203  QString result;
204  for (const QChar ch : str) {
205  // yes, no typo. can't encode ' ' or '!' because
206  // they're the unicode BOM. stupid scrambling. stupid.
207  const ushort uc = ch.unicode();
208  result += (uc <= 0x21) ? ch : QChar(0x1001F - uc);
209  }
210 
211  return result;
212 }
213 
214 bool KStringHandler::isUtf8(const char *buf)
215 {
216  int i;
217  int n;
218  unsigned char c;
219  bool gotone = false;
220 
221  if (!buf) {
222  return true; // whatever, just don't crash
223  }
224 
225 #define F 0 /* character never appears in text */
226 #define T 1 /* character appears in plain ASCII text */
227 #define I 2 /* character appears in ISO-8859 text */
228 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
229  /* clang-format off */
230  static const unsigned char text_chars[256] = {
231  /* BEL BS HT LF FF CR */
232  F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
233  /* ESC */
234  F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
235  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
236  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
237  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
238  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
239  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
240  T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
241  /* NEL */
242  X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
243  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
244  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
245  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
246  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
247  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
248  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
249  I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
250  };
251  /* clang-format on */
252 
253  /* *ulen = 0; */
254  for (i = 0; (c = buf[i]); ++i) {
255  if ((c & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
256  /*
257  * Even if the whole file is valid UTF-8 sequences,
258  * still reject it if it uses weird control characters.
259  */
260 
261  if (text_chars[c] != T) {
262  return false;
263  }
264 
265  } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
266  return false;
267  } else { /* 11xxxxxx begins UTF-8 */
268  int following;
269 
270  if ((c & 0x20) == 0) { /* 110xxxxx */
271  following = 1;
272  } else if ((c & 0x10) == 0) { /* 1110xxxx */
273  following = 2;
274  } else if ((c & 0x08) == 0) { /* 11110xxx */
275  following = 3;
276  } else if ((c & 0x04) == 0) { /* 111110xx */
277  following = 4;
278  } else if ((c & 0x02) == 0) { /* 1111110x */
279  following = 5;
280  } else {
281  return false;
282  }
283 
284  for (n = 0; n < following; ++n) {
285  i++;
286  if (!(c = buf[i])) {
287  goto done;
288  }
289 
290  if ((c & 0x80) == 0 || (c & 0x40)) {
291  return false;
292  }
293  }
294  gotone = true;
295  }
296  }
297 done:
298  return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
299 }
300 
301 #undef F
302 #undef T
303 #undef I
304 #undef X
305 
307 {
308  if (!str) {
309  return QString();
310  }
311  if (!*str) {
312  static const QLatin1String emptyString("");
313  return emptyString;
314  }
316 }
317 
319 {
320  const QChar zwsp(0x200b);
321 
322  QString result;
323  result.reserve(text.length());
324 
325  for (int i = 0; i < text.length(); i++) {
326  const QChar c = text[i];
327  const bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
328  const bool singleQuote = (c == QLatin1Char('\''));
329  const bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
330  const bool breakAfter = (closingParens || c.isPunct() || c.isSymbol());
331  const bool isLastChar = i == (text.length() - 1);
332  const bool isLower = c.isLower();
333  const bool nextIsUpper = !isLastChar && text[i + 1].isUpper(); // false by default
334  const bool nextIsSpace = isLastChar || text[i + 1].isSpace(); // true by default
335  const bool prevIsSpace = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
336 
337  // Provide a breaking opportunity before opening parenthesis
338  if (openingParens && !prevIsSpace) {
339  result += zwsp;
340  }
341 
342  // Provide a word joiner before the single quote
343  if (singleQuote && !prevIsSpace) {
344  result += QChar(0x2060);
345  }
346 
347  result += c;
348 
349  // Provide a breaking opportunity between camelCase and PascalCase sub-words
350  const bool isCamelCase = isLower && nextIsUpper;
351 
352  if (isCamelCase || (breakAfter && !openingParens && !nextIsSpace && !singleQuote)) {
353  result += zwsp;
354  }
355  }
356 
357  return result;
358 }
359 
361 {
362  int length = 0;
363  const auto chrs = text.toUcs4();
364  for (const auto chr : chrs) {
365  const auto script = QChar::script(chr);
366  /* clang-format off */
367  if (script == QChar::Script_Han
368  || script == QChar::Script_Hangul
369  || script == QChar::Script_Hiragana
370  || script == QChar::Script_Katakana
371  || script == QChar::Script_Yi
372  || QChar::isHighSurrogate(chr)) { /* clang-format on */
373  length += 2;
374  } else {
375  length += 1;
376  }
377  }
378  return length;
379 }
void append(const T &value)
QStringView right(qsizetype length) const const
QString fromUtf8(const char *str, int size)
QChar::Script script() const const
KCOREADDONS_EXPORT QString preProcessWrap(const QString &text)
Preprocesses the given string in order to provide additional line breaking opportunities for QTextLay...
QStringRef midRef(int position, int n) const const
QStringList split(const QString &sep, QString::SplitBehavior behavior, Qt::CaseSensitivity cs) const const
int count(const T &value) const const
KCOREADDONS_EXPORT int logicalLength(const QString &text)
Returns the length that reflects the density of information in the text.
KCOREADDONS_EXPORT QString obscure(const QString &str)
Obscure string by using a simple symmetric encryption.
QString trimmed() const const
QStringView mid(qsizetype start) const const
Q_SCRIPTABLE Q_NOREPLY void start()
bool isHighSurrogate() const const
void reserve(int size)
const QChar * unicode() const const
KCOREADDONS_EXPORT QString rsqueeze(const QString &str, int maxlen=40)
Substitute characters at the end of a string by "...".
KCOREADDONS_EXPORT QStringList perlSplit(const QStringView sep, const QStringView str, int max)
Split a string into a QStringList in a similar fashion to the static QStringList function in Qt,...
KCOREADDONS_EXPORT QString from8Bit(const char *str)
Construct QString from a c string, guessing whether it is UTF8- or Local8Bit-encoded.
QStringView left(qsizetype length) const const
QRegularExpressionMatch next()
qsizetype size() const const
int matchedLength() const const
QString toString() const const
bool isUpper() const const
KCOREADDONS_EXPORT QString lsqueeze(const QString &str, int maxlen=40)
Substitute characters at the beginning of a string by "...".
KCOREADDONS_EXPORT QString csqueeze(const QString &str, int maxlen=40)
Substitute characters at the middle of a string by "...".
QString fromLocal8Bit(const char *str, int size)
void setPatternOptions(QRegularExpression::PatternOptions options)
int indexIn(const QString &str, int offset, QRegExp::CaretMode caretMode) const const
bool isEmpty() const const
int length() const const
const T & at(int i) const const
KCOREADDONS_EXPORT QString tagUrls(const QString &text)
This method auto-detects URLs in strings, and adds HTML markup to them so that richtext or HTML-enabl...
QString join(const QString &separator) const const
bool isPunct() const const
QString & replace(int position, int n, QChar after)
QVector< uint > toUcs4() const const
KCOREADDONS_EXPORT QString capwords(const QString &text)
Capitalizes each word in the string "hello there" becomes "Hello There" (string)
bool isEmpty() const const
QString left(int n) const const
KCOREADDONS_EXPORT bool isUtf8(const char *str)
Guess whether a string is UTF8 encoded.
int length() const const
bool isEmpty() const const
bool isSymbol() const const
qsizetype indexOf(QChar c, qsizetype from, Qt::CaseSensitivity cs) const const
QLatin1String right(int length) const const
QRegularExpressionMatchIterator globalMatch(const QString &subject, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions) const const
QString mid(int position, int n) const const
bool isLower() const const
This file is part of the KDE documentation.
Documentation copyright © 1996-2023 The KDE developers.
Generated on Mon May 8 2023 04:04:52 by doxygen 1.8.17 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.