28 #include "wtf/DisallowCType.h"
29 #include "wtf/ASCIICType.h"
35 #include "operations.h"
37 #include "identifier.h"
40 #include "commonunicode.h"
// Floating-point constants defined outside this file. Within this listing
// NaN is returned by UString::toDouble and Inf is compared against the
// result of kjs_strtod.
57 extern const double NaN;
58 extern const double Inf;
// Sentinel returned by the capacity arithmetic in this file (see
// UString::expandedSize) to signal overflow: the largest value a size_t
// can represent.
60 static inline size_t overflowIndicator()
62 return std::numeric_limits<size_t>::max();
64 static inline size_t maxUChars()
// Allocates room for `length` UChars with fastMalloc
// (sizeof(UChar) * length bytes). Lengths above maxUChars() enter the
// guarded branch first.
// NOTE(review): the body of that branch is not shown in this listing —
// presumably it returns a null pointer so the size multiplication cannot
// overflow; confirm against the full file.
71 static inline UChar *allocChars(
size_t length)
74 if (length > maxUChars()) {
77 return static_cast<UChar *
>(fastMalloc(
sizeof(UChar) * length));
// Resizes `buffer` to hold `length` UChars with fastRealloc
// (sizeof(UChar) * length bytes). Lengths above maxUChars() enter the
// guarded branch first.
// NOTE(review): the body of that branch is not shown in this listing —
// presumably it returns a null pointer without touching `buffer`; confirm.
80 static inline UChar *reallocChars(UChar *buffer,
size_t length)
83 if (length > maxUChars()) {
86 return static_cast<UChar *
>(fastRealloc(buffer,
sizeof(UChar) * length));
// Builds a CString from the C string `c`: allocates length + 1 chars and
// copies length + 1 bytes, i.e. the text plus one trailing byte.
// NOTE(review): the assignment to `length` is not shown here — presumably
// strlen(c), which would make the extra byte the NUL terminator; confirm.
89 CString::CString(
const char *c)
92 data =
new char[length + 1];
93 memcpy(data, c, length + 1);
// Builds a CString from the first `len` bytes of `c`. The buffer is one
// char larger than `len`, but the memcpy fills only `len` bytes.
// NOTE(review): nothing visible here writes data[len] — presumably a
// terminator store (and the `length` assignment) sit on lines this listing
// omits; confirm, since memcpy alone leaves the last byte uninitialised.
96 CString::CString(
const char *c,
size_t len)
99 data =
new char[len + 1];
100 memcpy(data, c, len);
// Copy constructor. When the source is non-empty (length > 0 and b.data is
// non-null) a fresh buffer of length + 1 chars is allocated and filled from
// b.data, trailing byte included.
// NOTE(review): the initialisation of `length` and the branch taken for an
// empty source are not shown in this listing.
104 CString::CString(
const CString &b)
107 if (length > 0 && b.data) {
108 data =
new char[length + 1];
109 memcpy(data, b.data, length + 1);
// Assigns from a C string: allocates a new buffer of length + 1 chars and
// copies length + 1 bytes from `c` into it.
// NOTE(review): release of the previous buffer, the update of `length` and
// the return statement are not visible in this listing — confirm they exist.
120 CString &CString::operator=(
const char *c)
126 data =
new char[length + 1];
127 memcpy(data, c, length + 1);
// Copy assignment: allocates a new buffer of length + 1 chars and copies
// length + 1 bytes from str.data into it.
// NOTE(review): any self-assignment guard, release of the previous buffer,
// the update of `length`, handling of an empty `str` and the return
// statement are not visible in this listing — confirm they exist.
132 CString &CString::operator=(
const CString &str)
143 data =
new char[length + 1];
144 memcpy(data, str.data, length + 1);
// Byte-wise equality of two CStrings: the sizes must match and, unless both
// are empty, the first `len` bytes must compare equal with memcmp. The
// len == 0 test short-circuits before c_str() is called on either operand.
152 bool operator==(
const CString &c1,
const CString &c2)
154 size_t len = c1.size();
155 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
// Storage whose address is used as the character pointer of Rep::empty
// below; Rep::null has nullptr in the same initialiser position.
159 static unsigned short almostUChar;
// Statically initialised shared reps. Each one names itself in its own
// initialiser (&UString::Rep::null / &UString::Rep::empty).
// NOTE(review): the Rep field order is declared elsewhere — map these
// positional initialisers against the struct before editing them.
160 UString::Rep UString::Rep::null = { 0, 0, 1, 0, 0, &UString::Rep::null, 0,
nullptr, 0, 0, 0, 0 };
161 UString::Rep UString::Rep::empty = { 0, 0, 1, 0, 0, &UString::Rep::empty, 0,
reinterpret_cast<UChar *
>(&almostUChar), 0, 0, 0, 0 };
// Minimum size, in chars, given to the shared conversion buffer below.
162 const int normalStatBufferSize = 4096;
// Shared char buffer and its current size. Code later in this file
// replaces it with a new[] allocation whenever the size it needs differs
// from statBufferSize.
163 static char *statBuffer =
nullptr;
164 static int statBufferSize = 0;
// Creates a Rep from a private copy of the `length` UChars at `d`: a buffer
// is obtained from allocChars, filled with memcpy, and passed to
// create(UChar *, int).
// NOTE(review): no check of copyD is visible between the allocation and
// the memcpy, while allocChars has a failure branch for over-long lengths
// (a negative `length` converts to a huge size_t). Confirm whether callers
// guarantee a valid length.
166 PassRefPtr<UString::Rep> UString::Rep::createCopying(
const UChar *d,
int length)
168 UChar *copyD = allocChars(length);
169 memcpy(copyD, d, length *
sizeof(UChar));
171 return create(copyD, length);
// Creates a Rep for the character buffer `d` of `l` UChars.
// NOTE(review): only the usedPreCapacity reset survives in this listing;
// the allocation of `r` and the rest of its field initialisation are not
// shown, so consult the full file before relying on any other field value.
174 PassRefPtr<UString::Rep> UString::Rep::create(UChar *d,
int l)
187 r->usedPreCapacity = 0;
// Creates a Rep that refers to a range of another Rep's storage instead of
// owning a buffer. `offset` is relative to `base`; it is rebased onto
// base->baseString by adding base->offset, and the new Rep keeps a
// reference to that base string (releaseRef()).
// NOTE(review): the allocation of `r` and its remaining field assignments
// are not shown in this listing.
194 PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<Rep> base,
int offset,
int length)
198 int baseOffset = base->offset;
200 base = base->baseString;
// The rebased range must lie inside what the base string has already
// handed out: no further left than usedPreCapacity, no further right than
// usedCapacity.
202 assert(-(offset + baseOffset) <= base->usedPreCapacity);
203 assert(offset + baseOffset + length <= base->usedCapacity);
206 r->offset = baseOffset + offset;
211 r->baseString = base.releaseRef();
216 r->usedPreCapacity = 0;
// Tears down this Rep. The visible steps are a call to
// Identifier::remove(this) and a test distinguishing a Rep that is its own
// baseString from one that refers to another Rep.
// NOTE(review): any guard around Identifier::remove, both branch bodies and
// the final deallocation are not shown in this listing.
223 void UString::Rep::destroy()
226 Identifier::remove(
this);
228 if (baseString !=
this) {
// 0x9e3779b9 is 2^32 divided by the golden ratio.
// NOTE(review): its use is not visible in this listing — presumably the
// initial value of `hash` in the computeHash overloads below; confirm.
238 const unsigned PHI = 0x9e3779b9U;
// Hashes `len` UChars starting at `s`.
// NOTE(review): only one mixing step survives in this listing; the loop,
// the handling of an odd trailing character and the final avalanche are
// not shown.
242 unsigned UString::Rep::computeHash(
const UChar *s,
int len)
// Mixing step: the second code unit of a pair, shifted left by 11, is
// xor-ed with the running hash, which is then folded with itself shifted
// left by 16.
254 tmp = (s[1].uc << 11) ^ hash;
255 hash = (hash << 16) ^ tmp;
// Hashes `len` bytes starting at `s`, using the same mixing step as the
// UChar overload. Every byte is cast to unsigned char first, so bytes
// >= 0x80 contribute 128..255 instead of a sign-extended negative value.
// NOTE(review): the loop structure and final avalanche are not shown in
// this listing; presumably the result matches the UChar overload for the
// same Latin-1 text — confirm.
286 unsigned UString::Rep::computeHash(
const char *s,
int len)
301 hash += (
unsigned char)s[0];
302 tmp = ((
unsigned char)s[1] << 11) ^ hash;
303 hash = (hash << 16) ^ tmp;
// A second `hash += s[0]` appears further down — presumably the odd
// trailing byte; its enclosing condition is not shown.
310 hash += (
unsigned char)s[0];
// Convenience overload for NUL-terminated strings: measures `s` with
// strlen and forwards to computeHash(const char *, int). The size_t result
// of strlen is narrowed to the int `len` parameter.
332 unsigned UString::Rep::computeHash(
const char *s)
334 return computeHash(s, strlen(s));
// Computes how many UChars to allocate for a string that needs `size`
// characters plus `otherSize` characters reserved on the other side of the
// buffer. `size` is grown by roughly 10% (plus slack) so repeated appends
// do not reallocate every time. Returns overflowIndicator() when the
// result would not fit.
338 inline size_t UString::expandedSize(
size_t size,
size_t otherSize)
const
343 if (size > maxUChars()) {
344 return overflowIndicator();
347 size_t expandedSize = ((size + 10) / 10 * 11) + 1;
// NOTE(review): only `size` has been checked against maxUChars(); the
// padded expandedSize is larger than `size` and may exceed it. In that
// case the unsigned subtraction below wraps to a huge value, the test is
// false, and a sum above maxUChars() is returned instead of
// overflowIndicator(). Callers then depend on allocChars/reallocChars
// rejecting it — consider testing `expandedSize > maxUChars()` first.
348 if (maxUChars() - expandedSize < otherSize) {
349 return overflowIndicator();
352 return expandedSize + otherSize;
// Returns the usedCapacity field of the base string that this string's rep
// refers to (m_rep->baseString), not a value stored on m_rep itself.
355 inline int UString::usedCapacity()
const
357 return m_rep->baseString->usedCapacity;
// Returns the usedPreCapacity field of the base string that this string's
// rep refers to (m_rep->baseString), not a value stored on m_rep itself.
360 inline int UString::usedPreCapacity()
const
362 return m_rep->baseString->usedPreCapacity;
// Ensures the base string's buffer has room for `requiredLength`
// characters after its start, and records that many as in use.
365 void UString::expandCapacity(
int requiredLength)
367 Rep *r = m_rep->baseString;
// Grow only when the current capacity is insufficient. The new size comes
// from expandedSize(), with the existing preCapacity passed as the part to
// keep in front.
369 if (requiredLength > r->capacity) {
370 size_t newCapacity = expandedSize(requiredLength, r->preCapacity);
// NOTE(review): oldBuf is captured before the realloc — presumably to
// restore r->buf if reallocChars fails; that handling is not shown in
// this listing.
371 UChar *oldBuf = r->buf;
372 r->buf = reallocChars(r->buf, newCapacity);
// newCapacity covers both regions; capacity excludes the pre-capacity part.
378 r->capacity = newCapacity - r->preCapacity;
// usedCapacity only ever grows here.
380 if (requiredLength > r->usedCapacity) {
381 r->usedCapacity = requiredLength;
// Ensures the base string's buffer has room for `requiredPreCap`
// characters in front of its start, and records that many as in use.
// Unlike expandCapacity this allocates a new buffer and copies, because
// the existing contents have to move to a higher offset.
385 void UString::expandPreCapacity(
int requiredPreCap)
387 Rep *r = m_rep->baseString;
389 if (requiredPreCap > r->preCapacity) {
390 size_t newCapacity = expandedSize(requiredPreCap, r->capacity);
// delta = number of UChars added in front, i.e. how far the old contents
// are shifted inside the new buffer.
391 int delta = newCapacity - r->capacity - r->preCapacity;
393 UChar *newBuf = allocChars(newCapacity);
// NOTE(review): the null check on newBuf, the release of the old buffer
// and the assignment of r->buf are not shown in this listing.
398 memcpy(newBuf + delta, r->buf, (r->capacity + r->preCapacity) *
sizeof(UChar));
402 r->preCapacity = newCapacity - r->capacity;
// usedPreCapacity only ever grows here.
404 if (requiredPreCap > r->usedPreCapacity) {
405 r->usedPreCapacity = requiredPreCap;
409 UString::UString(Empty)
415 : m_rep(
Rep::create(allocChars(1), 1))
417 m_rep->buf[0] =
static_cast<unsigned char>(c);
432 size_t length = strlen(c);
433 UChar *d = allocChars(length);
437 for (
size_t i = 0; i < length; i++) {
440 m_rep = Rep::create(d,
static_cast<int>(length));
456 UChar *d = allocChars(length);
460 for (
size_t i = 0; i < length; i++) {
463 m_rep = Rep::create(d,
static_cast<int>(length));
472 m_rep = Rep::createCopying(c, length);
481 m_rep = Rep::createCopying(c, length);
483 m_rep = Rep::create(c, length);
489 if (!buffer.size()) {
492 m_rep = Rep::createCopying(buffer.data(), buffer.size());
498 int aSize = a.
size();
499 int aOffset = a.m_rep->offset;
500 int bSize = b.
size();
501 int bOffset = b.m_rep->offset;
502 int length = aSize + bSize;
509 }
else if (bSize == 0) {
512 }
else if (aOffset + aSize == a.usedCapacity() && aSize >= minShareSize && 4 * aSize >= bSize &&
513 (-bOffset != b.usedPreCapacity() || aSize >= bSize)) {
519 x.expandCapacity(aOffset + length);
522 m_rep = Rep::create(a.m_rep, 0, length);
526 }
else if (-bOffset == b.usedPreCapacity() && bSize >= minShareSize && 4 * bSize >= aSize) {
531 y.expandPreCapacity(-bOffset + aSize);
534 m_rep = Rep::create(b.m_rep, -aSize, length);
540 size_t newCapacity = expandedSize(length, 0);
541 UChar *d = allocChars(newCapacity);
545 memcpy(d, a.
data(), aSize *
sizeof(
UChar));
546 memcpy(d + aSize, b.
data(), bSize *
sizeof(
UChar));
547 m_rep = Rep::create(d, length);
548 m_rep->capacity = newCapacity;
561 UChar buf[1 +
sizeof(i) * 3];
562 UChar *end = buf +
sizeof(buf) /
sizeof(
UChar);
567 }
else if (i == INT_MIN) {
568 char minBuf[1 +
sizeof(i) * 3];
569 sprintf(minBuf,
"%d", INT_MIN);
572 bool negative =
false;
578 *--p = (
unsigned short)((i % 10) +
'0');
586 return UString(p,
static_cast<int>(end - p));
591 UChar buf[
sizeof(u) * 3];
592 UChar *end = buf +
sizeof(buf) /
sizeof(
UChar);
599 *--p = (
unsigned short)((u % 10) +
'0');
604 return UString(p,
static_cast<int>(end - p));
609 UChar buf[1 +
sizeof(l) * 3];
610 UChar *end = buf +
sizeof(buf) /
sizeof(
UChar);
615 }
else if (l == LONG_MIN) {
616 char minBuf[1 +
sizeof(l) * 3];
617 sprintf(minBuf,
"%ld", LONG_MIN);
620 bool negative =
false;
626 *--p = (
unsigned short)((l % 10) +
'0');
634 return UString(p,
static_cast<int>(end - p));
648 char *result = kjs_dtoa(d, 0, 0, &decimalPoint, &sign,
nullptr);
649 int length =
static_cast<int>(strlen(result));
656 if (decimalPoint <= 0 && decimalPoint > -6) {
659 for (
int j = decimalPoint; j < 0; j++) {
662 strcpy(buf + i, result);
664 }
else if (decimalPoint <= 21 && decimalPoint > 0) {
665 if (length <= decimalPoint) {
666 strcpy(buf + i, result);
668 for (
int j = 0; j < decimalPoint - length; j++) {
673 strncpy(buf + i, result, decimalPoint);
676 strcpy(buf + i, result + decimalPoint);
677 i += length - decimalPoint;
679 }
else if (result[0] <
'0' || result[0] >
'9') {
680 strcpy(buf + i, result);
683 buf[i++] = result[0];
686 strcpy(buf + i, result + 1);
691 buf[i++] = (decimalPoint >= 0) ?
'+' :
'-';
694 int exponential = decimalPoint - 1;
695 if (exponential < 0) {
696 exponential = exponential * -1;
698 if (exponential >= 100) {
699 buf[i++] =
'0' + exponential / 100;
701 if (exponential >= 10) {
702 buf[i++] =
'0' + (exponential % 100) / 10;
704 buf[i++] =
'0' + exponential % 10;
708 kjs_freedtoa(result);
// Builds a new string by interleaving substrings of this string with
// separator strings: range 0, separator 0, range 1, separator 1, ... until
// both arrays are exhausted. The two counts need not be equal.
//   substringRanges / rangeCount  - (position, length) pairs into this string
//   separators / separatorCount   - strings placed after each range
713 UString UString::spliceSubstringsWithSeparators(
const Range *substringRanges,
int rangeCount,
const UString *separators,
int separatorCount)
const
// Fast path: a single range with no separators shares this string's
// storage instead of copying.
715 if (rangeCount == 1 && separatorCount == 0) {
716 int thisSize =
size();
717 int position = substringRanges[0].position;
718 int length = substringRanges[0].length;
// The range covers the whole string.
// NOTE(review): the branch body is not shown in this listing — presumably
// it returns *this.
719 if (position <= 0 && length >= thisSize) {
// position and length are clamped independently of each other here.
722 return UString::Rep::create(m_rep, maxInt(0, position), minInt(thisSize, length));
// General path: first add up the length of every piece.
// NOTE(review): the declaration of totalLength is not shown in this listing.
726 for (
int i = 0; i < rangeCount; i++) {
727 totalLength += substringRanges[i].length;
729 for (
int i = 0; i < separatorCount; i++) {
730 totalLength += separators[i].
size();
// NOTE(review): the body of this empty-result branch is not shown.
733 if (totalLength == 0) {
737 UChar *buffer = allocChars(totalLength);
// NOTE(review): any null check on `buffer` and the declaration of
// bufferPos are not shown in this listing.
742 int maxCount = max(rangeCount, separatorCount);
// Copy the pieces in alternating order; whichever array is shorter simply
// stops contributing once its index runs out.
744 for (
int i = 0; i < maxCount; i++) {
745 if (i < rangeCount) {
746 memcpy(buffer + bufferPos,
data() + substringRanges[i].position, substringRanges[i].length *
sizeof(UChar));
747 bufferPos += substringRanges[i].length;
749 if (i < separatorCount) {
750 memcpy(buffer + bufferPos, separators[i].
data(), separators[i].
size() *
sizeof(UChar));
751 bufferPos += separators[i].
size();
755 return UString::Rep::create(buffer, totalLength);
763 int subSize = subStr.
size();
767 }
else if (subPos >= subSize) {
773 if (subPos + subLength >= subSize) {
774 subLength = subSize - subPos;
782 int thisSize =
size();
783 int thisOffset = m_rep->offset;
784 int tSize = t.
size();
785 int length = thisSize + tSize;
791 }
else if (tSize == 0) {
793 }
else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
795 expandCapacity(thisOffset + length);
797 memcpy(
const_cast<UChar *
>(
data() + thisSize), t.
data(), tSize *
sizeof(UChar));
801 }
else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
803 expandCapacity(thisOffset + length);
805 memcpy(
const_cast<UChar *
>(
data() + thisSize), t.
data(), tSize *
sizeof(UChar));
806 m_rep = Rep::create(m_rep, 0, length);
810 size_t newCapacity = expandedSize(length, 0);
811 UChar *d = allocChars(newCapacity);
815 memcpy(d,
data(), thisSize *
sizeof(UChar));
816 memcpy(
const_cast<UChar *
>(d + thisSize), t.
data(), tSize *
sizeof(UChar));
817 m_rep = Rep::create(d, length);
818 m_rep->capacity = newCapacity;
827 int thisSize =
size();
828 int thisOffset = m_rep->offset;
829 int tSize =
static_cast<int>(strlen(t));
830 int length = thisSize + tSize;
836 }
else if (tSize == 0) {
838 }
else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
840 expandCapacity(thisOffset + length);
841 UChar *d =
const_cast<UChar *
>(
data());
843 for (
int i = 0; i < tSize; ++i) {
844 d[thisSize + i] = t[i];
849 }
else if (thisOffset + thisSize == usedCapacity() && thisSize >= minShareSize) {
851 expandCapacity(thisOffset + length);
852 UChar *d =
const_cast<UChar *
>(
data());
854 for (
int i = 0; i < tSize; ++i) {
855 d[thisSize + i] = t[i];
857 m_rep = Rep::create(m_rep, 0, length);
861 size_t newCapacity = expandedSize(length, 0);
862 UChar *d = allocChars(newCapacity);
866 memcpy(d,
data(), thisSize *
sizeof(UChar));
867 for (
int i = 0; i < tSize; ++i) {
868 d[thisSize + i] = t[i];
870 m_rep = Rep::create(d, length);
871 m_rep->capacity = newCapacity;
880 int thisOffset = m_rep->offset;
886 size_t newCapacity = expandedSize(1, 0);
887 UChar *d = allocChars(newCapacity);
892 m_rep = Rep::create(d, 1);
893 m_rep->capacity = newCapacity;
895 }
else if (m_rep->baseIsSelf() && m_rep->rc == 1) {
897 expandCapacity(thisOffset + length + 1);
898 UChar *d =
const_cast<UChar *
>(
data());
901 m_rep->len = length + 1;
904 }
else if (thisOffset + length == usedCapacity() && length >= minShareSize) {
906 expandCapacity(thisOffset + length + 1);
907 UChar *d =
const_cast<UChar *
>(
data());
910 m_rep = Rep::create(m_rep, 0, length + 1);
914 size_t newCapacity = expandedSize(length + 1, 0);
915 UChar *d = allocChars(newCapacity);
919 memcpy(d,
data(), length *
sizeof(UChar));
921 m_rep = Rep::create(d, length + 1);
922 m_rep->capacity = newCapacity;
939 int neededSize = length + 1;
940 if (neededSize < normalStatBufferSize) {
941 neededSize = normalStatBufferSize;
943 if (neededSize != statBufferSize) {
944 delete [] statBuffer;
945 statBuffer =
new char [neededSize];
946 statBufferSize = neededSize;
950 char *q = statBuffer;
951 const UChar *limit = p + length;
953 *q =
static_cast<char>(p->uc);
962 UString &UString::operator=(Empty)
971 set(c, c ? strlen(c) : 0);
// Replaces this string's contents with the `l` bytes at `c`, widening each
// byte to a UChar.
976 void UString::set(
const char *c,
int l)
// The existing buffer is eligible for reuse only when this string is the
// sole owner of a rep that is its own base, starts at offset 0, has no
// pre-capacity, and is already large enough.
// NOTE(review): both branch bodies are only partly shown in this listing.
989 if (m_rep->rc == 1 && l <= m_rep->capacity && m_rep->baseIsSelf() && m_rep->offset == 0 && m_rep->preCapacity == 0) {
999 m_rep = Rep::create(d, l);
// Bytes go through unsigned char so values >= 0x80 map to 128..255 rather
// than being sign-extended.
1001 for (
int i = 0; i < l; i++) {
1002 d[i].uc =
static_cast<unsigned char>(c[i]);
1022 if (pos >=
size()) {
// Converts this string to a number.
//   tolerateTrailingJunk - when false, any non-NUL character left after the
//                          number and trailing ASCII whitespace is tested
//                          for (see the last check below)
//   tolerateEmptyString  - when true the empty case yields 0.0, otherwise NaN
// NOTE(review): many lines of this function (declarations of `d` and `end`,
// several branch bodies and the returns) are not shown in this listing; the
// comments below describe only the visible steps.
1028 double UString::toDouble(
bool tolerateTrailingJunk,
bool tolerateEmptyString)
const
1032 const int length =
size();
1033 int leadingSpaces = 0;
// Skip leading whitespace as classified by CommonUnicode::isStrWhiteSpace.
1036 while (leadingSpaces < length && CommonUnicode::isStrWhiteSpace(
data()[leadingSpaces].uc)) {
1040 UString whitespaceSkipped =
substr(leadingSpaces, length - leadingSpaces);
// Strings with characters outside the 8-bit range take a separate branch
// before the conversion to a char buffer.
// NOTE(review): that branch's body is not shown — presumably it returns NaN.
1044 if (!whitespaceSkipped.
is8Bit()) {
1048 const char *c = whitespaceSkipped.
ascii();
1052 return tolerateEmptyString ? 0.0 : NaN;
// Hexadecimal literal: "0x" / "0X" followed by hex digits.
1056 if (*c ==
'0' && (*(c + 1) ==
'x' || *(c + 1) ==
'X')) {
1057 const char *firstDigitPosition = c + 2;
1061 if (*c >=
'0' && *c <=
'9') {
1062 d = d * 16.0 + *c -
'0';
1063 }
else if ((*c >=
'A' && *c <=
'F') || (*c >=
'a' && *c <=
'f')) {
// Masking with 0xdf clears the lower-case bit, so 'a'..'f' and 'A'..'F'
// yield the same digit value.
1064 d = d * 16.0 + (*c & 0xdf) -
'A' + 10.0;
// Past mantissaOverflowLowerBound the running value is no longer used;
// the digit run is re-parsed by parseIntOverflow instead.
1070 if (d >= mantissaOverflowLowerBound) {
1071 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
// Decimal path: kjs_strtod does the parsing and reports where it stopped.
1076 d = kjs_strtod(c, &end);
// Taken when kjs_strtod consumed something (or returned non-zero) and the
// result is finite.
1077 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1084 }
else if (*c ==
'-') {
// Only the exact spelling "Infinity" is matched here.
1094 if (strncmp(c,
"Infinity", 8) == 0) {
// An infinite result whose text does not start with 'I' or 'i' gets its
// own branch.
// NOTE(review): presumably a numeric literal that overflowed; the branch
// body is not shown.
1097 }
else if ((d == Inf || d == -Inf) && *c !=
'I' && *c !=
'i') {
// Skip trailing ASCII whitespace. This uses isASCIISpace, whereas leading
// whitespace used CommonUnicode::isStrWhiteSpace.
1106 while (isASCIISpace(*c)) {
1110 if (!tolerateTrailingJunk && *c !=
'\0') {
1117 #ifdef __FAST_MATH__
1118 # error "KJS does not work correctly with -ffast-math"
// Overload that forwards to toDouble(bool, bool) with tolerateEmptyString
// fixed to true.
1121 double UString::toDouble(
bool tolerateTrailingJunk)
const
1123 return toDouble(tolerateTrailingJunk,
true);
// Overload that forwards to toDouble(bool, bool) with
// tolerateTrailingJunk = false and tolerateEmptyString = true.
1126 double UString::toDouble()
const
1128 return toDouble(
false,
true);
1138 int len = m_rep->len;
1142 const UChar *p = m_rep->data();
1143 unsigned short c = p->
unicode();
1147 if (len == 1 && ok) {
1157 if (c < '0' || c >
'9') {
1160 const unsigned d = c -
'0';
1163 if (i > 0xFFFFFFFFU / 10) {
1169 const unsigned max = 0xFFFFFFFFU - d;
1184 c = (++p)->unicode();
1202 const UChar *end = data_ + sz - fsz;
1203 int fsizeminusone = (fsz - 1) *
sizeof(
UChar);
1205 unsigned short fchar = fdata->uc;
1207 for (
const UChar *c = data_ + pos; c <= end; c++)
1208 if (c->uc == fchar && !memcmp(c + 1, fdata, fsizeminusone)) {
1220 const UChar *data_ =
data();
1221 const UChar *end = data_ +
size();
1222 for (
const UChar *c = data_ + pos; c < end; c++)
1240 if (pos > sz - fsz) {
1246 int fsizeminusone = (fsz - 1) *
sizeof(
UChar);
1249 for (
const UChar *c = data_ + pos; c >= data_; c--) {
1250 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) {
1263 if (pos + 1 >=
size()) {
1266 const UChar *data_ =
data();
1267 for (
const UChar *c = data_ + pos; c >= data_; c--) {
1282 }
else if (pos >= s) {
1288 if (pos + len >= s) {
1292 if (pos == 0 && len == s) {
1296 return UString(Rep::create(m_rep, pos, len));
1300 return ::KJS::maxUChars();
// Gives this string a buffer of its own before it is modified. A copy is
// made when the rep has more than one reference or is a view onto another
// rep's storage; otherwise the rep is left as it is.
// NOTE(review): the definition of `l` and any early-out or null check on
// `n` are not shown in this listing — `l` is presumably size().
1303 void UString::copyForWriting()
1309 if (m_rep->rc > 1 || !m_rep->baseIsSelf()) {
1310 UChar *n = allocChars(l);
1311 memcpy(n,
data(), l *
sizeof(UChar));
1312 m_rep = Rep::create(n, l);
// Equality of two UStrings. Visible steps: a test for both sharing the same
// rep, a length comparison, then a memcmp over len UChars.
// NOTE(review): the bodies of the two `if`s are not shown in this listing —
// presumably `return true` and `return false` respectively.
1316 bool operator==(
const UString &s1,
const UString &s2)
1319 if (s1.m_rep == s2.m_rep) {
1324 if (s1.m_rep->len != s2.m_rep->len) {
1328 return (memcmp(s1.m_rep->data(), s2.m_rep->data(),
1329 s1.m_rep->len *
sizeof(UChar)) == 0);
// Compares a UString with a NUL-terminated C string, treating every byte of
// s2 as an unsigned 8-bit character code. A null s2 compares equal exactly
// when s1 is empty.
1332 bool operator==(
const UString &s1,
const char *s2)
1334 if (s2 ==
nullptr) {
1335 return s1.isEmpty();
1338 const UChar *u = s1.data();
1339 const UChar *uend = u + s1.size();
// Walk both strings in step until either one ends.
// NOTE(review): the mismatch branch body and the pointer increments are not
// shown in this listing.
1340 while (u != uend && *s2) {
1341 if (u->uc != (
unsigned char)*s2) {
// Equal only if both strings were exhausted at the same point.
1348 return u == uend && *s2 == 0;
// Ordering of two UStrings by UChar code-unit value. Visible steps: skip
// the common prefix (up to the shorter length), then compare the first
// differing code units.
// NOTE(review): the declaration of `l`, the loop body, the guard around the
// code-unit comparison and the fall-back for the case where one string is a
// prefix of the other are not shown in this listing.
1351 bool operator<(
const UString &s1,
const UString &s2)
1353 const int l1 = s1.size();
1354 const int l2 = s2.size();
1355 const int lmin = l1 < l2 ? l1 : l2;
1356 const UChar *c1 = s1.data();
1357 const UChar *c2 = s2.data();
1359 while (l < lmin && *c1 == *c2) {
1365 return (c1->uc < c2->uc);
// Compares the character data of two Reps: the lengths must match, then
// the UChars are compared one at a time.
// NOTE(review): any early-out before the length check, the bodies of both
// `if`s and the final return are not shown in this listing.
1371 bool UString::equal(
const UString::Rep *r,
const UString::Rep *b)
1377 int length = r->len;
1378 if (length != b->len) {
1382 const UChar *d = r->data();
1383 const UChar *s = b->data();
1384 for (
int i = 0; i != length; ++i)
1385 if (d[i].uc != s[i].uc) {
// Three-way comparison of two UStrings by UChar code-unit value. Visible
// steps: skip the common prefix (up to the shorter length); a difference
// there decides the result as 1 or -1; otherwise the lengths decide.
// NOTE(review): the declaration of `l`, the loop body, the guards selecting
// between the two returns and the equal-length case (presumably returning
// 0) are not shown in this listing.
1391 int compare(
const UString &s1,
const UString &s2)
1393 const int l1 = s1.size();
1394 const int l2 = s2.size();
1395 const int lmin = l1 < l2 ? l1 : l2;
1396 const UChar *c1 = s1.data();
1397 const UChar *c2 = s2.data();
1399 while (l < lmin && *c1 == *c2) {
1406 return (c1->uc > c2->uc) ? 1 : -1;
1413 return (l1 > l2) ? 1 : -1;
// Classifies a non-ASCII UTF-8 lead byte by its high bits.
// NOTE(review): the return statements are not shown in this listing —
// presumably 2, 3 and 4 for the three lead-byte patterns and 0 for
// anything else; confirm.
1416 inline int inlineUTF8SequenceLengthNonASCII(
char b0)
// Bytes without both top bits set (such as 10xxxxxx continuation bytes)
// cannot start a multi-byte sequence.
1418 if ((b0 & 0xC0) != 0xC0) {
// 110xxxxx
1421 if ((b0 & 0xE0) == 0xC0) {
// 1110xxxx
1424 if ((b0 & 0xF0) == 0xE0) {
// 11110xxx
1427 if ((b0 & 0xF8) == 0xF0) {
// Non-inline wrapper that forwards to inlineUTF8SequenceLengthNonASCII.
1433 int UTF8SequenceLengthNonASCII(
char b0)
1435 return inlineUTF8SequenceLengthNonASCII(b0);
// Sequence length for any lead byte: 1 when the top bit is clear (ASCII),
// otherwise whatever the non-ASCII classifier reports.
// NOTE(review): this calls the out-of-line UTF8SequenceLengthNonASCII,
// whereas UTF8SequenceLength below calls the inline variant directly; the
// result is the same either way.
1438 inline int inlineUTF8SequenceLength(
char b0)
1440 return (b0 & 0x80) == 0 ? 1 : UTF8SequenceLengthNonASCII(b0);
// Sequence length for any lead byte: 1 when the top bit is clear (ASCII),
// otherwise the result of inlineUTF8SequenceLengthNonASCII.
1446 int UTF8SequenceLength(
char b0)
1448 return (b0 & 0x80) == 0 ? 1 : inlineUTF8SequenceLengthNonASCII(b0);
// Decodes the single UTF-8 sequence starting at `sequence` into a code
// point, validating as it goes.
// NOTE(review): the length dispatch, every failure return and the success
// returns are not shown in this listing; the comments below describe only
// the visible checks. The code reads up to sequence[4], so the input
// presumably has to be NUL-terminated — confirm against callers.
1454 int decodeUTF8Sequence(
const char *sequence)
// Bytes are taken as unsigned char so the masks below see 0..255.
1457 const unsigned char b0 = sequence[0];
1458 const int length = inlineUTF8SequenceLength(b0);
1464 const unsigned char b1 = sequence[1];
// Every continuation byte must have the form 10xxxxxx.
1473 if ((b1 & 0xC0) != 0x80) {
1476 const unsigned char b2 = sequence[2];
// Two-byte form: 5 payload bits from b0, 6 from b1.
1481 const int c = ((b0 & 0x1F) << 6) | (b1 & 0x3F);
1489 if ((b2 & 0xC0) != 0x80) {
1492 const unsigned char b3 = sequence[3];
// Three-byte form: 4 + 6 + 6 payload bits.
1497 const int c = ((b0 & 0xF) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
// UTF-16 surrogate code points get their own branch here.
1502 if (c >= 0xD800 && c <= 0xDFFF) {
// So do the noncharacters U+FFFE and U+FFFF.
1506 if (c == 0xFFFE || c == 0xFFFF) {
1513 if ((b3 & 0xC0) != 0x80) {
1516 const unsigned char b4 = sequence[4];
// Four-byte form: 3 + 6 + 6 + 6 payload bits. The result is tested against
// the supplementary range 0x10000..0x10FFFF.
1521 const int c = ((b0 & 0x7) << 18) | ((b1 & 0x3F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
1522 if (c < 0x10000 || c > 0x10FFFF) {
1534 const int length =
size();
1535 Vector<char, 1024> buffer(length * 3);
1538 char *p = buffer.begin();
1539 const unsigned short *d = &
data()->uc;
1540 for (
int i = 0; i != length; ++i) {
1541 unsigned int c = d[i], sc;
1544 }
else if (c < 0x800) {
1545 *p++ = (char)((c >> 6) | 0xC0);
1546 *p++ = (char)((c | 0x80) & 0xBF);
1547 }
else if (c >= 0xD800 && c <= 0xDBFF && (i + 1) < length &&
1548 (sc = d[i + 1]) >= 0xDC00 && sc <= 0xDFFF) {
1549 sc = 0x10000 + (((c & 0x3FF) << 10) | (sc & 0x3FF));
1550 *p++ = (char)((sc >> 18) | 0xF0);
1551 *p++ = (char)(((sc >> 12) | 0x80) & 0xBF);
1552 *p++ = (char)(((sc >> 6) | 0x80) & 0xBF);
1553 *p++ = (char)((sc | 0x80) & 0xBF);
1556 *p++ = (char)((c >> 12) | 0xE0);
1557 *p++ = (char)(((c >> 6) | 0x80) & 0xBF);
1558 *p++ = (char)((c | 0x80) & 0xBF);
1563 CString result(buffer.data(), p - buffer.data());