• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdesdk API Reference
  • KDE Home
  • Contact Us
 

okteta

  • sources
  • kde-4.12
  • kdesdk
  • okteta
  • kasten
  • controllers
  • view
  • structures
  • datatypes
  • strings
utf8stringdata.cpp
Go to the documentation of this file.
1 /*
2  * This file is part of the Okteta Kasten Framework, made within the KDE community.
3  *
4  * Copyright 2011 Alex Richardson <alex.richardson@gmx.de>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) version 3, or any
10  * later version accepted by the membership of KDE e.V. (or its
11  * successor approved by the membership of KDE e.V.), which shall
12  * act as a proxy defined in Section 6 of version 3 of the license.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 
24 
25 #include "utf8stringdata.h"
26 
27 #include <QVarLengthArray>
28 
29 #include <KLocale>
30 #include <KDebug> //TODO remove
31 
32 #include <abstractbytearraymodel.h>
33 
34 #include "../topleveldatainformation.h"
35 #include "../dummydatainformation.h"
36 #include "stringdatainformation.h"
37 
38 Utf8StringData::Utf8StringData(StringDataInformation* parent)
39  : StringData(parent), mOneByteCount(0), mTwoByteCount(0), mThreeByteCount(0), mFourByteCount(0), mNonBMPCount(0)
40 {
41 }
42 
43 Utf8StringData::~Utf8StringData()
44 {
45 }
46 
47 QString Utf8StringData::charType() const
48 {
49  return i18n("UTF8 char");
50 }
51 
52 QString Utf8StringData::typeName() const
53 {
54  return i18n("UTF8 string");
55 }
56 
57 uint Utf8StringData::count() const
58 {
59  return mCodePoints.size();
60 }
61 
62 QString Utf8StringData::stringValue(int row) const
63 {
64  Q_ASSERT((uint)row < count());
65  //TODO show invalid values
66  uint val = mCodePoints.at(row);
67  QString number = QString::number(val, 16).toUpper();
68  if (number.length() == 1)
69  number = QLatin1String("0") + number;
70  if (val > UNICODE_MAX)
71  return i18n("Value too big: 0x%1", number);
72  else if (val > BMP_MAX) {
73  QString ret(2, Qt::Uninitialized);
74  ret[0] = QChar::highSurrogate(val);
75  ret[1] = QChar::lowSurrogate(val);
76  return i18n("%1 (U+%2)", ret, number);
77  }
78  else
79  return i18n("%1 (U+%2)", QString(QChar(mCodePoints.at(row))), number);
80 }
81 
82 QString Utf8StringData::completeString(bool skipInvalid) const
83 {
84  QVarLengthArray<QChar> data(mCodePoints.size() + mNonBMPCount);
85  int codePointCount = mCodePoints.size();
86  int i = 0;
87  for (int idx = 0; idx < codePointCount; ++idx) {
88  uint val = mCodePoints.at(idx);
89  //if error at idx is set also skip
90  if (val > UNICODE_MAX || mErrorIndices.value(idx))
91  {
92  if (skipInvalid)
93  continue;
94  else
95  data[i] = QChar::ReplacementCharacter;
96  }
97  else if (val > BMP_MAX) {
98  data[i] = QChar::highSurrogate(val);
99  i++;
100  data[i] = QChar::lowSurrogate(val);
101  }
102  else
103  {
104  data[i] = QChar((ushort)val);
105  }
106  i++;
107  }
108  return QString(data.constData(), i);
109 }
110 
111 qint64 Utf8StringData::read(Okteta::AbstractByteArrayModel* input, Okteta::Address address,
112  BitCount64 bitsRemaining)
113 {
114  const int oldSize = count();
115  mNonBMPCount = 0;
116  mOneByteCount = 0;
117  mTwoByteCount = 0;
118  mThreeByteCount = 0;
119  mFourByteCount = 0;
120  if (mMode == CharCount)
121  {
122  mCodePoints.reserve(mLength.maxChars);
123  }
124  else if (mMode == ByteCount)
125  {
126  mCodePoints.reserve(mLength.maxBytes / 1.5); //just a guess, assuming 1.5 bytes per char
127  }
128 
129  mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, oldSize, 0);
130  mParent->topLevelDataInformation()->_childCountChanged(mParent, oldSize, 0);
131 
132  const uint oldMax = mCodePoints.size();
133  quint64 remaining = bitsRemaining;
134  Okteta::Address addr = address;
135  uint count = 0;
136  mEofReached = false;
137  if (((mMode & CharCount) && mLength.maxChars == 0)
138  || ((mMode & ByteCount) && mLength.maxBytes < 2))
139  return 0;
140 
141  bool eofAtStart = false;
142  if (bitsRemaining < 8)
143  eofAtStart = true;
144 
145  while (true)
146  {
147  if (remaining < 8)
148  {
149  mEofReached = true;
150  break;
151  }
152  uint codePoint;
153  quint8 byte = input->byte(addr);
154  bool terminate = false;
155 
156  if (byte <= ASCII_MAX)
157  {
158  mOneByteCount++;
159  codePoint = byte;
160  }
161  else if ((byte & 0xe0) == 0xc0)
162  {
163  //two byte sequence
164  if (byte == 0xc0 || byte == 0xc1)
165  {
166  mOneByteCount++;
167  mErrorIndices[count] = 1;
168  codePoint = byte;
169  }
170  else if (remaining < 16)
171  {
172  mOneByteCount++;
173  mEofReached = true;
174  mErrorIndices[count] = 1;
175  codePoint = byte;
176  }
177  else
178  {
179  mTwoByteCount++;
180  remaining -= 8;
181  addr++;
182  quint8 byte2 = input->byte(addr);
183  if ((byte2 & 0xc0) != 0x80)
184  {
185  mErrorIndices[count] = 2;
186  codePoint = (byte << 8) | byte2; //just put the raw bytes in case of error
187  }
188  else
189  {
190  codePoint = (byte2 & 0x3f) | ((byte & 0x1f) << 6);
191  }
192  }
193  }
194  else if ((byte & 0xf0) == 0xe0)
195  {
196  if (remaining < 16)
197  {
198  mEofReached = true;
199  mErrorIndices[count] = 1;
200  codePoint = byte;
201  mOneByteCount++;
202  }
203  else if (remaining < 24)
204  {
205  mEofReached = true;
206  mErrorIndices[count] = 2;
207  remaining -= 8;
208  addr++;
209  codePoint = (byte << 8) | input->byte(addr);
210  mTwoByteCount++;
211  }
212  else {
213  mThreeByteCount++;
214  remaining -= 16;
215  addr++;
216  quint8 byte2 = input->byte(addr);
217  addr++;
218  quint8 byte3 = input->byte(addr);
219  if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80)
220  {
221  mErrorIndices[count] = 3;
222  codePoint = (byte << 16) | (byte2 << 8) | byte3; //just put the raw bytes in case of error
223  }
224  else
225  {
226  codePoint = (byte3 & 0x3f) | ((byte2 & 0x3f) << 6) | ((byte & 0x1f) << 12);
227  }
228  }
229  }
230  else if ((byte & 0xf8) == 0xf0)
231  {
232  if (remaining < 16)
233  {
234  mEofReached = true;
235  mErrorIndices[count] = 1;
236  codePoint = byte;
237  mOneByteCount++;
238  }
239  else if (remaining < 24)
240  {
241  mEofReached = true;
242  mErrorIndices[count] = 2;
243  addr++;
244  remaining -= 8;
245  codePoint = (byte << 8) | input->byte(addr);
246  mTwoByteCount++;
247  }
248  else if (remaining < 32)
249  {
250  mEofReached = true;
251  mErrorIndices[count] = 3;
252  codePoint = (byte << 16) | (input->byte(addr + 1) << 8) | input->byte(addr + 2);
253  addr += 2;
254  remaining -= 16;
255  mThreeByteCount++;
256  }
257  else
258  {
259  mFourByteCount++;
260  remaining -= 24;
261  addr++;
262  quint8 byte2 = input->byte(addr);
263  addr++;
264  quint8 byte3 = input->byte(addr);
265  addr++;
266  quint8 byte4 = input->byte(addr);
267  if ((byte2 & 0xc0) != 0x80 || (byte3 & 0xc0) != 0x80 || (byte4 & 0xc0) != 0x80)
268  {
269  mErrorIndices[count] = 3;
270  codePoint = (byte << 16) | (byte2 << 8) | byte3; //just put the raw bytes in case of error
271  }
272  else
273  {
274  codePoint = (byte4 & 0x3f) | ((byte3 & 0x3f) << 6)
275  | ((byte2 & 0x3f) << 12) | ((byte & 0x1f) << 18);
276  if (codePoint > UNICODE_MAX)
277  {
278  mErrorIndices[count] = 4;
279  //just put the raw bytes in case of error
280  codePoint = (byte << 24) | (byte2 << 16) | (byte3 << 8) | byte4;
281  }
282  }
283  }
284  }
285  else {
286  mErrorIndices[count] = 1;
287  codePoint = byte;
288  mOneByteCount++;
289  }
290 
291  if (codePoint > BMP_MAX)
292  mNonBMPCount++;
293  if (count < oldMax)
294  mCodePoints[count] = codePoint;
295  else
296  mCodePoints.append(codePoint);
297 
298  remaining -= 8;
299  addr++;
300  count++;
301 
302  //now check if we have to terminate
303  if (mMode & Sequence)
304  {
305  if (codePoint == mTerminationCodePoint)
306  terminate = true;
307  }
308  if (mMode & ByteCount)
309  {
310  // divide by two in case someone set length to an odd number of bytes
311  if (uint(addr - address) >= mLength.maxBytes)
312  terminate = true;
313  }
314  if (mMode & CharCount)
315  {
316  if (count >= mLength.maxChars)
317  terminate = true;
318  }
319  if (mMode == None) {
320  kDebug() << "no termination mode set!!";
321  Q_ASSERT(false);
322  }
323  if (terminate)
324  break;
325  }
326  mCodePoints.resize(count);
327  mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, 0, count);
328  mParent->topLevelDataInformation()->_childCountChanged(mParent, 0, count);
329 
330  if (eofAtStart)
331  return -1;
332  return (addr - address) * 8;
333 }
334 
335 BitCount32 Utf8StringData::size() const
336 {
337  //add 16 for every non BMP char, since they use 32 bits
338  return (mOneByteCount + mTwoByteCount * 2 + mThreeByteCount * 3 + mFourByteCount * 4) * 8;
339 }
340 
341 BitCount32 Utf8StringData::sizeAt(uint i) const
342 {
343  Q_ASSERT(i <= count());
344  quint8 isError = mErrorIndices[i];
345  if (isError)
346  return isError * 8; //error is number of bytes
347  uint val = mCodePoints.at(i);
348  if (val < 0x80)
349  return 8;
350  else if (val < 0x7ff)
351  return 16;
352  else if (val < 0xffff)
353  return 24;
354  else
355  return 32;
356 }
Okteta::Address
qint32 Address
Definition: address.h:34
Okteta::AbstractByteArrayModel
could it be useful to hide the data access behind an iterator? * class KDataBufferIterator { public: ...
Definition: abstractbytearraymodel.h:79
abstractbytearraymodel.h
StringData
Definition: stringdata.h:36
DataInformation::topLevelDataInformation
TopLevelDataInformation * topLevelDataInformation() const
Definition: datainformation.cpp:240
Utf8StringData::count
virtual uint count() const
Definition: utf8stringdata.cpp:57
TopLevelDataInformation::_childCountAboutToChange
void _childCountAboutToChange(DataInformation *sender, uint oldCount, uint newCount)
Definition: topleveldatainformation.h:184
Utf8StringData::size
virtual BitCount32 size() const
Definition: utf8stringdata.cpp:335
BitCount64
quint64 BitCount64
Definition: datainformationbase.h:42
TopLevelDataInformation::_childCountChanged
void _childCountChanged(DataInformation *sender, uint oldCount, uint newCount)
Definition: topleveldatainformation.h:192
StringData::ByteCount
Definition: stringdata.h:45
Utf8StringData::sizeAt
virtual BitCount32 sizeAt(uint i) const
Definition: utf8stringdata.cpp:341
StringData::BMP_MAX
static const uint BMP_MAX
Definition: stringdata.h:79
stringdatainformation.h
BitCount32
quint32 BitCount32
Definition: datainformationbase.h:37
Utf8StringData::stringValue
virtual QString stringValue(int row) const
Definition: utf8stringdata.cpp:62
StringData::ASCII_MAX
static const char ASCII_MAX
Definition: stringdata.h:80
Utf8StringData::typeName
virtual QString typeName() const
Definition: utf8stringdata.cpp:52
StringData::mTerminationCodePoint
quint32 mTerminationCodePoint
Definition: stringdata.h:87
StringData::Sequence
Definition: stringdata.h:43
Okteta::AbstractByteArrayModel::byte
virtual Byte byte(Address offset) const =0
locates working range The idea behind is to tell buffer which range will be requested in the followin...
StringData::mLength
union StringData::@5 mLength
StringData::mMode
uint mMode
Definition: stringdata.h:88
StringData::mParent
StringDataInformation * mParent
Definition: stringdata.h:82
Utf8StringData::completeString
virtual QString completeString(bool skipInvalid=false) const
Definition: utf8stringdata.cpp:82
Utf8StringData::charType
virtual QString charType() const
Definition: utf8stringdata.cpp:47
Utf8StringData::read
virtual qint64 read(Okteta::AbstractByteArrayModel *input, Okteta::Address address, BitCount64 bitsRemaining)
Definition: utf8stringdata.cpp:111
Utf8StringData::Utf8StringData
Utf8StringData(StringDataInformation *parent)
Definition: utf8stringdata.cpp:38
StringData::None
Definition: stringdata.h:42
utf8stringdata.h
Utf8StringData::~Utf8StringData
virtual ~Utf8StringData()
Definition: utf8stringdata.cpp:43
StringData::mEofReached
bool mEofReached
Definition: stringdata.h:90
StringData::CharCount
Definition: stringdata.h:44
StringDataInformation
Definition: stringdatainformation.h:39
StringData::UNICODE_MAX
static const uint UNICODE_MAX
Definition: stringdata.h:78
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 23:04:09 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

okteta

Skip menu "okteta"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdesdk API Reference

Skip menu "kdesdk API Reference"
  • kapptemplate
  • kcachegrind
  • kompare
  • lokalize
  • okteta
  • umbrello
  •   umbrello

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal