• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • applications API Reference
  • KDE Home
  • Contact Us
 

Kate

  • kde-4.14
  • applications
  • kate
  • part
  • buffer
katetextloader.h
Go to the documentation of this file.
1 /* This file is part of the Kate project.
2  *
3  * Copyright (C) 2010 Christoph Cullmann <cullmann@kde.org>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB. If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 #ifndef KATE_TEXTLOADER_H
22 #define KATE_TEXTLOADER_H
23 
24 #include <QtCore/QString>
25 #include <QtCore/QFile>
26 #include <QtCore/QCryptographicHash>
27 
28 // on the fly compression
29 #include <kfilterdev.h>
30 #include <kmimetype.h>
31 
32 namespace Kate {
33 
/**
 * Loader block size in bytes (256 kb).
 * TextLoader sizes its raw read buffer with this value, so the file is
 * pulled from the device in chunks of at most this many bytes per read.
 */
39 static const qint64 KATE_FILE_LOADER_BS = 256 * 1024;
40 
44 class TextLoader
45 {
46  public:
52  TextLoader (const QString &filename, KEncodingProber::ProberType proberType)
53  : m_codec (0)
54  , m_eof (false) // default to not eof
55  , m_lastWasEndOfLine (true) // at start of file, we had a virtual newline
56  , m_lastWasR (false) // we have not found a \r as last char
57  , m_position (0)
58  , m_lastLineStart (0)
59  , m_eol (TextBuffer::eolUnknown) // no eol type detected atm
60  , m_buffer (KATE_FILE_LOADER_BS, 0)
61  , m_digest (QCryptographicHash::Md5)
62  , m_converterState (0)
63  , m_bomFound (false)
64  , m_firstRead (true)
65  , m_proberType (proberType)
66  {
67  // try to get mimetype for on the fly decompression, don't rely on filename!
68  QFile testMime (filename);
69  if (testMime.open (QIODevice::ReadOnly))
70  m_mimeType = KMimeType::findByContent (&testMime)->name ();
71  else
72  m_mimeType = KMimeType::findByPath (filename, 0, false)->name ();
73 
74  // construct filter device
75  m_file = KFilterDev::deviceForFile (filename, m_mimeType, false);
76  }
77 
81  ~TextLoader ()
82  {
83  delete m_file;
84  delete m_converterState;
85  }
86 
92  bool open (QTextCodec *codec)
93  {
94  m_codec = codec;
95  m_eof = false;
96  m_lastWasEndOfLine = true;
97  m_lastWasR = false;
98  m_position = 0;
99  m_lastLineStart = 0;
100  m_eol = TextBuffer::eolUnknown;
101  m_text.clear ();
102  delete m_converterState;
103  m_converterState = new QTextCodec::ConverterState (QTextCodec::ConvertInvalidToNull);
104  m_bomFound = false;
105  m_firstRead = true;
106 
107  // if already opened, close the file...
108  if (m_file->isOpen())
109  m_file->close ();
110 
111  return m_file->open (QIODevice::ReadOnly);
112  }
113 
118  bool eof () const { return m_eof && !m_lastWasEndOfLine && (m_lastLineStart == m_text.length()); }
119 
125  TextBuffer::EndOfLineMode eol () const { return m_eol; }
126 
131  bool byteOrderMarkFound () const { return m_bomFound; }
132 
137  const QString &mimeTypeForFilterDev () const { return m_mimeType; }
138 
143  const QChar *unicode () const { return m_text.unicode(); }
144 
149  QTextCodec *textCodec () const { return m_codec; }
150 
157  bool readLine (int &offset, int &length)
158  {
159  length = 0;
160  offset = 0;
161  bool encodingError = false;
162 
163  static const QLatin1Char cr(QLatin1Char('\r'));
164  static const QLatin1Char lf(QLatin1Char('\n'));
165 
171  bool failedToConvertOnce = false;
172 
176  while (m_position <= m_text.length())
177  {
178  if (m_position == m_text.length())
179  {
180  // try to load more text if something is around
181  if (!m_eof)
182  {
183  // kill the old lines...
184  m_text.remove (0, m_lastLineStart);
185 
186  // try to read new data
187  const int c = m_file->read(m_buffer.data(), m_buffer.size());
188 
189  // if any text is there, append it....
190  if (c > 0)
191  {
192  // update md5 hash sum
193  m_digest.addData (m_buffer.data(), c);
194 
195  // detect byte order marks & codec for byte order markers on first read
196  int bomBytes = 0;
197  if (m_firstRead) {
198  // use first 16 bytes max to allow BOM detection of codec
199  QByteArray bom (m_buffer.data(), qMin (16, c));
200  QTextCodec *codecForByteOrderMark = QTextCodec::codecForUtfText (bom, 0);
201 
202  // if codec != null, we found a BOM!
203  if (codecForByteOrderMark) {
204  m_bomFound = true;
205 
206  // eat away the different boms!
207  int mib = codecForByteOrderMark->mibEnum ();
208  if (mib == 106) // utf8
209  bomBytes = 3;
210  if (mib == 1013 || mib == 1014 || mib == 1015) // utf16
211  bomBytes = 2;
212  if (mib == 1017 || mib == 1018 || mib == 1019) // utf32
213  bomBytes = 4;
214  }
215 
219  if (!m_codec) {
223  if (codecForByteOrderMark)
224  m_codec = codecForByteOrderMark;
225  else {
229  KEncodingProber prober (m_proberType);
230  prober.feed (m_buffer.constData(), c);
231 
232  // we found codec with some confidence?
233  if (prober.confidence() > 0.5)
234  m_codec = QTextCodec::codecForName(prober.encoding());
235 
236  // no codec, no chance, encoding error
237  if (!m_codec)
238  return false;
239  }
240  }
241 
242  m_firstRead = false;
243  }
244 
245  Q_ASSERT (m_codec);
246  QString unicode = m_codec->toUnicode (m_buffer.constData() + bomBytes, c - bomBytes, m_converterState);
247 
248  // detect broken encoding
249  for (int i = 0; i < unicode.size(); ++i) {
250  if (unicode[i] == 0) {
251  encodingError = true;
252  break;
253  }
254  }
255 
256  m_text.append (unicode);
257  }
258 
259  // is file completely read ?
260  m_eof = (c == -1) || (c == 0);
261 
262  // recalc current pos and last pos
263  m_position -= m_lastLineStart;
264  m_lastLineStart = 0;
265  }
266 
267  // oh oh, end of file, escape !
268  if (m_eof && (m_position == m_text.length()))
269  {
270  m_lastWasEndOfLine = false;
271 
272  // line data
273  offset = m_lastLineStart;
274  length = m_position-m_lastLineStart;
275 
276  m_lastLineStart = m_position;
277 
278  return !encodingError && !failedToConvertOnce;
279  }
280 
281  // empty? try again
282  if (m_position == m_text.length()) {
283  failedToConvertOnce = true;
284  continue;
285  }
286  }
287 
288  if (m_text.at(m_position) == lf)
289  {
290  m_lastWasEndOfLine = true;
291 
292  if (m_lastWasR)
293  {
294  m_lastLineStart++;
295  m_lastWasR = false;
296  m_eol = TextBuffer::eolDos;
297  }
298  else
299  {
300  // line data
301  offset = m_lastLineStart;
302  length = m_position-m_lastLineStart;
303 
304  m_lastLineStart = m_position+1;
305  m_position++;
306 
307  // only win, if not dos!
308  if (m_eol != TextBuffer::eolDos)
309  m_eol = TextBuffer::eolUnix;
310 
311  return !encodingError;
312  }
313  }
314  else if (m_text.at(m_position) == cr)
315  {
316  m_lastWasEndOfLine = true;
317  m_lastWasR = true;
318 
319  // line data
320  offset = m_lastLineStart;
321  length = m_position-m_lastLineStart;
322 
323  m_lastLineStart = m_position+1;
324  m_position++;
325 
326  // should only win of first time!
327  if (m_eol == TextBuffer::eolUnknown)
328  m_eol = TextBuffer::eolMac;
329 
330  return !encodingError;
331  }
332  else if (m_text.at(m_position) == QChar::LineSeparator)
333  {
334  m_lastWasEndOfLine = true;
335 
336  // line data
337  offset = m_lastLineStart;
338  length = m_position-m_lastLineStart;
339 
340  m_lastLineStart = m_position+1;
341  m_position++;
342 
343  return !encodingError;
344  }
345  else
346  {
347  m_lastWasEndOfLine = false;
348  m_lastWasR = false;
349  }
350 
351  m_position++;
352  }
353 
354  return !encodingError;
355  }
356 
357  QByteArray digest ()
358  {
359  return m_digest.result ();
360  }
361 
362  private:
363  QTextCodec *m_codec;
364  bool m_eof;
365  bool m_lastWasEndOfLine;
366  bool m_lastWasR;
367  int m_position;
368  int m_lastLineStart;
369  TextBuffer::EndOfLineMode m_eol;
370  QString m_mimeType;
371  QIODevice *m_file;
372  QByteArray m_buffer;
373  QCryptographicHash m_digest;
374  QString m_text;
375  QTextCodec::ConverterState *m_converterState;
376  bool m_bomFound;
377  bool m_firstRead;
378  KEncodingProber::ProberType m_proberType;
379 };
380 
381 }
382 
383 #endif
QIODevice
Kate::TextBuffer::eolMac
Definition: katetextbuffer.h:63
QString::append
QString & append(QChar ch)
Kate::TextLoader::eol
TextBuffer::EndOfLineMode eol() const
Detected end of line mode for this file.
Definition: katetextloader.h:125
QByteArray
Kate::TextBuffer::eolDos
Definition: katetextbuffer.h:62
QChar
Kate::TextLoader::eof
bool eof() const
end of file reached?
Definition: katetextloader.h:118
Kate::TextLoader::textCodec
QTextCodec * textCodec() const
Get codec for this loader.
Definition: katetextloader.h:149
QString::size
int size() const
QString::remove
QString & remove(int position, int n)
QIODevice::open
virtual bool open(QFlags< QIODevice::OpenModeFlag > mode)
QTextCodec::ConverterState
QIODevice::close
virtual void close()
Kate::TextBuffer::EndOfLineMode
EndOfLineMode
End of line mode.
Definition: katetextbuffer.h:59
QFile
QString::clear
void clear()
Kate::TextLoader::byteOrderMarkFound
bool byteOrderMarkFound() const
BOM found?
Definition: katetextloader.h:131
Kate::TextLoader::open
bool open(QTextCodec *codec)
open file with given codec
Definition: katetextloader.h:92
QByteArray::constData
const char * constData() const
QCryptographicHash::addData
void addData(const char *data, int length)
QIODevice::read
qint64 read(char *data, qint64 maxSize)
QIODevice::isOpen
bool isOpen() const
QString
QTextCodec
QFile::open
virtual bool open(QFlags< QIODevice::OpenModeFlag > mode)
QLatin1Char
Kate::TextLoader::TextLoader
TextLoader(const QString &filename, KEncodingProber::ProberType proberType)
Construct file loader for given file.
Definition: katetextloader.h:52
Kate::TextLoader::~TextLoader
~TextLoader()
Destructor.
Definition: katetextloader.h:81
Kate::KATE_FILE_LOADER_BS
static const qint64 KATE_FILE_LOADER_BS
loader block size, load 256 kb at once per default if file size is smaller, fall back to file size mu...
Definition: katetextloader.h:39
Kate::TextLoader::digest
QByteArray digest()
Definition: katetextloader.h:357
QString::unicode
const QChar * unicode() const
Kate::TextBuffer::eolUnix
Definition: katetextbuffer.h:61
Kate::TextBuffer::eolUnknown
Definition: katetextbuffer.h:60
Kate::TextLoader::readLine
bool readLine(int &offset, int &length)
read a line, return length + offset in unicode data
Definition: katetextloader.h:157
QCryptographicHash
QString::at
const QChar at(int position) const
Kate::TextLoader::mimeTypeForFilterDev
const QString & mimeTypeForFilterDev() const
mime type used to create filter dev
Definition: katetextloader.h:137
QTextCodec::codecForName
QTextCodec * codecForName(const QByteArray &name)
QString::length
int length() const
QByteArray::data
char * data()
QCryptographicHash::result
QByteArray result() const
Kate::TextLoader
File Loader, will handle reading of files + detecting encoding.
Definition: katetextloader.h:44
Kate::TextBuffer
Class representing a text buffer.
Definition: katetextbuffer.h:48
Kate::TextLoader::unicode
const QChar * unicode() const
internal unicode data array
Definition: katetextloader.h:143
QByteArray::size
int size() const
QTextCodec::toUnicode
QString toUnicode(const QByteArray &a) const
QTextCodec::codecForUtfText
QTextCodec * codecForUtfText(const QByteArray &ba)
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sat May 9 2020 03:56:58 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Kate

Skip menu "Kate"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

applications API Reference

Skip menu "applications API Reference"
  •   kate
  •       kate
  •   KTextEditor
  •   Kate
  • Konsole

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal