• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • applications API Reference
  • KDE Home
  • Contact Us
 

Kate

  • sources
  • kde-4.12
  • applications
  • kate
  • part
  • buffer
katetextloader.h
Go to the documentation of this file.
1 /* This file is part of the Kate project.
2  *
3  * Copyright (C) 2010 Christoph Cullmann <cullmann@kde.org>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB. If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  */
20 
21 #ifndef KATE_TEXTLOADER_H
22 #define KATE_TEXTLOADER_H
23 
24 #include <QtCore/QString>
25 #include <QtCore/QFile>
26 #include <QtCore/QCryptographicHash>
27 
28 // on the fly compression
29 #include <kfilterdev.h>
30 #include <kmimetype.h>
31 
32 namespace Kate {
33 
/** Loader block size in bytes (256 KiB): TextLoader sizes its read buffer with this value and requests at most that many bytes per read. */
39 static const qint64 KATE_FILE_LOADER_BS = 256 * 1024;
40 
44 class TextLoader
45 {
46  public:
52  TextLoader (const QString &filename, KEncodingProber::ProberType proberType)
53  : m_codec (0)
54  , m_eof (false) // default to not eof
55  , m_lastWasEndOfLine (true) // at start of file, we had a virtual newline
56  , m_lastWasR (false) // we have not found a \r as last char
57  , m_position (0)
58  , m_lastLineStart (0)
59  , m_eol (TextBuffer::eolUnknown) // no eol type detected atm
60  , m_buffer (KATE_FILE_LOADER_BS, 0)
61  , m_digest (QCryptographicHash::Md5)
62  , m_converterState (0)
63  , m_bomFound (false)
64  , m_firstRead (true)
65  , m_proberType (proberType)
66  {
67  // try to get mimetype for on the fly decompression, don't rely on filename!
68  QFile testMime (filename);
69  if (testMime.open (QIODevice::ReadOnly))
70  m_mimeType = KMimeType::findByContent (&testMime)->name ();
71  else
72  m_mimeType = KMimeType::findByPath (filename, 0, false)->name ();
73 
74  // construct filter device
75  m_file = KFilterDev::deviceForFile (filename, m_mimeType, false);
76  }
77 
81  ~TextLoader ()
82  {
83  delete m_file;
84  delete m_converterState;
85  }
86 
92  bool open (QTextCodec *codec)
93  {
94  m_codec = codec;
95  m_eof = false;
96  m_lastWasEndOfLine = true;
97  m_lastWasR = false;
98  m_position = 0;
99  m_lastLineStart = 0;
100  m_eol = TextBuffer::eolUnknown;
101  m_text.clear ();
102  delete m_converterState;
103  m_converterState = new QTextCodec::ConverterState (QTextCodec::ConvertInvalidToNull);
104  m_bomFound = false;
105  m_firstRead = true;
106 
107  // if already opened, close the file...
108  if (m_file->isOpen())
109  m_file->close ();
110 
111  return m_file->open (QIODevice::ReadOnly);
112  }
113 
118  bool eof () const { return m_eof && !m_lastWasEndOfLine && (m_lastLineStart == m_text.length()); }
119 
125  TextBuffer::EndOfLineMode eol () const { return m_eol; }
126 
131  bool byteOrderMarkFound () const { return m_bomFound; }
132 
137  const QString &mimeTypeForFilterDev () const { return m_mimeType; }
138 
143  const QChar *unicode () const { return m_text.unicode(); }
144 
149  QTextCodec *textCodec () const { return m_codec; }
150 
157  bool readLine (int &offset, int &length)
158  {
159  length = 0;
160  offset = 0;
161  bool encodingError = false;
162 
163  static const QLatin1Char cr(QLatin1Char('\r'));
164  static const QLatin1Char lf(QLatin1Char('\n'));
165 
171  bool failedToConvertOnce = false;
172 
176  while (m_position <= m_text.length())
177  {
178  if (m_position == m_text.length())
179  {
180  // try to load more text if something is around
181  if (!m_eof)
182  {
183  int c = m_file->read (m_buffer.data(), m_buffer.size());
184 
185  // update md5 hash sum
186  m_digest.addData (m_buffer.data(), c);
187 
188  // kill the old lines...
189  m_text.remove (0, m_lastLineStart);
190 
191  // if any text is there, append it....
192  if (c > 0)
193  {
194  // detect byte order marks & codec for byte order markers on first read
195  int bomBytes = 0;
196  if (m_firstRead) {
197  // use first 16 bytes max to allow BOM detection of codec
198  QByteArray bom (m_buffer.data(), qMin (16, c));
199  QTextCodec *codecForByteOrderMark = QTextCodec::codecForUtfText (bom, 0);
200 
201  // if codec != null, we found a BOM!
202  if (codecForByteOrderMark) {
203  m_bomFound = true;
204 
205  // eat away the different boms!
206  int mib = codecForByteOrderMark->mibEnum ();
207  if (mib == 106) // utf8
208  bomBytes = 3;
209  if (mib == 1013 || mib == 1014 || mib == 1015) // utf16
210  bomBytes = 2;
211  if (mib == 1017 || mib == 1018 || mib == 1019) // utf32
212  bomBytes = 4;
213  }
214 
218  if (!m_codec) {
222  if (codecForByteOrderMark)
223  m_codec = codecForByteOrderMark;
224  else {
228  KEncodingProber prober (m_proberType);
229  prober.feed (m_buffer.constData(), c);
230 
231  // we found codec with some confidence?
232  if (prober.confidence() > 0.5)
233  m_codec = QTextCodec::codecForName(prober.encoding());
234 
235  // no codec, no chance, encoding error
236  if (!m_codec)
237  return false;
238  }
239  }
240 
241  m_firstRead = false;
242  }
243 
244  Q_ASSERT (m_codec);
245  QString unicode = m_codec->toUnicode (m_buffer.constData() + bomBytes, c - bomBytes, m_converterState);
246 
247  // detect broken encoding
248  for (int i = 0; i < unicode.size(); ++i) {
249  if (unicode[i] == 0) {
250  encodingError = true;
251  break;
252  }
253  }
254 
255  m_text.append (unicode);
256  }
257 
258  // is file completely read ?
259  m_eof = (c == -1) || (c == 0);
260 
261  // recalc current pos and last pos
262  m_position -= m_lastLineStart;
263  m_lastLineStart = 0;
264  }
265 
266  // oh oh, end of file, escape !
267  if (m_eof && (m_position == m_text.length()))
268  {
269  m_lastWasEndOfLine = false;
270 
271  // line data
272  offset = m_lastLineStart;
273  length = m_position-m_lastLineStart;
274 
275  m_lastLineStart = m_position;
276 
277  return !encodingError && !failedToConvertOnce;
278  }
279 
280  // empty? try again
281  if (m_position == m_text.length()) {
282  failedToConvertOnce = true;
283  continue;
284  }
285  }
286 
287  if (m_text.at(m_position) == lf)
288  {
289  m_lastWasEndOfLine = true;
290 
291  if (m_lastWasR)
292  {
293  m_lastLineStart++;
294  m_lastWasR = false;
295  m_eol = TextBuffer::eolDos;
296  }
297  else
298  {
299  // line data
300  offset = m_lastLineStart;
301  length = m_position-m_lastLineStart;
302 
303  m_lastLineStart = m_position+1;
304  m_position++;
305 
306  // only win, if not dos!
307  if (m_eol != TextBuffer::eolDos)
308  m_eol = TextBuffer::eolUnix;
309 
310  return !encodingError;
311  }
312  }
313  else if (m_text.at(m_position) == cr)
314  {
315  m_lastWasEndOfLine = true;
316  m_lastWasR = true;
317 
318  // line data
319  offset = m_lastLineStart;
320  length = m_position-m_lastLineStart;
321 
322  m_lastLineStart = m_position+1;
323  m_position++;
324 
325  // should only win of first time!
326  if (m_eol == TextBuffer::eolUnknown)
327  m_eol = TextBuffer::eolMac;
328 
329  return !encodingError;
330  }
331  else if (m_text.at(m_position) == QChar::LineSeparator)
332  {
333  m_lastWasEndOfLine = true;
334 
335  // line data
336  offset = m_lastLineStart;
337  length = m_position-m_lastLineStart;
338 
339  m_lastLineStart = m_position+1;
340  m_position++;
341 
342  return !encodingError;
343  }
344  else
345  {
346  m_lastWasEndOfLine = false;
347  m_lastWasR = false;
348  }
349 
350  m_position++;
351  }
352 
353  return !encodingError;
354  }
355 
356  QByteArray digest ()
357  {
358  return m_digest.result ();
359  }
360 
361  private:
362  QTextCodec *m_codec;
363  bool m_eof;
364  bool m_lastWasEndOfLine;
365  bool m_lastWasR;
366  int m_position;
367  int m_lastLineStart;
368  TextBuffer::EndOfLineMode m_eol;
369  QString m_mimeType;
370  QIODevice *m_file;
371  QByteArray m_buffer;
372  QCryptographicHash m_digest;
373  QString m_text;
374  QTextCodec::ConverterState *m_converterState;
375  bool m_bomFound;
376  bool m_firstRead;
377  KEncodingProber::ProberType m_proberType;
378 };
379 
380 }
381 
382 #endif
KFilterDev::deviceForFile
static QIODevice * deviceForFile(const QString &fileName, const QString &mimetype=QString(), bool forceFilter=false)
Kate::TextBuffer::eolMac
Definition: katetextbuffer.h:63
qint64
Kate::TextLoader::eol
TextBuffer::EndOfLineMode eol() const
Detected end of line mode for this file.
Definition: katetextloader.h:125
kmimetype.h
kfilterdev.h
Kate::TextBuffer::eolDos
Definition: katetextbuffer.h:62
Kate::TextLoader::eof
bool eof() const
end of file reached?
Definition: katetextloader.h:118
Kate::TextLoader::textCodec
QTextCodec * textCodec() const
Get codec for this loader.
Definition: katetextloader.h:149
KEncodingProber::feed
ProberState feed(const QByteArray &data)
QString
Kate::TextBuffer::EndOfLineMode
EndOfLineMode
End of line mode.
Definition: katetextbuffer.h:59
Kate::TextLoader::byteOrderMarkFound
bool byteOrderMarkFound() const
BOM found?
Definition: katetextloader.h:131
KEncodingProber
Kate::TextLoader::open
bool open(QTextCodec *codec)
open file with given codec
Definition: katetextloader.h:92
KEncodingProber::confidence
float confidence() const
Kate::TextLoader::TextLoader
TextLoader(const QString &filename, KEncodingProber::ProberType proberType)
Construct file loader for given file.
Definition: katetextloader.h:52
Kate::TextLoader::~TextLoader
~TextLoader()
Destructor.
Definition: katetextloader.h:81
KEncodingProber::encoding
QByteArray encoding() const
Kate::KATE_FILE_LOADER_BS
static const qint64 KATE_FILE_LOADER_BS
Loader block size: load 256 KB at once by default; if the file size is smaller, fall back to the file size. Must be a multiple of 2.
Definition: katetextloader.h:39
Kate::TextLoader::digest
QByteArray digest()
Definition: katetextloader.h:356
Kate::TextBuffer::eolUnix
Definition: katetextbuffer.h:61
Kate::TextBuffer::eolUnknown
Definition: katetextbuffer.h:60
Kate::TextLoader::readLine
bool readLine(int &offset, int &length)
read a line, return length + offset in unicode data
Definition: katetextloader.h:157
Kate::TextLoader::mimeTypeForFilterDev
const QString & mimeTypeForFilterDev() const
mime type used to create filter dev
Definition: katetextloader.h:137
KEncodingProber::ProberType
ProberType
Kate::TextLoader
File Loader, will handle reading of files + detecting encoding.
Definition: katetextloader.h:44
Kate::TextBuffer
Class representing a text buffer.
Definition: katetextbuffer.h:48
Kate::TextLoader::unicode
const QChar * unicode() const
internal unicode data array
Definition: katetextloader.h:143
QIODevice
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:31:53 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

Kate

Skip menu "Kate"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

applications API Reference

Skip menu "applications API Reference"
  •   kate
  •       kate
  •   KTextEditor
  •   Kate
  • Applications
  •   Libraries
  •     libkonq
  • Konsole

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal