KHtml

kencodingdetector.h
1 /*
2  This file is part of the KDE libraries
3 
4  Copyright (C) 1999 Lars Knoll ([email protected])
5  Copyright (C) 2007 Nick Shaforostoff ([email protected])
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Library General Public
9  License as published by the Free Software Foundation; either
10  version 2 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Library General Public License for more details.
16 
17  You should have received a copy of the GNU Library General Public License
18  along with this library; see the file COPYING.LIB. If not, write to
19  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  Boston, MA 02110-1301, USA.
21 
22 */
23 #ifndef KENCODINGDETECTOR_H
24 #define KENCODINGDETECTOR_H
25 
26 #include <QString>
27 
28 class QTextCodec;
29 class QTextDecoder;
30 class KEncodingDetectorPrivate;
31 
58 {
59 public:
60  enum EncodingChoiceSource {
61  DefaultEncoding,
62  AutoDetectedEncoding,
63  BOM,
64  EncodingFromXMLHeader,
65  EncodingFromMetaTag,
66  EncodingFromHTTPHeader,
67  UserChosenEncoding
68  };
69 
70  enum AutoDetectScript {
71  None,
72  SemiautomaticDetection,
73  Arabic,
74  Baltic,
75  CentralEuropean,
76  ChineseSimplified,
77  ChineseTraditional,
78  Cyrillic,
79  Greek,
80  Hebrew,
81  Japanese,
82  Korean,
83  NorthernSaami,
84  SouthEasternEurope,
85  Thai,
86  Turkish,
87  Unicode,
88  WesternEuropean
89  };
90 
95 
99  KEncodingDetector(QTextCodec *codec, EncodingChoiceSource source, AutoDetectScript script = None);
101 
102  //const QTextCodec* codec() const;
103 
107  bool setEncoding(const char *encoding, EncodingChoiceSource type);
108 
113  const char *encoding() const;
114 
115  bool visuallyOrdered() const;
116 
117 // void setAutoDetectLanguage( const QString& );
118 // const QString& autoDetectLanguage() const;
119 
120  void setAutoDetectLanguage(AutoDetectScript);
121  AutoDetectScript autoDetectLanguage() const;
122 
123  EncodingChoiceSource encodingChoiceSource() const;
124 
132  QString decode(const char *data, int len);
133  QString decode(const QByteArray &data);
134 
135  //* You don't need to call analyze() if you use this method.
145  QString decodeWithBuffering(const char *data, int len);
146 
159  bool decodedInvalidCharacters() const;
160 
170  void resetDecoder();
171 
176  QString flush();
177 
181  static AutoDetectScript scriptForName(const QString &lang);
182  static QString nameForScript(AutoDetectScript);
183  static bool hasAutoDetectionForScript(AutoDetectScript);
184 
185 protected:
190  bool processNull(char *data, int length);
191 
199  bool errorsIfUtf8(const char *data, int length);
200 
205  bool analyze(const char *data, int len);
206 
211 
212 private:
213  KEncodingDetectorPrivate *const d;
214 };
215 
216 #endif
bool decodedInvalidCharacters() const
This method checks whether invalid characters were found during a decoding operation.
Provides encoding detection capabilities.
QString decodeWithBuffering(const char *data, int len)
Convenience method that uses buffering.
static AutoDetectScript scriptForName(const QString &lang)
Takes the language name after it was i18n()'ed.
QTextDecoder * decoder()
KEncodingDetector()
Default codec is latin1 (as html spec says), EncodingChoiceSource is default, AutoDetectScript=SemiautomaticDetection...
bool processNull(char *data, int length)
This method removes all 0 bytes (or double bytes) from the data and remembers whether the data was binary or not ...
bool setEncoding(const char *encoding, EncodingChoiceSource type)
QString flush()
Convenience method to be used with decodeWithBuffering.
bool errorsIfUtf8(const char *data, int length)
Check if we are really utf8.
void resetDecoder()
Resets the decoder.
QString decode(const char *data, int len)
The main class method.
const char * encoding() const
Convenience method.
bool analyze(const char *data, int len)
Analyze text data.
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sat Sep 19 2020 22:46:01 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.