KDECore
kencodingdetector.cpp
Go to the documentation of this file.
107 KEncodingDetectorPrivate(QTextCodec* codec,KEncodingDetector::EncodingChoiceSource source, KEncodingDetector::AutoDetectScript script)
129 return m_source != KEncodingDetector::DefaultEncoding && m_source != KEncodingDetector::AutoDetectedEncoding;
137 if ( ( ptr[ i ] >= 0x80 && ptr[ i ] <= 0x9F ) || ptr[ i ] == 0xA1 || ptr[ i ] == 0xA2 || ptr[ i ] == 0xA3
176 if ( ptr[ i ] == 0xA5 || ptr[ i ] == 0xAE || ptr[ i ] == 0xBE || ptr[ i ] == 0xC3 || ptr[ i ] == 0xD0 || ptr[ i ] == 0xE3 || ptr[ i ] == 0xF0 ) {
392 if ( ptr[ i ] == 0x80 || ( ptr[ i ] >= 0x82 && ptr[ i ] <= 0x87 ) || ptr[ i ] == 0x89 || ptr[ i ] == 0x8B
393 || ( ptr[ i ] >= 0x91 && ptr[ i ] <= 0x97 ) || ptr[ i ] == 0x99 || ptr[ i ] == 0x9B || ptr[ i ] == 0xA4
406 || ( ptr[ i ] >= 0x91 && ptr[ i ] <= 0x99 ) || ptr[ i ] == 0x9B || ptr[ i ] == 0xA1 || ( ptr[ i ] >= 0xBF && ptr[ i ] <= 0xC9 )
441 if ( ptr[ i ] == 0x80 || ( ptr[ i ] >= 0x82 && ptr[ i ] <= 0x8C ) || ( ptr[ i ] >= 0x91 && ptr[ i ] <= 0x9C ) || ptr[ i ] == 0x9F ) {
654 KEncodingDetector::KEncodingDetector(QTextCodec* codec, EncodingChoiceSource source, AutoDetectScript script) :
837 bool detected = analyze(d->m_bufferForDefferedEncDetection.constData(), d->m_bufferForDefferedEncDetection.length());
870 kWarning() << "KEncodingDetector:flush() "<< d->m_bufferForDefferedEncDetection.length()<<" bytes "<< d->m_codec->name();
877 // Check for a UTF-16 or UTF-8 BOM at the beginning, which is a sure sign of a Unicode encoding.
1065 kDebug( 6005 ) << "KEncodingDetector: found charset in <meta>: " << str.mid(pos,endpos-pos).data();
1088 return setEncoding(automaticDetectionForArabic( (const unsigned char*) data, len ), AutoDetectedEncoding);
1091 return setEncoding(automaticDetectionForBaltic( (const unsigned char*) data, len ), AutoDetectedEncoding);
1094 return setEncoding(automaticDetectionForCentralEuropean( (const unsigned char*) data, len ), AutoDetectedEncoding);
1097 return setEncoding(automaticDetectionForCyrillic( (const unsigned char*) data, len), AutoDetectedEncoding);
1100 return setEncoding(automaticDetectionForGreek( (const unsigned char*) data, len ), AutoDetectedEncoding);
1103 return setEncoding(automaticDetectionForHebrew( (const unsigned char*) data, len ), AutoDetectedEncoding);
1106 return setEncoding(automaticDetectionForJapanese( (const unsigned char*) data, len ), AutoDetectedEncoding);
1109 return setEncoding(automaticDetectionForTurkish( (const unsigned char*) data, len ), AutoDetectedEncoding);
1112 if (setEncoding(automaticDetectionForWesternEuropean( (const unsigned char*) data, len ), AutoDetectedEncoding))
QTextCodec * codecForName(const QString &name) const
Provided for compatibility.
Definition: kcharsets.cpp:689
Definition: kencodingdetector.h:76
Definition: kencodingdetector.cpp:55
Definition: kencodingdetector.cpp:52
static QByteArray automaticDetectionForTurkish(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:438
bool decodedInvalidCharacters() const
This method checks whether invalid characters were found during a decoding operation.
Definition: kencodingdetector.cpp:856
Definition: kencodingdetector.h:85
Definition: kencodingdetector.cpp:57
QString decodeWithBuffering(const char *data, int len)
Convenience method that uses buffering.
Definition: kencodingdetector.cpp:796
Definition: kencodingdetector.cpp:54
void setAutoDetectLanguage(AutoDetectScript)
Definition: kencodingdetector.cpp:664
bool visuallyOrdered() const
Definition: kencodingdetector.cpp:684
Definition: kencodingdetector.cpp:53
Definition: kencodingdetector.cpp:56
Definition: guess_ja_p.h:85
static int findXMLEncoding(const QByteArray &str, int &encodingLength)
Definition: kencodingdetector.cpp:512
static QByteArray automaticDetectionForBaltic(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:148
static AutoDetectScript scriptForName(const QString &lang)
Takes the language name after it has been i18n()'ed.
Definition: kencodingdetector.cpp:1141
QString i18nc(const char *ctxt, const char *text)
Returns a localized version of a string and a context.
Definition: klocalizedstring.h:797
Definition: kencodingdetector.h:77
static QByteArray automaticDetectionForCyrillic(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:193
EncodingChoiceSource encodingChoiceSource() const
Definition: kencodingdetector.cpp:673
static QByteArray automaticDetectionForGreek(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:389
Definition: kencodingdetector.h:82
enum Type guess_jp(const char *buf, int buflen)
Definition: guess_ja.cpp:305
static QByteArray automaticDetectionForCentralEuropean(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:161
static QByteArray automaticDetectionForHebrew(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:402
KEncodingDetector()
Default codec is latin1 (as html spec says), EncodingChoiceSource is default, AutoDetectScript=Semiau...
Definition: kencodingdetector.cpp:650
static void skipComment(const char *&ptr, const char *pEnd)
Definition: kencodingdetector.cpp:478
bool processNull(char *data, int length)
This nice method will kill all 0 bytes (or double bytes) and remember if this was a binary or not ;) ...
Definition: kencodingdetector.cpp:556
Definition: kencodingdetector.cpp:51
static QByteArray automaticDetectionForJapanese(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:418
Definition: kencodingdetector.h:65
bool setEncoding(const char *encoding, EncodingChoiceSource type)
Definition: kencodingdetector.cpp:712
QString flush()
Convenience method to be used with decodeForHtml.
Definition: kencodingdetector.cpp:861
static bool hasAutoDetectionForScript(AutoDetectScript)
Definition: kencodingdetector.cpp:1169
bool errorsIfUtf8(const char *data, int length)
Check if we are really utf8.
Definition: kencodingdetector.cpp:585
QString decode(const char *data, int len)
The main class method.
Definition: kencodingdetector.cpp:772
Definition: kencodingdetector.h:83
static QByteArray automaticDetectionForWesternEuropean(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:449
Definition: kencodingdetector.h:88
static QByteArray automaticDetectionForArabic(const unsigned char *ptr, int size)
Definition: kencodingdetector.cpp:134
Definition: kencodingdetector.h:74
AutoDetectScript autoDetectLanguage() const
Definition: kencodingdetector.cpp:668
Definition: kencodingdetector.h:89
static QString nameForScript(AutoDetectScript)
Definition: kencodingdetector.cpp:1203
Definition: kencodingdetector.h:90
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:47:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:47:08 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.