• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.12
  • kdelibs
  • kdecore
  • localization
  • probers
UnicodeGroupProber.cpp
Go to the documentation of this file.
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* -*- C++ -*-
3 * Copyright (C) 2008 <wkai@gmail.com>
4 *
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 #include "UnicodeGroupProber.h"
27 
28 #include "ctype_test_p.h"
29 
30 #include <QtAlgorithms>
31 #include <math.h>
32 
33 namespace kencodingprober {
34 UnicodeGroupProber::UnicodeGroupProber(void)
35 {
36  mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel);
37  mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel);
38  mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel);
39  mActiveSM = NUM_OF_UNICODE_CHARSETS;
40  mState = eDetecting;
41  mDetectedCharset = "UTF-8";
42 }
43 
44 UnicodeGroupProber::~UnicodeGroupProber(void)
45 {
46  for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
47  delete mCodingSM[i];
48 }
49 
50 void UnicodeGroupProber::Reset(void)
51 {
52  mState = eDetecting;
53  for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++)
54  mCodingSM[i]->Reset();
55  mActiveSM = NUM_OF_UNICODE_CHARSETS;
56  mDetectedCharset = "UTF-8";
57 }
58 
59 nsProbingState UnicodeGroupProber::HandleData(const char* aBuf, unsigned int aLen)
60 {
61  nsSMState codingState;
62  int j;
63  uint i, weight_BOM, counts[5];
64  static bool disableUTF16LE = false;
65  static bool disableUTF16BE = false;
66  double weight_zero;
67 
68  if (mActiveSM <= 0) {
69  mState = eNotMe;
70  return mState;
71  }
72 
73  if (! (disableUTF16LE || disableUTF16BE)) {
74  if (aLen%2 != 0) {
75  disableUTF16LE = true;
76  disableUTF16BE = true;
77  }
78  weight_BOM = (uint)(sqrt((double)aLen) + aLen/10.0);
79  for (uint i = 0; i < 5; i++)
80  qCount(aBuf, aBuf+aLen, char(i), counts[i]);
81  weight_zero = (2.0*(counts[0] + counts[1] + counts[2] + counts[3] + counts[4]) + weight_BOM)/aLen;
82  if (weight_zero < log(1.4142)) {
83  disableUTF16LE = true;
84  disableUTF16BE = true;
85  }
86  if (4 >= aBuf[1] && aBuf[1] >= 0 && isprint(aBuf[0]))
87  disableUTF16BE = true;
88  else
89  disableUTF16LE = true;
90  if (disableUTF16BE)
91  mActiveSM--;
92  if (disableUTF16LE) {
93  nsCodingStateMachine* t;
94  t = mCodingSM[1];
95  mCodingSM[1] = mCodingSM[2];
96  mCodingSM[2] = t;
97  mActiveSM--;
98  }
99  }
100 
101  for (i = 0; i < aLen; ++i) {
102  for (j = mActiveSM-1; j>= 0; --j)
103  {
104  //byte is feed to all active state machine
105  codingState = mCodingSM[j]->NextState(aBuf[i]);
106  if (codingState == eError)
107  {
108  //got negative answer for this state machine, make it inactive
109  mActiveSM--;
110  if (mActiveSM == 0)
111  {
112  mState = eNotMe;
113  return mState;
114  }
115  else if (j != (int)mActiveSM)
116  {
117  nsCodingStateMachine* t;
118  t = mCodingSM[mActiveSM];
119  mCodingSM[mActiveSM] = mCodingSM[j];
120  mCodingSM[j] = t;
121  }
122  }
123  else if (codingState == eItsMe)
124  {
125  mState = eFoundIt;
126  mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
127  return mState;
128  } else if (mState == eDetecting)
129  mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();;
130  }
131  }
132  return mState;
133 }
134 
135 float UnicodeGroupProber::GetConfidence()
136 {
137  if (mState == eFoundIt)
138  return 0.99f;
139  else
140  return 0.0f;
141 }
142 
143 #ifdef DEBUG_PROBE
144 void UnicodeGroupProber::DumpStatus()
145 {
146  GetConfidence();
147  for (uint i = 0; i < mActiveSM; i++)
148  {
149  kDebug(180) << "Unicode group" << mCodingSM[i]->DumpCurrentState() << mCodingSM[i]->GetCodingStateMachine() ;
150  }
151 }
152 #endif
153 
154 }
155 
156 
isprint
#define isprint(c)
Definition: ctype_test_p.h:90
kencodingprober::UCS2LESMModel
KDE_NO_EXPORT SMModel UCS2LESMModel
Definition: nsMBCSSM.cpp:531
kencodingprober::UCS2BESMModel
KDE_NO_EXPORT SMModel UCS2BESMModel
Definition: nsMBCSSM.cpp:475
kencodingprober::nsCodingStateMachine::NextState
nsSMState NextState(char c)
Definition: nsCodingStateMachine.h:59
kencodingprober::UnicodeGroupProber::mState
nsProbingState mState
Definition: UnicodeGroupProber.h:53
kencodingprober::eError
Definition: nsCodingStateMachine.h:37
kencodingprober::eFoundIt
Definition: nsCharSetProber.h:36
ctype_test_p.h
kencodingprober::UnicodeGroupProber::GetConfidence
float GetConfidence()
Definition: UnicodeGroupProber.cpp:135
kencodingprober::UnicodeGroupProber::HandleData
nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: UnicodeGroupProber.cpp:59
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::UnicodeGroupProber::~UnicodeGroupProber
virtual ~UnicodeGroupProber(void)
Definition: UnicodeGroupProber.cpp:44
kencodingprober::nsCodingStateMachine::GetCodingStateMachine
const char * GetCodingStateMachine()
Definition: nsCodingStateMachine.h:75
kencodingprober::eNotMe
Definition: nsCharSetProber.h:37
kencodingprober::UnicodeGroupProber::mDetectedCharset
const char * mDetectedCharset
Definition: UnicodeGroupProber.h:54
kencodingprober::UnicodeGroupProber::mActiveSM
unsigned int mActiveSM
Definition: UnicodeGroupProber.h:52
UnicodeGroupProber.h
kencodingprober::nsCodingStateMachine
Definition: nsCodingStateMachine.h:53
kencodingprober::eDetecting
Definition: nsCharSetProber.h:35
kencodingprober::eItsMe
Definition: nsCodingStateMachine.h:38
kencodingprober::UTF8SMModel
KDE_NO_EXPORT SMModel UTF8SMModel
Definition: nsMBCSSM.cpp:609
kDebug
#define kDebug
Definition: kdebug.h:316
kencodingprober::UnicodeGroupProber::UnicodeGroupProber
UnicodeGroupProber(void)
Definition: UnicodeGroupProber.cpp:34
NUM_OF_UNICODE_CHARSETS
#define NUM_OF_UNICODE_CHARSETS
Definition: UnicodeGroupProber.h:32
kencodingprober::UnicodeGroupProber::Reset
void Reset(void)
Definition: UnicodeGroupProber.cpp:50
kencodingprober::nsSMState
nsSMState
Definition: nsCodingStateMachine.h:35
kencodingprober::UnicodeGroupProber::mCodingSM
nsCodingStateMachine * mCodingSM[NUM_OF_UNICODE_CHARSETS]
Definition: UnicodeGroupProber.h:51
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:47:09 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal