KCodecs

nsGB2312Prober.cpp
1 /* -*- C++ -*-
2  SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <[email protected]>
3 
4  SPDX-License-Identifier: MIT
5 */
6 
7 // for S-JIS encoding, observe these characteristics (NOTE(review): this describes S-JIS, but this file implements the GB18030 prober - confirm):
8 // 1, kana characters (or hankaku?) often have a high frequency of appearance
9 // 2, kana characters often exist in groups
10 // 3, certain combinations of kana are never used in the Japanese language
11 
12 #include "nsGB2312Prober.h"
13 
14 namespace kencodingprober
15 {
16 void nsGB18030Prober::Reset(void)
17 {
18  mCodingSM->Reset();
19  mState = eDetecting;
20  mDistributionAnalyser.Reset();
21  //mContextAnalyser.Reset();
22 }
23 
24 nsProbingState nsGB18030Prober::HandleData(const char *aBuf, unsigned int aLen)
25 {
26  if (aLen == 0)
27  return mState;
28 
29  nsSMState codingState;
30 
31  for (unsigned int i = 0; i < aLen; i++) {
32  codingState = mCodingSM->NextState(aBuf[i]);
33  if (codingState == eError) {
34  mState = eNotMe;
35  break;
36  }
37  if (codingState == eItsMe) {
38  mState = eFoundIt;
39  break;
40  }
41  if (codingState == eStart) {
42  unsigned int charLen = mCodingSM->GetCurrentCharLen();
43 
44  if (i == 0) {
45  mLastChar[1] = aBuf[0];
46  mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
47  } else {
48  mDistributionAnalyser.HandleOneChar(aBuf + i - 1, charLen);
49  }
50  }
51  }
52 
53  mLastChar[0] = aBuf[aLen - 1];
54 
55  if (mState == eDetecting)
56  if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
57  mState = eFoundIt;
58  }
59 // else
60 // mDistributionAnalyser.HandleData(aBuf, aLen);
61 
62  return mState;
63 }
64 
65 float nsGB18030Prober::GetConfidence(void)
66 {
67  float distribCf = mDistributionAnalyser.GetConfidence();
68 
69  return (float)distribCf;
70 }
71 }
72 
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sun May 24 2020 23:03:28 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.