KCodecs

nsSBCSGroupProber.cpp
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "nsSBCSGroupProber.h"
8
9#include "UnicodeGroupProber.h"
10#include "nsHebrewProber.h"
11#include "nsSBCharSetProber.h"
12
13#include <stdio.h>
14#include <stdlib.h>
15
16namespace kencodingprober
17{
18nsSBCSGroupProber::nsSBCSGroupProber()
19{
20 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
21 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
22 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
23 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
24 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
25 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
26 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
27 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
28 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
29 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
30
31 nsHebrewProber *hebprober = new nsHebrewProber();
32 // Notice: Any change in these indexes - 10,11,12 must be reflected
33 // in the code below as well.
34 mProbers[10] = hebprober;
35 mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew
36 mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew
37 mProbers[13] = new UnicodeGroupProber();
38
39 // Tell the Hebrew prober about the logical and visual probers
40 if (mProbers[10] && mProbers[11] && mProbers[12]) { // all are not null
41 hebprober->SetModelProbers(mProbers[11], mProbers[12]);
42 } else { // One or more is null. avoid any Hebrew probing, null them all
43 for (unsigned int i = 10; i <= 12; ++i) {
44 delete mProbers[i];
45 mProbers[i] = nullptr;
46 }
47 }
48
49 // disable latin2 before latin1 is available, otherwise all latin1
50 // will be detected as latin2 because of their similarity.
51 // mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
52 // mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
53
54 Reset();
55}
56
57nsSBCSGroupProber::~nsSBCSGroupProber()
58{
59 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
60 delete mProbers[i];
61 }
62}
63
64const char *nsSBCSGroupProber::GetCharSetName()
65{
66 // if we have no answer yet
67 if (mBestGuess == -1) {
68 GetConfidence();
69 // no charset seems positive
70 if (mBestGuess == -1)
71 // we will use default.
72 {
73 mBestGuess = 0;
74 }
75 }
76 return mProbers[mBestGuess]->GetCharSetName();
77}
78
79void nsSBCSGroupProber::Reset(void)
80{
81 mActiveNum = 0;
82 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
83 if (mProbers[i]) { // not null
84 mProbers[i]->Reset();
85 mIsActive[i] = true;
86 ++mActiveNum;
87 } else {
88 mIsActive[i] = false;
89 }
90 }
91 mBestGuess = -1;
92 mState = eDetecting;
93}
94
95nsProbingState nsSBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
96{
97 nsProbingState st;
98 unsigned int i;
99 char *newBuf1 = nullptr;
100 unsigned int newLen1 = 0;
101
102 // apply filter to original buffer, and we got new buffer back
103 // depend on what script it is, we will feed them the new buffer
104 // we got after applying proper filter
105 // this is done without any consideration to KeepEnglishLetters
106 // of each prober since as of now, there are no probers here which
107 // recognize languages with English characters.
108 if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
109 goto done;
110 }
111
112 if (newLen1 == 0) {
113 goto done; // Nothing to see here, move on.
114 }
115
116 for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) {
117 if (!mIsActive[i]) {
118 continue;
119 }
120 st = mProbers[i]->HandleData(newBuf1, newLen1);
121 if (st == eFoundIt) {
122 mBestGuess = i;
123 mState = eFoundIt;
124 break;
125 } else if (st == eNotMe) {
126 mIsActive[i] = false;
127 mActiveNum--;
128 if (mActiveNum == 0) {
129 mState = eNotMe;
130 break;
131 }
132 }
133 }
134
135done:
136 free(newBuf1);
137
138 return mState;
139}
140
141float nsSBCSGroupProber::GetConfidence(void)
142{
143 unsigned int i;
144 float bestConf = 0.0;
145 float cf;
146
147 switch (mState) {
148 case eFoundIt:
149 return (float)0.99; // sure yes
150 case eNotMe:
151 return (float)0.01; // sure no
152 default:
153 for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) {
154 if (!mIsActive[i]) {
155 continue;
156 }
157 cf = mProbers[i]->GetConfidence();
158 if (bestConf < cf) {
159 bestConf = cf;
160 mBestGuess = i;
161 }
162 }
163 }
164 return bestConf;
165}
166
#ifdef DEBUG_PROBE
/**
 * Prints the status of every prober in the group, followed by the current
 * best match and its confidence. Only compiled when DEBUG_PROBE is defined.
 *
 * Empty slots are skipped, and "(none)" is printed as the best match when no
 * best guess has been recorded, so that neither a null prober nor index -1
 * is ever dereferenced.
 */
void nsSBCSGroupProber::DumpStatus()
{
    // GetConfidence() may also update mBestGuess, so call it before printing.
    const float cf = GetConfidence();

    printf(" SBCS Group Prober --------begin status \r\n");
    for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
        if (!mProbers[i]) {
            // The constructor may leave a slot empty; there is nothing to report.
            continue;
        }
        if (!mIsActive[i]) {
            printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
        } else {
            mProbers[i]->DumpStatus();
        }
    }

    // mBestGuess stays at -1 when GetConfidence() returned early (eNotMe) or
    // no active prober reported a confidence above zero.
    const char *bestName = "(none)";
    if (mBestGuess != -1 && mProbers[mBestGuess]) {
        bestName = mProbers[mBestGuess]->GetCharSetName();
    }
    printf(" SBCS Group found best match [%s] confidence %f.\r\n", bestName, cf);
}
#endif
185}
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 11:48:44 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.