KCodecs

nsSBCharSetProber.cpp
1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "nsSBCharSetProber.h"
8
9#include <stdio.h>
10
11namespace kencodingprober
12{
13nsProbingState nsSingleByteCharSetProber::HandleData(const char *aBuf, unsigned int aLen)
14{
15 for (unsigned int i = 0; i < aLen; i++) {
16 const unsigned char order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
17
18 if (order < SYMBOL_CAT_ORDER) {
19 mTotalChar++;
20 }
21 if (order < SAMPLE_SIZE) {
22 mFreqChar++;
23
24 if (mLastOrder < SAMPLE_SIZE) {
25 mTotalSeqs++;
26 if (!mReversed) {
27 ++(mSeqCounters[(int)mModel->precedenceMatrix[mLastOrder * SAMPLE_SIZE + order]]);
28 } else { // reverse the order of the letters in the lookup
29 ++(mSeqCounters[(int)mModel->precedenceMatrix[order * SAMPLE_SIZE + mLastOrder]]);
30 }
31 }
32 }
33 mLastOrder = order;
34 }
35
36 if (mState == eDetecting) {
37 if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD) {
38 float cf = GetConfidence();
39 if (cf > POSITIVE_SHORTCUT_THRESHOLD) {
40 mState = eFoundIt;
41 } else if (cf < NEGATIVE_SHORTCUT_THRESHOLD) {
42 mState = eNotMe;
43 }
44 }
45 }
46
47 return mState;
48}
49
50void nsSingleByteCharSetProber::Reset(void)
51{
52 mState = eDetecting;
53 mLastOrder = 255;
54 for (unsigned int i = 0; i < NUMBER_OF_SEQ_CAT; i++) {
55 mSeqCounters[i] = 0;
56 }
57 mTotalSeqs = 0;
58 mTotalChar = 0;
59 mFreqChar = 0;
60}
61
62//#define NEGATIVE_APPROACH 1
63
64float nsSingleByteCharSetProber::GetConfidence(void)
65{
66#ifdef NEGATIVE_APPROACH
67 if (mTotalSeqs > 0)
68 if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT] * 10) {
69 return ((float)(mTotalSeqs - mSeqCounters[NEGATIVE_CAT] * 10)) / mTotalSeqs * mFreqChar / mTotalChar;
70 }
71 return (float)0.01;
72#else // POSITIVE_APPROACH
73 float r;
74
75 if (mTotalSeqs > 0) {
76 r = ((float)1.0) * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
77 r = r * mFreqChar / mTotalChar;
78 if (r >= (float)1.00) {
79 r = (float)0.99;
80 }
81 return r;
82 }
83 return (float)0.01;
84#endif
85}
86
87const char *nsSingleByteCharSetProber::GetCharSetName()
88{
89 if (!mNameProber) {
90 return mModel->charsetName;
91 }
92 return mNameProber->GetCharSetName();
93}
94
#ifdef DEBUG_PROBE
// Debug helper (only built when DEBUG_PROBE is defined): prints the current
// confidence and charset name of this prober to standard output.
void nsSingleByteCharSetProber::DumpStatus()
{
    const float confidence = GetConfidence();
    const char *const charsetName = GetCharSetName();
    printf(" SBCS: %1.3f [%s]\r\n", confidence, charsetName);
}
#endif
101}
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Sat Dec 21 2024 16:59:08 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.