• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdelibs API Reference
  • KDE Home
  • Contact Us
 

KDECore

  • sources
  • kde-4.12
  • kdelibs
  • kdecore
  • localization
  • probers
nsLatin1Prober.cpp
Go to the documentation of this file.
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* -*- C++ -*-
3 * Copyright (C) 1998 <developer@mozilla.org>
4 *
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 #include "nsLatin1Prober.h"
27 #include <stdio.h>
28 #include <stdlib.h>
29 
/* Character classes stored in Latin1_CharToClass; they also index the rows
   and columns of Latin1ClassModel. */
#define UDF        0   /* undefined */
#define OTH        1   /* other */
#define ASC        2   /* ascii capital letter */
#define ASS        3   /* ascii small letter */
#define ACV        4   /* accent capital vowel */
#define ACO        5   /* accent capital other */
#define ASV        6   /* accent small vowel */
#define ASO        7   /* accent small other */
#define CLASS_NUM  8   /* total classes */
39 
40 namespace kencodingprober {
41 static unsigned char Latin1_CharToClass[] =
42 {
43  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
44  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
45  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
46  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
47  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
48  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
49  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
50  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
51  OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
52  ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
53  ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
54  ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
55  OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
56  ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
57  ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
58  ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
59  OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
60  OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
61  UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
62  OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
63  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
64  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
65  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
66  OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
67  ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
68  ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
69  ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
70  ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
71  ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
72  ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
73  ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
74  ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
75 };
76 
77 
/* Likelihood of one character class following another.
   The table is a flattened CLASS_NUM x CLASS_NUM matrix: the row is the class
   of the previous character, the column is the class of the current one, and
   the entry is read as Latin1ClassModel[previous*CLASS_NUM + current].
   It is only ever read, so it is declared const.

   Entry values:
   0 : illegal
   1 : very unlikely
   2 : normal
   3 : very likely
*/
static const unsigned char Latin1ClassModel[] =
{
/*        UDF OTH ASC ASS ACV ACO ASV ASO */
/*UDF*/    0,  0,  0,  0,  0,  0,  0,  0,
/*OTH*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASC*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASS*/    0,  3,  3,  3,  1,  1,  3,  3,
/*ACV*/    0,  3,  3,  3,  1,  2,  1,  2,
/*ACO*/    0,  3,  3,  3,  3,  3,  3,  3,
/*ASV*/    0,  3,  1,  3,  1,  1,  1,  3,
/*ASO*/    0,  3,  1,  3,  1,  1,  3,  3,
};
95 
96 void nsLatin1Prober::Reset(void)
97 {
98  mState = eDetecting;
99  mLastCharClass = OTH;
100  for (int i = 0; i < FREQ_CAT_NUM; i++)
101  mFreqCounter[i] = 0;
102 }
103 
104 
105 nsProbingState nsLatin1Prober::HandleData(const char* aBuf, unsigned int aLen)
106 {
107  char *newBuf1 = 0;
108  unsigned int newLen1 = 0;
109 
110  if (!FilterWithEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) {
111  newBuf1 = (char*)aBuf;
112  newLen1 = aLen;
113  }
114 
115  unsigned char charClass;
116  unsigned char freq;
117  for (unsigned int i = 0; i < newLen1; i++)
118  {
119  charClass = Latin1_CharToClass[(unsigned char)newBuf1[i]];
120  freq = Latin1ClassModel[mLastCharClass*CLASS_NUM + charClass];
121  if (freq == 0) {
122  mState = eNotMe;
123  break;
124  }
125  mFreqCounter[freq]++;
126  mLastCharClass = charClass;
127  }
128 
129  if (newBuf1 != aBuf)
130  free(newBuf1);
131 
132  return mState;
133 }
134 
135 float nsLatin1Prober::GetConfidence(void)
136 {
137  if (mState == eNotMe)
138  return 0.01f;
139 
140  float confidence;
141  unsigned int total = 0;
142  for (int i = 0; i < FREQ_CAT_NUM; i++)
143  total += mFreqCounter[i];
144 
145  if(!total)
146  confidence = 0.0f;
147  else
148  {
149  confidence = mFreqCounter[3]*1.0f / total;
150  confidence -= mFreqCounter[1]*20.0f/total;
151  }
152 
153  if (confidence < 0.0f)
154  confidence = 0.0f;
155 
156  // lower the confidence of latin1 so that other more accurate detector
157  // can take priority.
158  confidence *= 0.50f;
159 
160  return confidence;
161 }
162 
#ifdef DEBUG_PROBE
// Compiled only when DEBUG_PROBE is defined: prints the current confidence
// and the charset name of this prober to stdout.
void nsLatin1Prober::DumpStatus()
{
  const float confidence = GetConfidence();
  const char *charsetName = GetCharSetName();
  printf(" Latin1Prober: %1.3f [%s]\r\n", confidence, charsetName);
}
#endif
169 }
170 
171 
OTH
#define OTH
Definition: nsLatin1Prober.cpp:31
kencodingprober::nsLatin1Prober::GetCharSetName
const char * GetCharSetName()
Definition: nsLatin1Prober.h:38
ASO
#define ASO
Definition: nsLatin1Prober.cpp:37
CLASS_NUM
#define CLASS_NUM
Definition: nsLatin1Prober.cpp:38
FREQ_CAT_NUM
#define FREQ_CAT_NUM
Definition: nsLatin1Prober.h:31
kencodingprober::Latin1_CharToClass
static unsigned char Latin1_CharToClass[]
Definition: nsLatin1Prober.cpp:41
UDF
#define UDF
Definition: nsLatin1Prober.cpp:30
ASV
#define ASV
Definition: nsLatin1Prober.cpp:36
ASC
#define ASC
Definition: nsLatin1Prober.cpp:32
ACO
#define ACO
Definition: nsLatin1Prober.cpp:35
kencodingprober::nsLatin1Prober::mState
nsProbingState mState
Definition: nsLatin1Prober.h:42
kencodingprober::Latin1ClassModel
static unsigned char Latin1ClassModel[]
Definition: nsLatin1Prober.cpp:83
kencodingprober::nsProbingState
nsProbingState
Definition: nsCharSetProber.h:34
kencodingprober::nsLatin1Prober::HandleData
nsProbingState HandleData(const char *aBuf, unsigned int aLen)
Definition: nsLatin1Prober.cpp:105
kencodingprober::nsLatin1Prober::mFreqCounter
unsigned int mFreqCounter[FREQ_CAT_NUM]
Definition: nsLatin1Prober.h:52
kencodingprober::eNotMe
Definition: nsCharSetProber.h:37
nsLatin1Prober.h
kencodingprober::eDetecting
Definition: nsCharSetProber.h:35
kencodingprober::nsLatin1Prober::mLastCharClass
char mLastCharClass
Definition: nsLatin1Prober.h:51
ACV
#define ACV
Definition: nsLatin1Prober.cpp:34
kencodingprober::nsLatin1Prober::Reset
void Reset(void)
Definition: nsLatin1Prober.cpp:96
ASS
#define ASS
Definition: nsLatin1Prober.cpp:33
kencodingprober::nsLatin1Prober::GetConfidence
float GetConfidence(void)
Definition: nsLatin1Prober.cpp:135
kencodingprober::nsCharSetProber::FilterWithEnglishLetters
static bool FilterWithEnglishLetters(const char *aBuf, unsigned int aLen, char **newBuf, unsigned int &newLen)
Definition: nsCharSetProber.cpp:72
This file is part of the KDE documentation.
Documentation copyright © 1996-2014 The KDE developers.
Generated on Tue Oct 14 2014 22:47:09 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs API Reference

Skip menu "kdelibs API Reference"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  • kjsembed
  •   WTF
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Nepomuk-Core
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal