KCodecs

nsMBCSSM.cpp
1 /* -*- C++ -*-
2  SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <[email protected]>
3 
4  SPDX-License-Identifier: MIT
5 */
6 
7 #include "nsCodingStateMachine.h"
8 
9 /*
10 Modification from Frank Tang's original work:
11 . 0x00 is allowed as a legal character, since some web pages contain this char in
12  the text stream.
13 */
14 
15 // BIG5
16 
17 namespace kencodingprober
18 {
// Byte-to-class table for the Big5 model: 32 rows of 8 class ids, one id per
// byte value 0x00-0xff (the trailing comment on each row gives its byte range).
// Class ids in use: 0 (0x0e, 0x0f, 0x1b, 0xff), 1 (remaining bytes below 0x40,
// plus 0x7f), 2 (0x40-0x7e), 4 (0x80-0xa0), 3 (0xa1-0xfe).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
19 static const unsigned int BIG5_cls [ 256 / 8 ] = {
20 //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
21  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07 //allow 0x00 as legal value
22  PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
23  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
24  PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
25  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
26  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
27  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
28  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
29  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
30  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
31  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
32  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
33  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
34  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
35  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
36  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
37  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 80 - 87
38  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 88 - 8f
39  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 90 - 97
40  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 98 - 9f
41  PCK4BITS(4, 3, 3, 3, 3, 3, 3, 3), // a0 - a7
42  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // a8 - af
43  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // b0 - b7
44  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // b8 - bf
45  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // c0 - c7
46  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // c8 - cf
47  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // d0 - d7
48  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // d8 - df
49  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
50  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e8 - ef
51  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // f0 - f7
52  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 0) // f8 - ff
53 };
54 
// Transition table for the Big5 model: 3 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or the numeric state 3.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
55 static const unsigned int BIG5_st [ 3] = {
56  PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, eError), //00-07
57  PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError), //08-0f
58  PCK4BITS(eError, eStart, eStart, eStart, eStart, eStart, eStart, eStart) //10-17
59 };
60 
// Five entries, one per Big5 class id (0-4).
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
61 static const unsigned int Big5CharLenTable[] = {0, 1, 1, 2, 0};
62 
// Big5 model descriptor. In initializer order: descriptor wrapping BIG5_cls,
// the count 5, descriptor wrapping BIG5_st, the length table, the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
63 const SMModel Big5SMModel = {
64  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_cls },
65  5, // equals the number of entries in Big5CharLenTable (class ids 0-4)
66  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, BIG5_st },
67  Big5CharLenTable,
68  "Big5",
69 };
70 
// Byte-to-class table for the EUC-JP model: 32 rows of 8 class ids, one id per
// byte value 0x00-0xff (the trailing comment on each row gives its byte range).
// Class ids in use: 4 (0x00-0x7f except 0x0e, 0x0f, 0x1b), 5 (0x0e, 0x0f, 0x1b,
// 0x80-0x8d, 0x90-0xa0, 0xff), 1 (0x8e), 3 (0x8f), 2 (0xa1-0xdf), 0 (0xe0-0xfe).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
71 static const unsigned int EUCJP_cls [ 256 / 8 ] = {
72 //PCK4BITS(5,4,4,4,4,4,4,4), // 00 - 07
73  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 00 - 07
74  PCK4BITS(4, 4, 4, 4, 4, 4, 5, 5), // 08 - 0f
75  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 10 - 17
76  PCK4BITS(4, 4, 4, 5, 4, 4, 4, 4), // 18 - 1f
77  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 20 - 27
78  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 28 - 2f
79  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 30 - 37
80  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 38 - 3f
81  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 40 - 47
82  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 48 - 4f
83  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 50 - 57
84  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 58 - 5f
85  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 60 - 67
86  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 68 - 6f
87  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 70 - 77
88  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 78 - 7f
89  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 80 - 87
90  PCK4BITS(5, 5, 5, 5, 5, 5, 1, 3), // 88 - 8f
91  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 90 - 97
92  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // 98 - 9f
93  PCK4BITS(5, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
94  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
95  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
96  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
97  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
98  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
99  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
100  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
101  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
102  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
103  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
104  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 5) // f8 - ff
105 };
106 
// Transition table for the EUC-JP model: 5 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or one of the numeric states 3, 4, 5.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
107 static const unsigned int EUCJP_st [ 5] = {
108  PCK4BITS(3, 4, 3, 5, eStart, eError, eError, eError), //00-07
109  PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), //08-0f
110  PCK4BITS(eItsMe, eItsMe, eStart, eError, eStart, eError, eError, eError), //10-17
111  PCK4BITS(eError, eError, eStart, eError, eError, eError, 3, eError), //18-1f
112  PCK4BITS(3, eError, eError, eError, eStart, eStart, eStart, eStart) //20-27
113 };
114 
// Six entries, one per EUC-JP class id (0-5).
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
115 static const unsigned int EUCJPCharLenTable[] = {2, 2, 2, 3, 1, 0};
116 
// EUC-JP model descriptor. In initializer order: descriptor wrapping EUCJP_cls,
// the count 6, descriptor wrapping EUCJP_st, the length table, the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
117 const SMModel EUCJPSMModel = {
118  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_cls },
119  6, // equals the number of entries in EUCJPCharLenTable (class ids 0-5)
120  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCJP_st },
121  EUCJPCharLenTable,
122  "EUC-JP",
123 };
124 
// Byte-to-class table for the EUC-KR model: 32 rows of 8 class ids, one id per
// byte value 0x00-0xff (the trailing comment on each row gives its byte range).
// Class ids in use: 1 (0x00-0x7f except 0x0e, 0x0f, 0x1b), 0 (0x0e, 0x0f, 0x1b,
// 0x80-0xa0, 0xff), 3 (0xad-0xaf, 0xc9), 2 (the rest of 0xa1-0xfe).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
125 static const unsigned int EUCKR_cls [ 256 / 8 ] = {
126 //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
127  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
128  PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
129  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
130  PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
131  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
132  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
133  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
134  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
135  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
136  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
137  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
138  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
139  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
140  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
141  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
142  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
143  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
144  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
145  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
146  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
147  PCK4BITS(0, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
148  PCK4BITS(2, 2, 2, 2, 2, 3, 3, 3), // a8 - af
149  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
150  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
151  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
152  PCK4BITS(2, 3, 2, 2, 2, 2, 2, 2), // c8 - cf
153  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
154  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
155  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
156  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
157  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
158  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 0) // f8 - ff
159 };
160 
// Transition table for the EUC-KR model: 2 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or the numeric state 3.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
161 static const unsigned int EUCKR_st [ 2] = {
162  PCK4BITS(eError, eStart, 3, eError, eError, eError, eError, eError), //00-07
163  PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart, eStart) //08-0f
164 };
165 
// Four entries, one per EUC-KR class id (0-3).
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
166 static const unsigned int EUCKRCharLenTable[] = {0, 1, 2, 0};
167 
// EUC-KR model descriptor. In initializer order: descriptor wrapping EUCKR_cls,
// the count 4, descriptor wrapping EUCKR_st, the length table, the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
168 const SMModel EUCKRSMModel = {
169  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_cls },
170  4, // equals the number of entries in EUCKRCharLenTable (class ids 0-3)
171  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, EUCKR_st },
172  EUCKRCharLenTable,
173  "EUC-KR",
174 };
175 
176 /* GB2312 is obsolete; it has been superseded by GB18030
177 static unsigned int GB2312_cls [ 256 / 8 ] = {
178 //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
179 PCK4BITS(1,1,1,1,1,1,1,1), // 00 - 07
180 PCK4BITS(1,1,1,1,1,1,0,0), // 08 - 0f
181 PCK4BITS(1,1,1,1,1,1,1,1), // 10 - 17
182 PCK4BITS(1,1,1,0,1,1,1,1), // 18 - 1f
183 PCK4BITS(1,1,1,1,1,1,1,1), // 20 - 27
184 PCK4BITS(1,1,1,1,1,1,1,1), // 28 - 2f
185 PCK4BITS(1,1,1,1,1,1,1,1), // 30 - 37
186 PCK4BITS(1,1,1,1,1,1,1,1), // 38 - 3f
187 PCK4BITS(1,1,1,1,1,1,1,1), // 40 - 47
188 PCK4BITS(1,1,1,1,1,1,1,1), // 48 - 4f
189 PCK4BITS(1,1,1,1,1,1,1,1), // 50 - 57
190 PCK4BITS(1,1,1,1,1,1,1,1), // 58 - 5f
191 PCK4BITS(1,1,1,1,1,1,1,1), // 60 - 67
192 PCK4BITS(1,1,1,1,1,1,1,1), // 68 - 6f
193 PCK4BITS(1,1,1,1,1,1,1,1), // 70 - 77
194 PCK4BITS(1,1,1,1,1,1,1,1), // 78 - 7f
195 PCK4BITS(1,0,0,0,0,0,0,0), // 80 - 87
196 PCK4BITS(0,0,0,0,0,0,0,0), // 88 - 8f
197 PCK4BITS(0,0,0,0,0,0,0,0), // 90 - 97
198 PCK4BITS(0,0,0,0,0,0,0,0), // 98 - 9f
199 PCK4BITS(0,2,2,2,2,2,2,2), // a0 - a7
200 PCK4BITS(2,2,3,3,3,3,3,3), // a8 - af
201 PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
202 PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
203 PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
204 PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
205 PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
206 PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
207 PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
208 PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
209 PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
210 PCK4BITS(2,2,2,2,2,2,2,0) // f8 - ff
211 };
212 
213 static unsigned int GB2312_st [ 2] = {
214 PCK4BITS(eError,eStart, 3,eError,eError,eError,eError,eError),//00-07
215 PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,eStart,eStart) //08-0f
216 };
217 
218 static const unsigned int GB2312CharLenTable[] = {0, 1, 2, 0};
219 
220 SMModel GB2312SMModel = {
221  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_cls },
222  4,
223  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB2312_st },
224  GB2312CharLenTable,
225  "GB2312",
226 };
227 */
228 
229 // The following state machine data was created by a Perl script in
230 // intl/chardet/tools. It should be the same as in the PSM detector.
// Byte-to-class table for the GB18030 model: 32 rows of 8 class ids, one id per
// byte value 0x00-0xff (the trailing comment on each row gives its byte range).
// Class ids in use: 0 (0x0e, 0x0f, 0x1b, 0xff), 1 (remaining bytes below 0x30,
// plus 0x3a-0x3f), 3 (0x30-0x39), 2 (0x40-0x7e), 4 (0x7f), 5 (0x80),
// 6 (0x81-0xfe).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
231 static const unsigned int GB18030_cls [ 256 / 8 ] = {
232  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
233  PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
234  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
235  PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
236  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
237  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
238  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 30 - 37
239  PCK4BITS(3, 3, 1, 1, 1, 1, 1, 1), // 38 - 3f
240  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
241  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
242  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
243  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
244  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
245  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
246  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
247  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 4), // 78 - 7f
248  PCK4BITS(5, 6, 6, 6, 6, 6, 6, 6), // 80 - 87
249  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 88 - 8f
250  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 90 - 97
251  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // 98 - 9f
252  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // a0 - a7
253  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // a8 - af
254  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // b0 - b7
255  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // b8 - bf
256  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c0 - c7
257  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c8 - cf
258  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d0 - d7
259  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d8 - df
260  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // e0 - e7
261  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // e8 - ef
262  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // f0 - f7
263  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 0) // f8 - ff
264 };
265 
// Transition table for the GB18030 model: 6 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or one of the numeric states 3, 4, 5.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
266 static const unsigned int GB18030_st [ 6] = {
267  PCK4BITS(eError, eStart, eStart, eStart, eStart, eStart, 3, eError), //00-07
268  PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), //08-0f
269  PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError, eStart), //10-17
270  PCK4BITS(4, eError, eStart, eStart, eError, eError, eError, eError), //18-1f
271  PCK4BITS(eError, eError, 5, eError, eError, eError, eItsMe, eError), //20-27
272  PCK4BITS(eError, eError, eStart, eStart, eStart, eStart, eStart, eStart) //28-2f
273 };
274 
275 // To be accurate, the length of class 6 can be either 2 or 4.
276 // But it is not necessary to discriminate between the two since
277 // it is used for frequency analysis only, and we are validating
278 // each code range there as well. So it is safe to set it to be
279 // 2 here.
// Seven entries, one per GB18030 class id (0-6); the last entry is the
// class-6 length discussed above.
280 static const unsigned int GB18030CharLenTable[] = {0, 1, 1, 1, 1, 1, 2};
281 
// GB18030 model descriptor. In initializer order: descriptor wrapping
// GB18030_cls, the count 7, descriptor wrapping GB18030_st, the length table,
// the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
282 const SMModel GB18030SMModel = {
283  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_cls },
284  7, // equals the number of entries in GB18030CharLenTable (class ids 0-6)
285  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, GB18030_st },
286  GB18030CharLenTable,
287  "GB18030",
288 };
289 
290 // sjis
291 
// Byte-to-class table for the Shift_JIS model: 32 rows of 8 class ids, one id
// per byte value 0x00-0xff (the trailing comment on each row gives its range).
// Class ids in use: 0 (0x0e, 0x0f, 0x1b, 0xfd-0xff), 1 (remaining bytes below
// 0x40, plus 0x7f), 2 (0x40-0x7e, 0xa0-0xdf), 3 (0x80-0x9f, 0xe0-0xec),
// 4 (0xed-0xfc). Class id 5 never appears in this table.
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
292 static const unsigned int SJIS_cls [ 256 / 8 ] = {
293 //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
294  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07
295  PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
296  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
297  PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
298  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
299  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
300  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
301  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
302  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 40 - 47
303  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 48 - 4f
304  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 50 - 57
305  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 58 - 5f
306  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 60 - 67
307  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 68 - 6f
308  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 70 - 77
309  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 1), // 78 - 7f
310  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 80 - 87
311  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 88 - 8f
312  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 90 - 97
313  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // 98 - 9f
314 //0xa0 is illegal in the sjis encoding, but some pages do
315 //contain such a byte, so we need to be more forgiving of errors.
316  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
317  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
318  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
319  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
320  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
321  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
322  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
323  PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
324  PCK4BITS(3, 3, 3, 3, 3, 3, 3, 3), // e0 - e7
325  PCK4BITS(3, 3, 3, 3, 3, 4, 4, 4), // e8 - ef
326  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // f0 - f7
327  PCK4BITS(4, 4, 4, 4, 4, 0, 0, 0) // f8 - ff
328 };
329 
// Transition table for the Shift_JIS model: 3 rows of 8 entries. Every entry
// is eStart, eError, eItsMe or the numeric state 3.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
330 static const unsigned int SJIS_st [ 3] = {
331  PCK4BITS(eError, eStart, eStart, 3, eError, eError, eError, eError), //00-07
332  PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), //08-0f
333  PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, eStart, eStart) //10-17
334 };
335 
// Six entries, indexed 0-5; SJIS_cls only assigns class ids 0-4, so the last
// entry has no byte mapped to it in that table.
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
336 static const unsigned int SJISCharLenTable[] = {0, 1, 1, 2, 0, 0};
337 
// Shift_JIS model descriptor. In initializer order: descriptor wrapping
// SJIS_cls, the count 6, descriptor wrapping SJIS_st, the length table, the
// name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
338 const SMModel SJISSMModel = {
339  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_cls },
340  6, // equals the number of entries in SJISCharLenTable
341  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, SJIS_st },
342  SJISCharLenTable,
343  "Shift_JIS",
344 };
345 
// Byte-to-class table for the UTF-16BE model: 32 rows of 8 class ids, one id
// per byte value 0x00-0xff (the trailing comment on each row gives its range).
// Almost every byte is class 0; the exceptions are 0x0a (1), 0x0d (2),
// 0x1b and 0x29-0x2d (3), 0xfe (4) and 0xff (5).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
346 static const unsigned int UCS2BE_cls [ 256 / 8 ] = {
347  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
348  PCK4BITS(0, 0, 1, 0, 0, 2, 0, 0), // 08 - 0f
349  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
350  PCK4BITS(0, 0, 0, 3, 0, 0, 0, 0), // 18 - 1f
351  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
352  PCK4BITS(0, 3, 3, 3, 3, 3, 0, 0), // 28 - 2f
353  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
354  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
355  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
356  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
357  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
358  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
359  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
360  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
361  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
362  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
363  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
364  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
365  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
366  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
367  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a0 - a7
368  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a8 - af
369  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b0 - b7
370  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b8 - bf
371  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c0 - c7
372  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c8 - cf
373  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d0 - d7
374  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d8 - df
375  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
376  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
377  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
378  PCK4BITS(0, 0, 0, 0, 0, 0, 4, 5) // f8 - ff
379 };
380 
// Transition table for the UTF-16BE model: 7 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or one of the numeric states 3-8.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
381 static const unsigned int UCS2BE_st [ 7] = {
382  PCK4BITS(5, 7, 7, eError, 4, 3, eError, eError), //00-07
383  PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), //08-0f
384  PCK4BITS(eItsMe, eItsMe, 6, 6, 6, 6, eError, eError), //10-17
385  PCK4BITS(6, 6, 6, 6, 6, eItsMe, 6, 6), //18-1f
386  PCK4BITS(6, 6, 6, 6, 5, 7, 7, eError), //20-27
387  PCK4BITS(5, 8, 6, 6, eError, 6, 6, 6), //28-2f
388  PCK4BITS(6, 6, 6, 6, eError, eError, eStart, eStart) //30-37
389 };
390 
// Six entries, one per UTF-16BE class id (0-5); every entry is 2 except the
// one for class 3, which is 0.
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm; also confirm the 0 for class 3 is intended,
// since UCS2LECharLenTable uses 2 for every class.
391 static const unsigned int UCS2BECharLenTable[] = {2, 2, 2, 0, 2, 2};
392 
// UTF-16BE model descriptor. In initializer order: descriptor wrapping
// UCS2BE_cls, the count 6, descriptor wrapping UCS2BE_st, the length table,
// the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
393 const SMModel UCS2BESMModel = {
394  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_cls },
395  6, // equals the number of entries in UCS2BECharLenTable (class ids 0-5)
396  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2BE_st },
397  UCS2BECharLenTable,
398  "UTF-16BE",
399 };
400 
// Byte-to-class table for the UTF-16LE model: 32 rows of 8 class ids, one id
// per byte value 0x00-0xff (the trailing comment on each row gives its range).
// Almost every byte is class 0; the exceptions are 0x0a (1), 0x0d (2),
// 0x1b and 0x29-0x2d (3), 0xfe (4) and 0xff (5).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
401 static const unsigned int UCS2LE_cls [ 256 / 8 ] = {
402  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
403  PCK4BITS(0, 0, 1, 0, 0, 2, 0, 0), // 08 - 0f
404  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
405  PCK4BITS(0, 0, 0, 3, 0, 0, 0, 0), // 18 - 1f
406  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
407  PCK4BITS(0, 3, 3, 3, 3, 3, 0, 0), // 28 - 2f
408  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
409  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
410  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
411  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
412  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
413  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
414  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
415  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
416  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
417  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
418  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 80 - 87
419  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 88 - 8f
420  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 90 - 97
421  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 98 - 9f
422  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a0 - a7
423  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // a8 - af
424  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b0 - b7
425  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // b8 - bf
426  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c0 - c7
427  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // c8 - cf
428  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d0 - d7
429  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // d8 - df
430  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e0 - e7
431  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // e8 - ef
432  PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // f0 - f7
433  PCK4BITS(0, 0, 0, 0, 0, 0, 4, 5) // f8 - ff
434 };
435 
// Transition table for the UTF-16LE model: 7 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or one of the numeric states 3-8.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
436 static const unsigned int UCS2LE_st [ 7] = {
437  PCK4BITS(6, 6, 7, 6, 4, 3, eError, eError), //00-07
438  PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), //08-0f
439  PCK4BITS(eItsMe, eItsMe, 5, 5, 5, eError, eItsMe, eError), //10-17
440  PCK4BITS(5, 5, 5, eError, 5, eError, 6, 6), //18-1f
441  PCK4BITS(7, 6, 8, 8, 5, 5, 5, eError), //20-27
442  PCK4BITS(5, 5, 5, eError, eError, eError, 5, 5), //28-2f
443  PCK4BITS(5, 5, 5, eError, 5, eError, eStart, eStart) //30-37
444 };
445 
// Six entries, one per UTF-16LE class id (0-5); every entry is 2.
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
446 static const unsigned int UCS2LECharLenTable[] = {2, 2, 2, 2, 2, 2};
447 
// UTF-16LE model descriptor. In initializer order: descriptor wrapping
// UCS2LE_cls, the count 6, descriptor wrapping UCS2LE_st, the length table,
// the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
448 const SMModel UCS2LESMModel = {
449  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_cls },
450  6, // equals the number of entries in UCS2LECharLenTable (class ids 0-5)
451  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UCS2LE_st },
452  UCS2LECharLenTable,
453  "UTF-16LE",
454 };
455 
// Byte-to-class table for the UTF-8 model: 32 rows of 8 class ids, one id per
// byte value 0x00-0xff (the trailing comment on each row gives its byte range).
// Class ids in use: 0 (0x0e, 0x0f, 0x1b, 0xc0, 0xc1, 0xfe, 0xff), 1 (remaining
// bytes below 0x80), 2 (0x80-0x83), 3 (0x84-0x87), 4 (0x88-0x9f),
// 5 (0xa0-0xbf), 6 (0xc2-0xdf), 7 (0xe0), 8 (0xe1-0xec, 0xee, 0xef), 9 (0xed),
// 10 (0xf0), 11 (0xf1-0xf7), 12 (0xf8), 13 (0xf9-0xfb), 14 (0xfc), 15 (0xfd).
// NOTE(review): PCK4BITS is not defined in this file; presumably it packs the
// eight 4-bit ids of a row into one unsigned int - confirm.
456 static const unsigned int UTF8_cls [ 256 / 8 ] = {
457 //PCK4BITS(0,1,1,1,1,1,1,1), // 00 - 07
458  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 00 - 07 //allow 0x00 as a legal value
459  PCK4BITS(1, 1, 1, 1, 1, 1, 0, 0), // 08 - 0f
460  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 10 - 17
461  PCK4BITS(1, 1, 1, 0, 1, 1, 1, 1), // 18 - 1f
462  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 20 - 27
463  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 28 - 2f
464  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 30 - 37
465  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 38 - 3f
466  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 40 - 47
467  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 48 - 4f
468  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 50 - 57
469  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 58 - 5f
470  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 60 - 67
471  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 68 - 6f
472  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 70 - 77
473  PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 78 - 7f
474  PCK4BITS(2, 2, 2, 2, 3, 3, 3, 3), // 80 - 87
475  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 88 - 8f
476  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 90 - 97
477  PCK4BITS(4, 4, 4, 4, 4, 4, 4, 4), // 98 - 9f
478  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // a0 - a7
479  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // a8 - af
480  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // b0 - b7
481  PCK4BITS(5, 5, 5, 5, 5, 5, 5, 5), // b8 - bf
482  PCK4BITS(0, 0, 6, 6, 6, 6, 6, 6), // c0 - c7
483  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // c8 - cf
484  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d0 - d7
485  PCK4BITS(6, 6, 6, 6, 6, 6, 6, 6), // d8 - df
486  PCK4BITS(7, 8, 8, 8, 8, 8, 8, 8), // e0 - e7
487  PCK4BITS(8, 8, 8, 8, 8, 9, 8, 8), // e8 - ef
488  PCK4BITS(10, 11, 11, 11, 11, 11, 11, 11), // f0 - f7
489  PCK4BITS(12, 13, 13, 13, 14, 15, 0, 0) // f8 - ff
490 };
491 
// Transition table for the UTF-8 model: 26 rows of 8 entries. Every entry is
// eStart, eError, eItsMe or one of the numeric states 3-12.
// NOTE(review): the row comments (00-07, ...) are entry indices; how a
// (state, class) pair maps to an index is decided by the state-machine code
// outside this file - confirm before editing rows.
492 static const unsigned int UTF8_st [ 26] = {
493  PCK4BITS(eError, eStart, eError, eError, eError, eError, 12, 10), //00-07
494  PCK4BITS(9, 11, 8, 7, 6, 5, 4, 3), //08-0f
495  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //10-17
496  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //18-1f
497  PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), //20-27
498  PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), //28-2f
499  PCK4BITS(eError, eError, 5, 5, 5, 5, eError, eError), //30-37
500  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //38-3f
501  PCK4BITS(eError, eError, eError, 5, 5, 5, eError, eError), //40-47
502  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //48-4f
503  PCK4BITS(eError, eError, 7, 7, 7, 7, eError, eError), //50-57
504  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //58-5f
505  PCK4BITS(eError, eError, eError, eError, 7, 7, eError, eError), //60-67
506  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //68-6f
507  PCK4BITS(eError, eError, 9, 9, 9, 9, eError, eError), //70-77
508  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //78-7f
509  PCK4BITS(eError, eError, eError, eError, eError, 9, eError, eError), //80-87
510  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //88-8f
511  PCK4BITS(eError, eError, 12, 12, 12, 12, eError, eError), //90-97
512  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //98-9f
513  PCK4BITS(eError, eError, eError, eError, eError, 12, eError, eError), //a0-a7
514  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //a8-af
515  PCK4BITS(eError, eError, 12, 12, 12, eError, eError, eError), //b0-b7
516  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError), //b8-bf
517  PCK4BITS(eError, eError, eStart, eStart, eStart, eStart, eError, eError), //c0-c7
518  PCK4BITS(eError, eError, eError, eError, eError, eError, eError, eError) //c8-cf
519 };
520 
// Sixteen entries, one per UTF-8 class id (0-15).
// NOTE(review): presumably the byte length of a character whose first byte
// belongs to that class - confirm against the code that reads this table.
521 static const unsigned int UTF8CharLenTable[] = {0, 1, 0, 0, 0, 0, 2, 3,
522  3, 3, 4, 4, 5, 5, 6, 6
523  };
524 
// UTF-8 model descriptor. In initializer order: descriptor wrapping UTF8_cls,
// the count 16, descriptor wrapping UTF8_st, the length table, the name string.
// NOTE(review): SMModel and the e*4bits constants are declared outside this
// file; field roles are inferred from the initializer order - confirm.
525 const SMModel UTF8SMModel = {
526  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_cls },
527  16, // equals the number of entries in UTF8CharLenTable (class ids 0-15)
528  {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, UTF8_st },
529  UTF8CharLenTable,
530  "UTF-8",
531 };
532 }
533 
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Sun May 24 2020 23:03:28 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.