Libksieve

utf8validator.cpp
1 /* -*- c++ -*-
2  utf8validator.cpp
3 
4  This file is part of KSieve,
5  the KDE internet mail/usenet news message filtering library.
6  Copyright (c) 2003 Marc Mutz <[email protected]>
7 
8  KSieve is free software; you can redistribute it and/or modify it
9  under the terms of the GNU General Public License, version 2, as
10  published by the Free Software Foundation.
11 
12  KSieve is distributed in the hope that it will be useful, but
13  WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with this program; if not, write to the Free Software
19  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 
21  In addition, as a special exception, the copyright holders give
22  permission to link the code of this program with any edition of
23  the Qt library by Trolltech AS, Norway (or with modified versions
24  of Qt that use the same license as Qt), and distribute linked
25  combinations including the two. You must obey the GNU General
26  Public License in all respects for all of the code used other than
27  Qt. If you modify this file, you may extend this exception to
28  your version of the file, but you are not obligated to do so. If
29  you do not wish to do so, delete this exception statement from
30  your version.
31 */
32 
33 #include <impl/utf8validator.h>
34 
35 #include <qglobal.h>
36 
// Returns true when the most significant bit of the byte is set, i.e. the
// byte is not 7-bit ASCII (it shows up as a negative signed char).
static inline bool is8Bit(signed char ch)
{
    const int highBit = ch & 0x80;
    return highBit != 0;
}
41 
// Lead byte of a two-byte sequence: bit pattern 110x xxxx (0xC0..0xDF).
static inline bool isUtf8TupelIndicator(unsigned char ch)
{
    return ch >= 0xC0 && ch <= 0xDF;
}
46 
// A two-byte lead of 0xC0 or 0xC1 can only encode values that fit in a
// single byte, so such a sequence is not minimally encoded.
static inline bool isUtf8OverlongTupel(unsigned char ch)
{
    return ch == 0xC0 || ch == 0xC1;
}
51 
// Lead byte of a three-byte sequence: bit pattern 1110 xxxx (0xE0..0xEF).
static inline bool isUtf8TripleIndicator(unsigned char ch)
{
    return ch >= 0xE0 && ch <= 0xEF;
}
56 
// A three-byte sequence is overlong when the lead byte is exactly 0xE0 and
// the second byte has the form 100x xxxx (0x80..0x9F).
static inline bool isUtf8OverlongTriple(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xE0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x9F;
}
61 
// Lead byte of a four-byte sequence: bit pattern 1111 0xxx (0xF0..0xF7).
static inline bool isUtf8QuartetIndicator(unsigned char ch)
{
    return ch >= 0xF0 && ch <= 0xF7;
}
66 
// A four-byte sequence is overlong when the lead byte is exactly 0xF0 and
// the second byte has the form 1000 xxxx (0x80..0x8F).
static inline bool isUtf8OverlongQuartet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x8F;
}
71 
// Lead byte of a five-byte sequence: bit pattern 1111 10xx (0xF8..0xFB).
static inline bool isUtf8QuintetIndicator(unsigned char ch)
{
    return ch >= 0xF8 && ch <= 0xFB;
}
76 
// A five-byte sequence is overlong when the lead byte is exactly 0xF8 and
// the second byte has the form 1000 0xxx (0x80..0x87).
static inline bool isUtf8OverlongQuintet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF8) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x87;
}
81 
// Lead byte of a six-byte sequence: bit pattern 1111 110x (0xFC or 0xFD).
static inline bool isUtf8SextetIndicator(unsigned char ch)
{
    return ch == 0xFC || ch == 0xFD;
}
86 
// A six-byte sequence is overlong when the lead byte is exactly 0xFC and
// the second byte has the form 1000 00xx (0x80..0x83).
static inline bool isUtf8OverlongSextet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xFC) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x83;
}
91 
// Continuation (trailing) byte of a multi-byte sequence: bit pattern
// 10xx xxxx (0x80..0xBF).
static inline bool isUtf8Continuation(unsigned char ch)
{
    return ch >= 0x80 && ch <= 0xBF;
}
96 
97 bool KSieve::isValidUtf8(const char *s, unsigned int len)
98 {
99  for (unsigned int i = 0; i < len; ++i) {
100  const unsigned char ch = s[i];
101  if (!is8Bit(ch)) {
102  continue;
103  }
104  if (isUtf8TupelIndicator(ch)) {
105  if (len - i < 1) { // too short
106  return false;
107  }
108  if (isUtf8OverlongTupel(ch)) { // not minimally encoded
109  return false;
110  }
111  if (!isUtf8Continuation(s[i + 1])) { // not followed by 10xx xxxx
112  return false;
113  }
114  i += 1;
115  } else if (isUtf8TripleIndicator(ch)) {
116  if (len - i < 2) { // too short
117  return false;
118  }
119  if (isUtf8OverlongTriple(ch, s[i + 1])) { // not minimally encoded
120  return false;
121  }
122  if (!isUtf8Continuation(s[i + 2])) { // not followed by 10xx xxxx
123  return false;
124  }
125  i += 2;
126  } else if (isUtf8QuartetIndicator(ch)) {
127  if (len - i < 3) { // too short
128  return false;
129  }
130  if (isUtf8OverlongQuartet(ch, s[i + 1])) { // not minimally encoded
131  return false;
132  }
133  if (!isUtf8Continuation(s[i + 2])
134  || !isUtf8Continuation(s[i + 3])) { // not followed by 2x 10xx xxxx
135  return false;
136  }
137  i += 3;
138  } else if (isUtf8QuintetIndicator(ch)) {
139  if (len - i < 4) { // too short
140  return false;
141  }
142  if (isUtf8OverlongQuintet(ch, s[i + 1])) { // not minimally encoded
143  return false;
144  }
145  if (!isUtf8Continuation(s[i + 2])
146  || !isUtf8Continuation(s[i + 3])
147  || !isUtf8Continuation(s[i + 4])) { // not followed by 3x 10xx xxxx
148  return false;
149  }
150  i += 4;
151  } else if (isUtf8SextetIndicator(ch)) {
152  if (len - i < 5) { // too short
153  return false;
154  }
155  if (isUtf8OverlongSextet(ch, s[i + 1])) { // not minimally encoded
156  return false;
157  }
158  if (!isUtf8Continuation(s[i + 2])
159  || !isUtf8Continuation(s[i + 3])
160  || !isUtf8Continuation(s[i + 4])
161  || !isUtf8Continuation(s[i + 5])) { // not followed by 4x 10xx xxxx
162  return false;
163  }
164  i += 5;
165  } else {
166  return false;
167  }
168  }
169  return true;
170 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 1 2020 23:08:21 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.