Libksieve

utf8validator.cpp
1 /* -*- c++ -*-
2  utf8validator.cpp
3 
4  This file is part of KSieve,
5  the KDE internet mail/usenet news message filtering library.
6  SPDX-FileCopyrightText: 2002-2003 Marc Mutz <[email protected]>
7 
8  SPDX-License-Identifier: GPL-2.0-only
9 */
10 
11 #include <impl/utf8validator.h>
12 
13 #include <qglobal.h>
14 
// Reports whether the top bit of @p ch is set, i.e. the byte lies outside
// the 7-bit range.
static inline bool isChar8Bit(signed char ch)
{
    const unsigned char raw = static_cast<unsigned char>(ch);
    return (raw & 0x80U) != 0;
}
19 
// Lead byte of a two-byte sequence: the top three bits are 110 (110x xxxx).
static inline bool isUtf8TupelIndicator(unsigned char ch)
{
    const unsigned int topThreeBits = ch >> 5;
    return topThreeBits == 0x6U;
}
24 
// A two-byte lead of 0xC0 or 0xC1 (1100 000x) marks a sequence that is not
// minimally encoded.
static inline bool isUtf8OverlongTupel(unsigned char ch)
{
    return ch == 0xC0 || ch == 0xC1;
}
29 
// Lead byte of a three-byte sequence: the top four bits are 1110 (1110 xxxx).
static inline bool isUtf8TripleIndicator(unsigned char ch)
{
    const unsigned int highNibble = ch >> 4;
    return highNibble == 0xEU;
}
34 
// A three-byte sequence is not minimally encoded when its lead byte is 0xE0
// and the byte after it is in 0x80..0x9F (100x xxxx).
static inline bool isUtf8OverlongTriple(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xE0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x9F;
}
39 
// Lead byte of a four-byte sequence: the top five bits are 11110 (1111 0xxx).
static inline bool isUtf8QuartetIndicator(unsigned char ch)
{
    const unsigned int topFiveBits = ch >> 3;
    return topFiveBits == 0x1EU;
}
44 
// A four-byte sequence is not minimally encoded when its lead byte is 0xF0
// and the byte after it is in 0x80..0x8F (1000 xxxx).
static inline bool isUtf8OverlongQuartet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x8F;
}
49 
// Lead byte of a five-byte sequence: the top six bits are 111110 (1111 10xx).
static inline bool isUtf8QuintetIndicator(unsigned char ch)
{
    const unsigned int topSixBits = ch >> 2;
    return topSixBits == 0x3EU;
}
54 
// A five-byte sequence is not minimally encoded when its lead byte is 0xF8
// and the byte after it is in 0x80..0x87 (1000 0xxx).
static inline bool isUtf8OverlongQuintet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF8) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x87;
}
59 
// Lead byte of a six-byte sequence: 0xFC or 0xFD (1111 110x).
static inline bool isUtf8SextetIndicator(unsigned char ch)
{
    return ch == 0xFC || ch == 0xFD;
}
64 
// A six-byte sequence is not minimally encoded when its lead byte is 0xFC
// and the byte after it is in 0x80..0x83 (1000 00xx).
static inline bool isUtf8OverlongSextet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xFC) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x83;
}
69 
// Trailing byte of a multi-byte sequence: the top two bits are 10 (10xx xxxx).
static inline bool isUtf8Continuation(unsigned char ch)
{
    const unsigned int topTwoBits = ch >> 6;
    return topTwoBits == 0x2U;
}
74 
75 bool KSieve::isValidUtf8(const char *s, unsigned int len)
76 {
77  for (unsigned int i = 0; i < len; ++i) {
78  const unsigned char ch = s[i];
79  if (!isChar8Bit(ch)) {
80  continue;
81  }
82  if (isUtf8TupelIndicator(ch)) {
83  if (len - i < 1) { // too short
84  return false;
85  }
86  if (isUtf8OverlongTupel(ch)) { // not minimally encoded
87  return false;
88  }
89  if (!isUtf8Continuation(s[i + 1])) { // not followed by 10xx xxxx
90  return false;
91  }
92  i += 1;
93  } else if (isUtf8TripleIndicator(ch)) {
94  if (len - i < 2) { // too short
95  return false;
96  }
97  if (isUtf8OverlongTriple(ch, s[i + 1])) { // not minimally encoded
98  return false;
99  }
100  if (!isUtf8Continuation(s[i + 2])) { // not followed by 10xx xxxx
101  return false;
102  }
103  i += 2;
104  } else if (isUtf8QuartetIndicator(ch)) {
105  if (len - i < 3) { // too short
106  return false;
107  }
108  if (isUtf8OverlongQuartet(ch, s[i + 1])) { // not minimally encoded
109  return false;
110  }
111  if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3])) { // not followed by 2x 10xx xxxx
112  return false;
113  }
114  i += 3;
115  } else if (isUtf8QuintetIndicator(ch)) {
116  if (len - i < 4) { // too short
117  return false;
118  }
119  if (isUtf8OverlongQuintet(ch, s[i + 1])) { // not minimally encoded
120  return false;
121  }
122  if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3]) || !isUtf8Continuation(s[i + 4])) { // not followed by 3x 10xx xxxx
123  return false;
124  }
125  i += 4;
126  } else if (isUtf8SextetIndicator(ch)) {
127  if (len - i < 5) { // too short
128  return false;
129  }
130  if (isUtf8OverlongSextet(ch, s[i + 1])) { // not minimally encoded
131  return false;
132  }
133  if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3]) || !isUtf8Continuation(s[i + 4])
134  || !isUtf8Continuation(s[i + 5])) { // not followed by 4x 10xx xxxx
135  return false;
136  }
137  i += 5;
138  } else {
139  return false;
140  }
141  }
142  return true;
143 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Fri Apr 16 2021 23:09:34 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.