Libksieve

utf8validator.cpp
1/* -*- c++ -*-
2 utf8validator.cpp
3
4 This file is part of KSieve,
5 the KDE internet mail/usenet news message filtering library.
6 SPDX-FileCopyrightText: 2002-2003 Marc Mutz <mutz@kde.org>
7
8 SPDX-License-Identifier: GPL-2.0-only
9*/
10
11#include "utf8validator.h"
12
13#include <qglobal.h>
14
// Returns true when the most significant bit of ch is set, i.e. the
// byte is not a plain 7-bit character.
static inline bool isChar8Bit(signed char ch)
{
    return (ch & 0x80) != 0;
}
19
// Returns true when ch has the bit pattern 110x xxxx, which is how the
// lead byte of a two-byte sequence looks.
static inline bool isUtf8TupelIndicator(unsigned char ch)
{
    return (ch >> 5) == 0x06; // top three bits are 110
}
24
// Returns true when ch is 0xC0 or 0xC1. The caller treats a two-byte
// sequence starting with either of these as not minimally encoded.
static inline bool isUtf8OverlongTupel(unsigned char ch)
{
    return ch == 0xC0 || ch == 0xC1;
}
29
// Returns true when ch has the bit pattern 1110 xxxx, which is how the
// lead byte of a three-byte sequence looks.
static inline bool isUtf8TripleIndicator(unsigned char ch)
{
    return (ch >> 4) == 0x0E; // top four bits are 1110
}
34
// Returns true when ch1 is exactly 0xE0 and ch2 matches 100x xxxx
// (0x80..0x9F). The caller treats a three-byte sequence beginning with
// this pair as not minimally encoded.
static inline bool isUtf8OverlongTriple(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xE0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x9F;
}
39
// Returns true when ch has the bit pattern 1111 0xxx, which is how the
// lead byte of a four-byte sequence looks.
static inline bool isUtf8QuartetIndicator(unsigned char ch)
{
    return (ch >> 3) == 0x1E; // top five bits are 11110
}
44
// Returns true when ch1 is exactly 0xF0 and ch2 matches 1000 xxxx
// (0x80..0x8F). The caller treats a four-byte sequence beginning with
// this pair as not minimally encoded.
static inline bool isUtf8OverlongQuartet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF0) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x8F;
}
49
// Returns true when ch has the bit pattern 1111 10xx, which is how the
// lead byte of a five-byte sequence looks.
static inline bool isUtf8QuintetIndicator(unsigned char ch)
{
    return (ch >> 2) == 0x3E; // top six bits are 111110
}
54
// Returns true when ch1 is exactly 0xF8 and ch2 matches 1000 0xxx
// (0x80..0x87). The caller treats a five-byte sequence beginning with
// this pair as not minimally encoded.
static inline bool isUtf8OverlongQuintet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xF8) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x87;
}
59
// Returns true when ch has the bit pattern 1111 110x, which is how the
// lead byte of a six-byte sequence looks.
static inline bool isUtf8SextetIndicator(unsigned char ch)
{
    return (ch >> 1) == 0x7E; // top seven bits are 1111110
}
64
// Returns true when ch1 is exactly 0xFC and ch2 matches 1000 00xx
// (0x80..0x83). The caller treats a six-byte sequence beginning with
// this pair as not minimally encoded.
static inline bool isUtf8OverlongSextet(unsigned char ch1, unsigned char ch2)
{
    if (ch1 != 0xFC) {
        return false;
    }
    return ch2 >= 0x80 && ch2 <= 0x83;
}
69
// Returns true when ch has the bit pattern 10xx xxxx, the form the
// caller requires of every byte that follows a lead byte.
static inline bool isUtf8Continuation(unsigned char ch)
{
    return (ch >> 6) == 0x02; // top two bits are 10
}
74
75bool KSieve::isValidUtf8(const char *s, unsigned int len)
76{
77 for (unsigned int i = 0; i < len; ++i) {
78 const unsigned char ch = s[i];
79 if (!isChar8Bit(ch)) {
80 continue;
81 }
82 if (isUtf8TupelIndicator(ch)) {
83 if (len - i < 1) { // too short
84 return false;
85 }
86 if (isUtf8OverlongTupel(ch)) { // not minimally encoded
87 return false;
88 }
89 if (!isUtf8Continuation(s[i + 1])) { // not followed by 10xx xxxx
90 return false;
91 }
92 i += 1;
93 } else if (isUtf8TripleIndicator(ch)) {
94 if (len - i < 2) { // too short
95 return false;
96 }
97 if (isUtf8OverlongTriple(ch, s[i + 1])) { // not minimally encoded
98 return false;
99 }
100 if (!isUtf8Continuation(s[i + 2])) { // not followed by 10xx xxxx
101 return false;
102 }
103 i += 2;
104 } else if (isUtf8QuartetIndicator(ch)) {
105 if (len - i < 3) { // too short
106 return false;
107 }
108 if (isUtf8OverlongQuartet(ch, s[i + 1])) { // not minimally encoded
109 return false;
110 }
111 if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3])) { // not followed by 2x 10xx xxxx
112 return false;
113 }
114 i += 3;
115 } else if (isUtf8QuintetIndicator(ch)) {
116 if (len - i < 4) { // too short
117 return false;
118 }
119 if (isUtf8OverlongQuintet(ch, s[i + 1])) { // not minimally encoded
120 return false;
121 }
122 if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3]) || !isUtf8Continuation(s[i + 4])) { // not followed by 3x 10xx xxxx
123 return false;
124 }
125 i += 4;
126 } else if (isUtf8SextetIndicator(ch)) {
127 if (len - i < 5) { // too short
128 return false;
129 }
130 if (isUtf8OverlongSextet(ch, s[i + 1])) { // not minimally encoded
131 return false;
132 }
133 if (!isUtf8Continuation(s[i + 2]) || !isUtf8Continuation(s[i + 3]) || !isUtf8Continuation(s[i + 4])
134 || !isUtf8Continuation(s[i + 5])) { // not followed by 4x 10xx xxxx
135 return false;
136 }
137 i += 5;
138 } else {
139 return false;
140 }
141 }
142 return true;
143}
This file is part of the KDE documentation.
Documentation copyright © 1996-2025 The KDE developers.
Generated on Fri Jan 3 2025 12:01:21 by doxygen 1.12.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.