KMime

kmime_charfreq.cpp
Go to the documentation of this file.
1/*
2 kmime_charfreq.cpp
3
4 KMime, the KDE Internet mail/usenet news message library.
5 SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>
6
7 SPDX-License-Identifier: LGPL-2.0-or-later
8*/
9
10/**
11 @file
12 This file is part of the API for handling MIME data and
13 defines the CharFreq class.
14
15 @brief
16 Defines the CharFreq class.
17
18 @authors Marc Mutz <mutz@kde.org>
19*/
20
21#include "kmime_charfreq_p.h"
22#include "kmime_debug.h"
23
24using namespace KMime;
25
26CharFreq::CharFreq(QByteArrayView buf)
27 : mNUL(0),
28 mCTL(0),
29 mCR(0), mLF(0),
30 mCRLF(0),
31 mPrintable(0),
32 mEightBit(0),
33 mTotal(0),
34 mLineMin(0xffffffff),
35 mLineMax(0)
36{
37 if (!buf.isEmpty()) {
38 count(buf.data(), buf.size());
39 }
40}
41
/** Returns true if @p ch is a horizontal tab or a space. */
static inline bool isWS(char ch)
{
    switch (ch) {
    case '\t':
    case ' ':
        return true;
    default:
        return false;
    }
}
46
47void CharFreq::count(const char *it, size_t len)
48{
49 const char *end = it + len;
50 uint currentLineLength = 0;
51 // initialize the prevChar with LF so that From_ detection works w/o
52 // special-casing:
53 char prevChar = '\n';
54 char prevPrevChar = 0;
55
56 for (; it != end ; ++it) {
57 ++currentLineLength;
58 switch (*it) {
59 case '\0': ++mNUL; break;
60 case '\r': ++mCR; break;
61 case '\n': ++mLF;
62 if (prevChar == '\r') {
63 --currentLineLength; ++mCRLF;
64 }
65 if (currentLineLength >= mLineMax) {
66 mLineMax = currentLineLength - 1;
67 }
68 if (currentLineLength <= mLineMin) {
69 mLineMin = currentLineLength - 1;
70 }
71 if (!mTrailingWS) {
72 if (isWS(prevChar) ||
73 (prevChar == '\r' && isWS(prevPrevChar))) {
74 mTrailingWS = true;
75 }
76 }
77 currentLineLength = 0;
78 break;
79 case 'F': // check for lines starting with From_ if not found already:
80 if (!mLeadingFrom) {
81 if (prevChar == '\n' && end - it >= 5 &&
82 !qstrncmp("From ", it, 5)) {
83 mLeadingFrom = true;
84 }
85 }
86 ++mPrintable;
87 break;
88 default: {
89 uchar c = *it;
90 if (c == '\t' || (c >= ' ' && c <= '~')) {
91 ++mPrintable;
92 } else if (c == 127 || c < ' ') {
93 ++mCTL;
94 } else {
95 ++mEightBit;
96 }
97 }
98 }
99 prevPrevChar = prevChar;
100 prevChar = *it;
101 }
102
103 // consider the length of the last line
104 if (currentLineLength >= mLineMax) {
105 mLineMax = currentLineLength;
106 }
107 if (currentLineLength <= mLineMin) {
108 mLineMin = currentLineLength;
109 }
110
111 // check whether the last character is tab or space
112 if (isWS(prevChar)) {
113 mTrailingWS = true;
114 }
115
116 mTotal = len;
117}
118
119bool CharFreq::isEightBitData() const
120{
121 return type() == EightBitData;
122}
123
124bool CharFreq::isEightBitText() const
125{
126 return type() == EightBitText;
127}
128
129bool CharFreq::isSevenBitData() const
130{
131 return type() == SevenBitData;
132}
133
134bool CharFreq::isSevenBitText() const
135{
136 return type() == SevenBitText;
137}
138
139bool CharFreq::hasTrailingWhitespace() const
140{
141 return mTrailingWS;
142}
143
144bool CharFreq::hasLeadingFrom() const
145{
146 return mLeadingFrom;
147}
148
149CharFreq::Type CharFreq::type() const
150{
151#if 0
152 qCDebug(KMIME_LOG)("Total: %d; NUL: %d; CTL: %d;\n"
153 "CR: %d; LF: %d; CRLF: %d;\n"
154 "lineMin: %d; lineMax: %d;\n"
155 "printable: %d; eightBit: %d;\n"
156 "trailing whitespace: %s;\n"
157 "leading 'From ': %s;\n",
158 total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
159 printable, eightBit,
160 mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no");
161#endif
162 if (mNUL) { // must be binary
163 return Binary;
164 }
165
166 // doesn't contain NUL's:
167 if (mEightBit) {
168 if (mLineMax > 988) {
169 return EightBitData; // not allowed in 8bit
170 }
171 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
172 return EightBitData;
173 }
174 return EightBitText;
175 }
176
177 // doesn't contain NUL's, nor 8bit chars:
178 if (mLineMax > 988) {
179 return SevenBitData;
180 }
181 if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
182 return SevenBitData;
183 }
184
185 // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
186 return SevenBitText;
187}
188
189float CharFreq::printableRatio() const
190{
191 if (mTotal) {
192 return float(mPrintable) / float(mTotal);
193 } else {
194 return 0;
195 }
196}
197
198float CharFreq::controlCodesRatio() const
199{
200 if (mTotal) {
201 return float(mCTL) / float(mTotal);
202 } else {
203 return 0;
204 }
205}
206
Type type(const QSqlDatabase &db)
const QList< QKeySequence > & end()
const_pointer data() const
bool isEmpty() const
qsizetype size() const
This file is part of the KDE documentation.
Documentation copyright © 1996-2024 The KDE developers.
Generated on Tue Mar 26 2024 11:20:12 by doxygen 1.10.0 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.