KMime

kmime_charfreq.cpp
Go to the documentation of this file.
1 /*
2  kmime_charfreq.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  SPDX-FileCopyrightText: 2001-2002 Marc Mutz <[email protected]>
6 
7  SPDX-License-Identifier: LGPL-2.0-or-later
8 */
9 
10 /**
11  @file
12  This file is part of the API for handling MIME data and
13  defines the CharFreq class.
14 
15  @brief
16  Defines the CharFreq class.
17 
18  @authors Marc Mutz <[email protected]>
19 */
20 
21 #include "kmime_charfreq.h"
22 #include "kmime_debug.h"
23 
24 using namespace KMime;
25 
26 /**
27  * Private class that helps to provide binary compatibility between releases.
28  * @internal
29  */
30 //@cond PRIVATE
31 //class KMime::CharFreq::Private
32 //{
33 // public:
34 //};
35 //@endcond
36 
38  : mNUL(0),
39  mCTL(0),
40  mCR(0), mLF(0),
41  mCRLF(0),
42  mPrintable(0),
43  mEightBit(0),
44  mTotal(0),
45  mLineMin(0xffffffff),
46  mLineMax(0)
47 {
48  if (!buf.isEmpty()) {
49  count(buf.data(), buf.size());
50  }
51 }
52 
53 CharFreq::CharFreq(const char *buf, size_t len)
54  : mNUL(0),
55  mCTL(0),
56  mCR(0), mLF(0),
57  mCRLF(0),
58  mPrintable(0),
59  mEightBit(0),
60  mTotal(0),
61  mLineMin(0xffffffff),
62  mLineMax(0)
63 {
64  if (buf && len > 0) {
65  count(buf, len);
66  }
67 }
68 
69 //@cond PRIVATE
/** Returns true if @p ch is a horizontal tab or a space, false otherwise. */
static inline bool isWS(char ch)
{
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
74 //@endcond
75 
76 void CharFreq::count(const char *it, size_t len)
77 {
78  const char *end = it + len;
79  uint currentLineLength = 0;
80  // initialize the prevChar with LF so that From_ detection works w/o
81  // special-casing:
82  char prevChar = '\n';
83  char prevPrevChar = 0;
84 
85  for (; it != end ; ++it) {
86  ++currentLineLength;
87  switch (*it) {
88  case '\0': ++mNUL; break;
89  case '\r': ++mCR; break;
90  case '\n': ++mLF;
91  if (prevChar == '\r') {
92  --currentLineLength; ++mCRLF;
93  }
94  if (currentLineLength >= mLineMax) {
95  mLineMax = currentLineLength - 1;
96  }
97  if (currentLineLength <= mLineMin) {
98  mLineMin = currentLineLength - 1;
99  }
100  if (!mTrailingWS) {
101  if (isWS(prevChar) ||
102  (prevChar == '\r' && isWS(prevPrevChar))) {
103  mTrailingWS = true;
104  }
105  }
106  currentLineLength = 0;
107  break;
108  case 'F': // check for lines starting with From_ if not found already:
109  if (!mLeadingFrom) {
110  if (prevChar == '\n' && end - it >= 5 &&
111  !qstrncmp("From ", it, 5)) {
112  mLeadingFrom = true;
113  }
114  }
115  ++mPrintable;
116  break;
117  default: {
118  uchar c = *it;
119  if (c == '\t' || (c >= ' ' && c <= '~')) {
120  ++mPrintable;
121  } else if (c == 127 || c < ' ') {
122  ++mCTL;
123  } else {
124  ++mEightBit;
125  }
126  }
127  }
128  prevPrevChar = prevChar;
129  prevChar = *it;
130  }
131 
132  // consider the length of the last line
133  if (currentLineLength >= mLineMax) {
134  mLineMax = currentLineLength;
135  }
136  if (currentLineLength <= mLineMin) {
137  mLineMin = currentLineLength;
138  }
139 
140  // check whether the last character is tab or space
141  if (isWS(prevChar)) {
142  mTrailingWS = true;
143  }
144 
145  mTotal = len;
146 }
147 
149 {
150  return type() == EightBitData;
151 }
152 
154 {
155  return type() == EightBitText;
156 }
157 
159 {
160  return type() == SevenBitData;
161 }
162 
164 {
165  return type() == SevenBitText;
166 }
167 
169 {
170  return mTrailingWS;
171 }
172 
174 {
175  return mLeadingFrom;
176 }
177 
179 {
180 #if 0
181  qCDebug(KMIME_LOG)("Total: %d; NUL: %d; CTL: %d;\n"
182  "CR: %d; LF: %d; CRLF: %d;\n"
183  "lineMin: %d; lineMax: %d;\n"
184  "printable: %d; eightBit: %d;\n"
185  "trailing whitespace: %s;\n"
186  "leading 'From ': %s;\n",
187  total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
188  printable, eightBit,
189  mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no");
190 #endif
191  if (mNUL) { // must be binary
192  return Binary;
193  }
194 
195  // doesn't contain NUL's:
196  if (mEightBit) {
197  if (mLineMax > 988) {
198  return EightBitData; // not allowed in 8bit
199  }
200  if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
201  return EightBitData;
202  }
203  return EightBitText;
204  }
205 
206  // doesn't contain NUL's, nor 8bit chars:
207  if (mLineMax > 988) {
208  return SevenBitData;
209  }
210  if ((mLF != mCRLF && mCRLF > 0) || mCR != mCRLF || controlCodesRatio() > 0.2) {
211  return SevenBitData;
212  }
213 
214  // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
215  return SevenBitText;
216 }
217 
219 {
220  if (mTotal) {
221  return float(mPrintable) / float(mTotal);
222  } else {
223  return 0;
224  }
225 }
226 
228 {
229  if (mTotal) {
230  return float(mCTL) / float(mTotal);
231  } else {
232  return 0;
233  }
234 }
235 
bool isEightBitText() const
Returns true if the data Type is EightBitText; false otherwise.
bool isEightBitData() const
Returns true if the data Type is EightBitData; false otherwise.
bool isEmpty() const
bool isSevenBitText() const
Returns true if the data Type is SevenBitText; false otherwise.
This file is part of the API for handling MIME data and defines the CharFreq class.
bool hasTrailingWhitespace() const
Returns true if the data contains trailing whitespace.
Type
The different types of data.
bool isSevenBitData() const
Returns true if the data Type is SevenBitData; false otherwise.
float printableRatio() const
Returns the ratio of printable characters to the total number of characters in the data, as a value between 0.0 and 1.0 (0 if the data is empty).
CharFreq(const QByteArray &buf)
Constructs a Character Frequency instance for a buffer buf of QByteArray data.
bool hasLeadingFrom() const
Returns true if the data contains a line that starts with "From ".
float controlCodesRatio() const
Returns the ratio of control code characters (CTLs) to the total number of characters in the data, as a value between 0.0 and 1.0 (0 if the data is empty).
char * data()
int size() const
Type type() const
Returns the data Type as derived from the class heuristics.
This file is part of the KDE documentation.
Documentation copyright © 1996-2021 The KDE developers.
Generated on Sat Sep 25 2021 23:14:46 by doxygen 1.8.11 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.