• Skip to content
  • Skip to link menu
KDE API Reference
  • KDE API Reference
  • kdepimlibs API Reference
  • KDE Home
  • Contact Us
 

KMIME Library

  • sources
  • kde-4.14
  • kdepimlibs
  • kmime
kmime_charfreq.cpp
Go to the documentation of this file.
1 /*
2  kmime_charfreq.cpp
3 
4  KMime, the KDE Internet mail/usenet news message library.
5  Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Library General Public
9  License as published by the Free Software Foundation; either
10  version 2 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Library General Public License for more details.
16 
17  You should have received a copy of the GNU Library General Public License
18  along with this library; see the file COPYING.LIB. If not, write to
19  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  Boston, MA 02110-1301, USA.
21 */
22 
34 #include "kmime_charfreq.h"
35 
36 using namespace KMime;
37 
42 //@cond PRIVATE
43 //class KMime::CharFreq::Private
44 //{
45 // public:
46 //};
47 //@endcond
48 
49 CharFreq::CharFreq( const QByteArray &buf )
50  : mNUL( 0 ),
51  mCTL( 0 ),
52  mCR( 0 ), mLF( 0 ),
53  mCRLF( 0 ),
54  mPrintable( 0 ),
55  mEightBit( 0 ),
56  mTotal( 0 ),
57  mLineMin( 0xffffffff ),
58  mLineMax( 0 ),
59  mTrailingWS( false ),
60  mLeadingFrom( false )
61 {
62  if ( !buf.isEmpty() ) {
63  count( buf.data(), buf.size() );
64  }
65 }
66 
67 CharFreq::CharFreq( const char *buf, size_t len )
68  : mNUL( 0 ),
69  mCTL( 0 ),
70  mCR( 0 ), mLF( 0 ),
71  mCRLF( 0 ),
72  mPrintable( 0 ),
73  mEightBit( 0 ),
74  mTotal( 0 ),
75  mLineMin( 0xffffffff ),
76  mLineMax( 0 ),
77  mTrailingWS( false ),
78  mLeadingFrom( false )
79 {
80  if ( buf && len > 0 ) {
81  count( buf, len );
82  }
83 }
84 
85 //@cond PRIVATE
// Returns true when ch is a horizontal tab or a space, false for any
// other character (including CR and LF).
static inline bool isWS( char ch )
{
  switch ( ch ) {
  case ' ':
  case '\t':
    return true;
  default:
    return false;
  }
}
90 //@endcond
91 
92 void CharFreq::count( const char *it, size_t len )
93 {
94  const char *end = it + len;
95  uint currentLineLength = 0;
96  // initialize the prevChar with LF so that From_ detection works w/o
97  // special-casing:
98  char prevChar = '\n';
99  char prevPrevChar = 0;
100 
101  for ( ; it != end ; ++it ) {
102  ++currentLineLength;
103  switch ( *it ) {
104  case '\0': ++mNUL; break;
105  case '\r': ++mCR; break;
106  case '\n': ++mLF;
107  if ( prevChar == '\r' ) {
108  --currentLineLength; ++mCRLF;
109  }
110  if ( currentLineLength >= mLineMax ) {
111  mLineMax = currentLineLength-1;
112  }
113  if ( currentLineLength <= mLineMin ) {
114  mLineMin = currentLineLength-1;
115  }
116  if ( !mTrailingWS ) {
117  if ( isWS( prevChar ) ||
118  ( prevChar == '\r' && isWS( prevPrevChar ) ) ) {
119  mTrailingWS = true;
120  }
121  }
122  currentLineLength = 0;
123  break;
124  case 'F': // check for lines starting with From_ if not found already:
125  if ( !mLeadingFrom ) {
126  if ( prevChar == '\n' && end - it >= 5 &&
127  !qstrncmp( "From ", it, 5 ) ) {
128  mLeadingFrom = true;
129  }
130  }
131  ++mPrintable;
132  break;
133  default:
134  {
135  uchar c = *it;
136  if ( c == '\t' || ( c >= ' ' && c <= '~' ) ) {
137  ++mPrintable;
138  } else if ( c == 127 || c < ' ' ) {
139  ++mCTL;
140  } else {
141  ++mEightBit;
142  }
143  }
144  }
145  prevPrevChar = prevChar;
146  prevChar = *it;
147  }
148 
149  // consider the length of the last line
150  if ( currentLineLength >= mLineMax ) {
151  mLineMax = currentLineLength;
152  }
153  if ( currentLineLength <= mLineMin ) {
154  mLineMin = currentLineLength;
155  }
156 
157  // check whether the last character is tab or space
158  if ( isWS( prevChar ) ) {
159  mTrailingWS = true;
160  }
161 
162  mTotal = len;
163 }
164 
165 bool CharFreq::isEightBitData() const
166 {
167  return type() == EightBitData;
168 }
169 
170 bool CharFreq::isEightBitText() const
171 {
172  return type() == EightBitText;
173 }
174 
175 bool CharFreq::isSevenBitData() const
176 {
177  return type() == SevenBitData;
178 }
179 
180 bool CharFreq::isSevenBitText() const
181 {
182  return type() == SevenBitText;
183 }
184 
185 bool CharFreq::hasTrailingWhitespace() const
186 {
187  return mTrailingWS;
188 }
189 
190 bool CharFreq::hasLeadingFrom() const
191 {
192  return mLeadingFrom;
193 }
194 
195 CharFreq::Type CharFreq::type() const
196 {
197 #if 0
198  qDebug( "Total: %d; NUL: %d; CTL: %d;\n"
199  "CR: %d; LF: %d; CRLF: %d;\n"
200  "lineMin: %d; lineMax: %d;\n"
201  "printable: %d; eightBit: %d;\n"
202  "trailing whitespace: %s;\n"
203  "leading 'From ': %s;\n",
204  total, NUL, CTL, CR, LF, CRLF, lineMin, lineMax,
205  printable, eightBit,
206  mTrailingWS ? "yes" : "no" , mLeadingFrom ? "yes" : "no" );
207 #endif
208  if ( mNUL ) { // must be binary
209  return Binary;
210  }
211 
212  // doesn't contain NUL's:
213  if ( mEightBit ) {
214  if ( mLineMax > 988 ) {
215  return EightBitData; // not allowed in 8bit
216  }
217  if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
218  return EightBitData;
219  }
220  return EightBitText;
221  }
222 
223  // doesn't contain NUL's, nor 8bit chars:
224  if ( mLineMax > 988 ) {
225  return SevenBitData;
226  }
227  if ( ( mLF != mCRLF && mCRLF > 0 ) || mCR != mCRLF || controlCodesRatio() > 0.2 ) {
228  return SevenBitData;
229  }
230 
231  // no NUL, no 8bit chars, no excessive CTLs and no lines > 998 chars:
232  return SevenBitText;
233 }
234 
235 float CharFreq::printableRatio() const
236 {
237  if ( mTotal ) {
238  return float( mPrintable ) / float( mTotal );
239  } else {
240  return 0;
241  }
242 }
243 
244 float CharFreq::controlCodesRatio() const
245 {
246  if ( mTotal ) {
247  return float( mCTL ) / float( mTotal );
248  } else {
249  return 0;
250  }
251 }
252 
KMime::CharFreq::EightBitData
8bit binary
Definition: kmime_charfreq.h:103
KMime::CharFreq::SevenBitData
7bit binary
Definition: kmime_charfreq.h:105
KMime::CharFreq::EightBitText
8bit text
Definition: kmime_charfreq.h:106
KMime::CharFreq::isEightBitText
bool isEightBitText() const
Returns true if the data Type is EightBitText; false otherwise.
Definition: kmime_charfreq.cpp:170
QByteArray
KMime::CharFreq::Binary
8bit binary
Definition: kmime_charfreq.h:104
KMime::CharFreq::isEightBitData
bool isEightBitData() const
Returns true if the data Type is EightBitData; false otherwise.
Definition: kmime_charfreq.cpp:165
QByteArray::isEmpty
bool isEmpty() const
KMime::CharFreq::isSevenBitText
bool isSevenBitText() const
Returns true if the data Type is SevenBitText; false otherwise.
Definition: kmime_charfreq.cpp:180
kmime_charfreq.h
This file is part of the API for handling MIME data and defines the CharFreq class.
KMime::CharFreq::hasTrailingWhitespace
bool hasTrailingWhitespace() const
Returns true if any line of the data (or the data itself) ends with a space or tab character.
Definition: kmime_charfreq.cpp:185
KMime::CharFreq::Type
Type
The different types of data.
Definition: kmime_charfreq.h:101
KMime::CharFreq::isSevenBitData
bool isSevenBitData() const
Returns true if the data Type is SevenBitData; false otherwise.
Definition: kmime_charfreq.cpp:175
KMime::CharFreq::printableRatio
float printableRatio() const
Returns the ratio of printable characters to all characters in the data, as a fraction between 0 and 1 (0 for empty data).
Definition: kmime_charfreq.cpp:235
KMime::CharFreq::CharFreq
CharFreq(const QByteArray &buf)
Constructs a Character Frequency instance for a buffer buf of QByteArray data.
Definition: kmime_charfreq.cpp:49
KMime::CharFreq::hasLeadingFrom
bool hasLeadingFrom() const
Returns true if the data contains a line that starts with "From ".
Definition: kmime_charfreq.cpp:190
KMime::CharFreq::controlCodesRatio
float controlCodesRatio() const
Returns the ratio of control code characters (CTLs) to all characters in the data, as a fraction between 0 and 1 (0 for empty data).
Definition: kmime_charfreq.cpp:244
QByteArray::data
char * data()
QByteArray::size
int size() const
KMime::CharFreq::type
Type type() const
Returns the data Type as derived from the class heuristics.
Definition: kmime_charfreq.cpp:195
KMime::CharFreq::SevenBitText
7bit text
Definition: kmime_charfreq.h:107
This file is part of the KDE documentation.
Documentation copyright © 1996-2020 The KDE developers.
Generated on Mon Jun 22 2020 13:37:18 by doxygen 1.8.7 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

kdepimlibs API Reference

Skip menu "kdepimlibs API Reference"
  • akonadi
  •   contact
  •   kmime
  •   socialutils
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2

Search



Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal