24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <kcharsets.h>
41 #include <QtCore/QList>
42 #include <QtCore/QString>
43 #include <QtCore/QTextCodec>
49 #include <boost/concept_check.hpp>
51 using namespace KMime;
// File-level state used by the helpers below.
// Charset names seen so far; entries are appended upper-cased and compared
// case-insensitively (qstricmp) by the lookup code that follows.
55 QList<QByteArray> c_harsetCache;
// Language names seen so far; handled the same way as c_harsetCache.
56 QList<QByteArray> l_anguageCache;
// Charset name used to re-decode a header when UTF-8 decoding produced
// U+FFFD; an empty string disables that fallback.
57 QString f_allbackCharEnc;
// Flag stored from a parameter named violateStandard and handed back by a
// getter; its consumers are not part of this excerpt.
58 bool u_seOutlookEncoding =
false;
// Case-insensitive scan of c_harsetCache for `name`. On the visible
// fall-through path the upper-cased name is appended and the freshly stored
// entry is returned. (What happens on a match is not part of this excerpt.)
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
68 c_harsetCache.append( name.toUpper() );
70 return c_harsetCache.last();
// Case-insensitive scan of l_anguageCache for `name`. On the visible
// fall-through path the upper-cased name is appended and the freshly stored
// entry is returned. (What happens on a match is not part of this excerpt.)
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
81 l_anguageCache.append( name.toUpper() );
83 return l_anguageCache.last();
// Visits every character of `s`; the test fires when toLatin1() yields zero
// or a negative value.
// NOTE(review): a negative result requires `char` to be signed on the target
// platform — confirm behaviour where plain char is unsigned.
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
90 if ( s.at( i ).toLatin1() <= 0 ) {
// Maps each Headers::CE* value to its textual name; any value that is not
// listed explicitly yields "unknown".
100 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
101 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
102 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
103 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
104 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
105 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
106 default:
return QString::fromLatin1(
"unknown" );
// Builds `allowed`, a list of content encodings chosen by cf.type().
// The case labels are not part of this excerpt; only the order in which
// encodings are appended is visible.
112 QList<Headers::contentEncoding> allowed;
115 switch ( cf.
type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
// Two visible branches append the same pair in opposite order
// (quoted-printable first vs. base64 first).
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
// 16 bytes = 128 bits. Read as a bitmap indexed by 7-bit character code, most
// significant bit first, the set bits are:  " ( ) , . : ; < > @ [ \ ]
// NOTE(review): the lookup code is not part of this excerpt — confirm the
// bit order before relying on this reading.
144 const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
// 16 bytes = 128 bits. Read as a bitmap indexed by 7-bit character code, most
// significant bit first, the set bits are:  " ( ) , / : ; < = > ? @ [ \ ]
// NOTE(review): the lookup code is not part of this excerpt — confirm the
// bit order before relying on this reading.
152 const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
// 16 bytes = 128 bits. Read as a bitmap indexed by 7-bit character code, most
// significant bit first, the set bits are the ASCII letters and digits plus:
//   ! # $ % & ' * + - / = ? ^ _ ` { | } ~
// NOTE(review): the lookup code is not part of this excerpt — confirm the
// bit order before relying on this reading.
160 const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
// 16 bytes = 128 bits. Read as a bitmap indexed by 7-bit character code, most
// significant bit first, the set bits are the ASCII letters and digits plus:
//   ! # $ % & ' * + - . ^ _ ` { | } ~
// NOTE(review): the lookup code is not part of this excerpt — confirm the
// bit order before relying on this reading.
168 const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
// 16 bytes = 128 bits. Read as a bitmap indexed by 7-bit character code, most
// significant bit first, the set bits are the ASCII letters and digits plus:
//   ! * + - /
// NOTE(review): the lookup code is not part of this excerpt — confirm the
// bit order before relying on this reading.
176 const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
// Setter body: stores the caller-supplied charset name in the file-level
// fallback variable.
185 f_allbackCharEnc = fallbackCharEnc;
// Getter body: returns the fallback charset name currently stored.
190 return f_allbackCharEnc;
// Setter body: stores the caller-supplied flag in the file-level variable.
195 u_seOutlookEncoding = violateStandard;
// Getter body: returns the flag currently stored.
200 return u_seOutlookEncoding;
205 const QByteArray &defaultCS,
bool forceCS )
// Walks `src` byte by byte. Whitespace met while onlySpacesSinceLastWord is
// set is parked in spaceBuffer instead of being copied straight to the
// result; the flag is raised after an encoded word has been decoded and is
// cleared at the two visible places where spaceBuffer is flushed.
208 QByteArray spaceBuffer;
209 const char *scursor = src.constData();
210 const char *send = scursor + src.length();
211 bool onlySpacesSinceLastWord =
false;
213 while ( scursor != send ) {
215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
216 spaceBuffer += *scursor++;
// '=' is the only byte that triggers an encoded-word parse attempt.
221 if ( *scursor ==
'=' ) {
225 const char *start = scursor;
226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
// Decoded text is accumulated as UTF-8 bytes.
227 result += decoded.toUtf8();
228 onlySpacesSinceLastWord =
true;
231 if ( onlySpacesSinceLastWord ) {
232 result += spaceBuffer;
233 onlySpacesSinceLastWord =
false;
241 if ( onlySpacesSinceLastWord ) {
242 result += spaceBuffer;
243 onlySpacesSinceLastWord =
false;
// The accumulated bytes are interpreted as UTF-8. If that yields U+FFFD and a
// fallback charset is configured, the same bytes are decoded with it instead.
251 const QString tryUtf8 = QString::fromUtf8( result );
252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
// NOTE(review): codec is dereferenced without a visible null check — confirm
// codecForName() cannot return 0 for the configured name.
254 return codec->toUnicode( result );
// Characters that force encoding when they occur in an address header
// (tested with strchr where addressHeader is set) and that split words in
// encodeRFC2047Sentence().
266 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
269 bool addressHeader,
bool allow8BitHeaders )
// Converts `src` to bytes in the chosen charset, locates the span that needs
// protection and wraps it as "=?charset?...": a hand-built hex form when
// useQEncoding is set, "?B?" + base64 otherwise. Untouched bytes before and
// after the span are copied verbatim.
273 bool nonAscii=
false, ok=
true, useQEncoding=
false;
276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
// Visible alternative: take the charset from the locale encoding.
281 usedCS = KGlobal::locale()->encoding();
282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
286 if ( charset.isEmpty() )
287 usedCS = codec->name();
// First conversion attempt; invalidChars reports characters the codec could
// not represent.
292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
294 if ( converterState.invalidChars > 0 ) {
// Retry with a codec looked up from usedCS (set in lines not shown here).
// NOTE(review): the result of codecForName() is used without a visible null
// check.
296 codec = QTextCodec::codecForName( usedCS );
297 encoded8Bit = codec->fromUnicode( src );
300 if ( usedCS.contains(
"8859-" ) ) {
304 if ( allow8BitHeaders ) {
// Scan for bytes that force encoding: high-bit bytes, ESC (\033) and, for
// address headers, the reserved characters. The literal used in this first
// scan is the same string as reservedCharacters.
308 uint encoded8BitLength = encoded8Bit.length();
309 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
310 if ( encoded8Bit[i] ==
' ' ) {
315 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
316 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
// Advance `end` to the next space (or the end of the data).
324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Look past `end` for further bytes needing encoding and extend the span.
329 for (
int x=end; x<encoded8Bit.length(); x++ ) {
330 if ( ( (
signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] ==
'\033' ) ||
331 ( addressHeader && ( strchr(reservedCharacters, encoded8Bit[x]) != 0 ) ) ) {
334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Bytes before `start` are kept as they are; the encoded word begins here.
341 result = encoded8Bit.left( start ) +
"=?" + usedCS;
343 if ( useQEncoding ) {
347 for (
int i=start; i<end; i++ ) {
// ASCII letters and digits are tested separately from all other bytes.
352 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
353 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
354 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
// 48 is ASCII '0'; a value of 58 or more lies beyond '9' and is adjusted in
// lines that are not part of this excerpt.
358 hexcode = ((c & 0xF0) >> 4) + 48;
359 if ( hexcode >= 58 ) {
363 hexcode = (c & 0x0F) + 48;
364 if ( hexcode >= 58 ) {
372 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
// Bytes from `end` onward are appended unchanged.
376 result += encoded8Bit.right( encoded8Bit.length() - end );
// Alternative visible outcome: the converted bytes are returned unencoded.
378 result = encoded8Bit;
// Splits `src` into words at ASCII characters found in reservedCharacters.
// Each word is cut out with src.mid() and the reserved character itself is
// appended to the result as-is; how the words are encoded is not visible in
// this excerpt.
// @param src      text to process
// @param charset  charset name (its use is not visible in this excerpt)
384 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
// NOTE(review): splitChars is filled here but not referenced in any visible
// line; the split test below uses reservedCharacters instead.
387 QList<QChar> splitChars;
388 splitChars << QLatin1Char(
',') << QLatin1Char(
'\"') << QLatin1Char(
';') << QLatin1Char(
'\\');
389 const QChar *ch = src.constData();
390 const int length = src.length();
397 while (pos < length) {
// Code points below 127 count as ASCII, so DEL (127) does not.
399 const bool isAscii = ch->unicode() < 127;
// NOTE(review): strchr() also matches the terminating NUL of its first
// argument, so a U+0000 character would be reported as reserved.
400 const bool isReserved = (strchr( reservedCharacters, ch->toAscii() ) != 0);
401 if ( isAscii && isReserved ) {
402 const int wordSize = pos - wordStart;
404 const QString word = src.mid( wordStart, wordSize );
408 result += ch->toAscii();
// Trailing word after the loop's last split point.
416 const int wordSize = pos - wordStart;
418 const QString word = src.mid( wordStart, pos - wordStart );
// Converts `str` to bytes (three visible alternatives: toAscii() for
// "us-ascii", the looked-up codec, or the local 8-bit encoding), checks
// whether any byte needs protection, then builds a result that starts with
// the charset name followed by two apostrophes.
434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
436 if ( charset ==
"us-ascii" )
437 latin = str.toAscii();
439 latin = codec->fromUnicode( str );
441 latin = str.toLocal8Bit();
// First pass: look for bytes below 0x20 or with the high bit set.
444 for ( l = latin.data(); *l; ++l ) {
445 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
452 QByteArray result = charset +
"''";
// Second pass: a byte needs quoting when it has the high bit set, is '%', or
// appears in the especials list.
453 for ( l = latin.data(); *l; ++l ) {
454 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
455 if( !needsQuoting ) {
456 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
457 int len = especials.length();
458 for (
int i = 0; i < len; i++ )
459 if ( *l == especials[i] ) {
464 if ( needsQuoting ) {
// One hex digit per nibble; 48 is ASCII '0'. Adjustment for values above '9'
// happens in lines that are not part of this excerpt.
466 unsigned char hexcode;
467 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
471 hexcode = ( *l & 0x0F ) + 48;
// Input without any apostrophe is decoded directly with the default charset.
// Otherwise the text before the first apostrophe is taken as the charset and
// the text after the last apostrophe as the payload, whose escapes are
// collapsed in place before the final charset conversion.
487 int p = str.indexOf(
'\'');
// NOTE(review): the codec returned for defaultCS is dereferenced without a
// visible null check.
488 if (p < 0)
return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ))->toUnicode( str );
491 QByteArray charset = str.left(p);
493 QByteArray st = str.mid( str.lastIndexOf(
'\'') + 1 );
497 while (p < (
int)st.length())
// The two bytes after position p are read as hex digits (48 is ASCII '0');
// corrections for letter digits are in lines not shown here.
503 if ( p + 2 < st.length() ) {
504 ch = st.at(p+1) - 48;
507 ch2 = st.at(p+2) - 48;
510 st[p] = ch * 16 + ch2;
516 kDebug() <<
"Got pre-decoded:" << st;
// Use the embedded charset unless it is unknown or the caller forces the
// default.
518 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
519 if ( !charsetcodec || forceCS )
520 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
// NOTE(review): charsetcodec is not re-checked for null after the fallback
// lookup.
522 usedCS = charsetcodec->name();
523 return charsetcodec->toUnicode( st );
// Alphabet for the random part. It holds 61 characters (the lowercase run
// has no 'w'), which matches the 61.0 scale factor used to pick an index.
534 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
538 unsigned int timeval;
// ran falls in [1, 1000]; it divides `now` before the process id is added.
542 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
543 timeval = (now / ran) + getpid();
// Ten iterations, each drawing an index in [0, 60].
545 for (
int i=0; i<10; i++ ) {
546 pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
// timeval is rendered as a decimal number into `ret`.
552 ret.setNum( timeval );
// For every '\n' in `header`, foldBegin is moved left over preceding
// whitespace and foldEnd is moved right over following whitespace. The text
// between the previous position and foldBegin is copied to `result`, and the
// remainder after the last '\n' is appended at the end.
567 if ( header.isEmpty() ) {
571 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
572 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
573 foldBegin = foldEnd = foldMid;
// Walk back while the preceding byte is whitespace.
575 while ( foldBegin > 0 ) {
576 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
// Walk forward over whitespace. Directly after a '\n', the sequences "=09"
// and "=20" (hex codes of TAB and SPACE) are recognised as well.
582 while ( foldEnd <= header.length() - 1 ) {
583 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
586 else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
587 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
588 ( ( header[foldEnd + 1] ==
'0' &&
589 header[foldEnd + 2] ==
'9' ) ||
590 ( header[foldEnd + 1] ==
'2' &&
591 header[foldEnd + 2] ==
'0' ) ) ) {
600 result += header.mid( pos, foldBegin - pos );
601 if ( foldEnd < header.length() -1 )
// Copy whatever follows the last processed fold.
605 const int len = header.length();
607 result += header.mid( pos, len - pos );
// Searches `src` from dataBegin for the '\n' that ends a header line,
// treating a '\n' followed by space, TAB, "=09" or "=20" as a continuation.
// @param src        raw header bytes
// @param dataBegin  start offset; checked against both ends of `src`
// @param folded     optional flag pointer (how it is set is not visible here)
612 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
// Note: len is the index of the last byte, not the byte count.
615 int len = src.length() - 1;
620 if ( dataBegin < 0 ) {
625 if ( dataBegin > len ) {
// A line break immediately followed by space or TAB.
633 if ( src.at(end) ==
'\n' && end + 1 < len &&
634 ( src[end+1] ==
' ' || src[end+1] ==
'\t' ) ) {
641 if ( src.at(end) !=
'\n' ) {
643 end = src.indexOf(
'\n', end + 1 );
// No further '\n', or the '\n' is the last byte.
644 if ( end == -1 || end == len ) {
// Continuation markers after the '\n': space, TAB, or the sequences "=09" /
// "=20" (bounds-checked with end+3 <= len).
648 else if ( src[end+1] ==
' ' || src[end+1] ==
'\t' ||
649 ( src[end+1] ==
'=' && end+3 <= len &&
650 ( ( src[end+2] ==
'0' && src[end+3] ==
'9' ) ||
651 ( src[end+2] ==
'2' && src[end+3] ==
'0' ) ) ) ) {
// Locates header `name` in `src` (case-insensitively) and reports where its
// value starts and ends.
// @param src        raw header bytes
// @param name       header name to look for
// @param end        out: value end, as computed by findHeaderLineEnd()
// @param dataBegin  out: offset of the value
// @param folded     passed through to findHeaderLineEnd()
668 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
// `n` is the search pattern, built in lines that are not part of this
// excerpt. It is first compared against the very start of `src`, then
// searched for anywhere in `src`.
674 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
678 const char *p = strcasestr( src.constData(), n.constData() );
682 begin = p - src.constData();
// Skip the name plus one further byte (presumably the ':' — confirm).
688 dataBegin = begin + name.length() + 1;
// NOTE(review): no visible bounds check before src.at( dataBegin ).
690 if ( src.at( dataBegin ) ==
' ' ) {
693 end = findHeaderLineEnd( src, dataBegin, folded );
// `begin` is passed as indexOfHeader()'s dataBegin argument, so on success it
// holds the start of the header value, which is then cut out up to `end`.
709 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
715 result = src.mid( begin, end - begin );
718 QByteArray hdrValue = src.mid( begin, end - begin );
// Collects the values of every occurrence of header `name` in `src`.
// Works on a private copy that is shortened past each match before the next
// search.
// @param src   raw header bytes
// @param name  header name to look for
726 QList<QByteArray>
extractHeaders(
const QByteArray &src,
const QByteArray &name )
730 QList<QByteArray> result;
731 QByteArray copySrc( src );
733 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
737 while ( begin >= 0 ) {
739 result.append( copySrc.mid( begin, end - begin ) );
741 QByteArray hdrValue = copySrc.mid( begin, end - begin );
// Drop everything up to `end` and search the remainder.
746 copySrc = copySrc.mid( end );
747 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
// Removes header `name` from `header` in place, from the offset returned by
// indexOfHeader() through `end` inclusive (hence the + 1).
// NOTE(review): a guard for a negative `begin` is not visible in this excerpt
// — confirm one exists before the remove() call.
755 void removeHeader( QByteArray &header,
const QByteArray &name )
757 int begin, end, dummy;
758 begin = indexOfHeader( header, name, end, dummy );
760 header.remove( begin, end - begin + 1 );
// Collapse every CR LF pair in `ret` to a single LF.
767 ret.replace(
"\r\n",
"\n" );
// Expand every LF in `ret` to CR LF.
// NOTE(review): whether existing CR LF pairs are normalised beforehand is not
// visible in this excerpt.
780 ret.replace(
'\n',
"\r\n" );
784 QByteArray
LFtoCRLF(
const char *s )
// Generic helper shared by the QByteArray and QString variants.
// Scans `str` once, tracking whether the current position lies inside double
// quotes; a backslash is only treated specially while inside quotes. The
// edits applied to `str` are in lines that are not part of this excerpt.
// StringType: string class indexed with operator[]; CharType: type used to
// build the comparison characters.
791 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
793 bool inQuote =
false;
794 for (
int i = 0; i < str.length(); ++i ) {
795 if ( str[i] == CharType(
'"' ) ) {
800 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
// Byte-string variant: characters are plain char.
810 removeQuotesGeneric<QByteArray,char>( str );
// Unicode-string variant: comparison characters are built via QLatin1Char.
815 removeQuotesGeneric<QString,QLatin1Char>( str );
// Generic helper shared by the QByteArray and QString variants.
// Inserts a backslash before every backslash or double quote in `str`, and
// wraps `str` in double quotes when needsQuotes or forceQuotes is set.
// @param str          string modified in place
// @param forceQuotes  wrap in quotes even if no special character was found
818 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
819 void addQuotes_impl( StringType &str,
bool forceQuotes )
821 bool needsQuotes=
false;
822 for (
int i=0; i < str.length(); i++ ) {
823 const CharType cur = str.at( i );
// NOTE(review): as far as the visible lines show, this test converts the
// whole string and builds a new QRegExp on every loop iteration although it
// does not depend on `i`. The pattern also lists ',' twice.
824 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
827 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
828 str.insert( i, CharConverterType(
'\\' ) );
833 if ( needsQuotes || forceQuotes ) {
834 str.insert( 0, CharConverterType(
'\"' ) );
835 str.append( StringConverterType(
"\"" ) );
// Byte-string variant.
841 addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes );
// Unicode-string variant.
846 addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes );
// Unicode bidirectional control characters: LRO/RLO are the directional
// overrides, LRE/RLE the directional embeddings, PDF pops the most recent one.
851 const int LRO = 0x202D;
852 const int RLO = 0x202E;
853 const int LRE = 0x202A;
854 const int RLE = 0x202B;
855 const int PDF = 0x202C;
// Work on a copy; `input` itself is only read.
857 QString result = input;
859 int openDirChangers = 0;
860 int numPDFsRemoved = 0;
861 for (
int i = 0; i < input.length(); i++ ) {
862 const ushort &code = input.at( i ).unicode();
863 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
866 else if ( code == PDF ) {
867 if ( openDirChangers > 0 ) {
// A PDF with nothing open is removed from `result`. The index is corrected by
// the number of characters already removed, because `i` indexes `input`.
872 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
873 result.remove( i - numPDFsRemoved, 1 );
// Changers still open at the end: append one PDF per open changer, placing
// it before a trailing double quote when `result` ends with one.
879 if ( openDirChangers > 0 ) {
880 kWarning() <<
"Possible Unicode spoofing detected in" << input;
885 for (
int i = openDirChangers; i > 0; i-- ) {
886 if ( result.endsWith( QLatin1Char(
'"' ) ) )
887 result.insert( result.length() - 1, QChar( PDF ) );
889 result += QChar( PDF );
// Strips the four direction-changing control characters from a copy of
// `input`. U+202C (PDF) is not among the characters removed here.
898 const int LRO = 0x202D;
899 const int RLO = 0x202E;
900 const int LRE = 0x202A;
901 const int RLE = 0x202B;
902 QString result = input;
903 result.remove( LRO );
904 result.remove( RLO );
905 result.remove( LRE );
906 result.remove( RLE );
// Returns true when the content type's media type is "application"
// (compared in lower case) and its subtype is one of the PGP / PKCS#7
// subtypes listed below. "octet-stream" only counts together with a further
// condition that is not part of this excerpt.
// @param content  part to inspect; how contentType is obtained from it is not
//                 visible here
910 static bool isCryptoPart(
Content* content )
920 const QByteArray lowerSubType = contentType->
subType().toLower();
921 return ( contentType->
mediaType().toLower() ==
"application" &&
922 ( lowerSubType ==
"pgp-encrypted" ||
923 lowerSubType ==
"pgp-signature" ||
924 lowerSubType ==
"pkcs7-mime" ||
925 lowerSubType ==
"pkcs7-signature" ||
926 lowerSubType ==
"x-pkcs7-signature" ||
927 ( lowerSubType ==
"octet-stream" &&
// emptyFilename starts out true and is cleared at two places whose
// conditions are not part of this excerpt. The final test passes only for
// parts that have a filename and are not crypto parts.
936 bool emptyFilename =
true;
938 emptyFilename =
false;
941 emptyFilename =
false;
944 if( !emptyFilename && !isCryptoPart( content ) )
// Signature test: matches when the content type has one of the listed
// signature subtypes, or when the message has a main body part of one of the
// listed PKCS#7 signature types. Further alternatives may sit in lines that
// are not part of this excerpt.
964 if ( contentType->
isSubtype(
"signed" ) ||
965 contentType->
isSubtype(
"pgp-signature" ) ||
966 contentType->
isSubtype(
"pkcs7-signature" ) ||
967 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
970 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
971 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
// Encryption test: matches when the content type has one of the listed
// encryption subtypes. The rest of the condition is not part of this excerpt.
984 if ( contentType->
isSubtype(
"encrypted" ) ||
985 contentType->
isSubtype(
"pgp-encrypted" ) ||
986 contentType->
isSubtype(
"pkcs7-mime" ) ||
// Matches a non-null content type of text/calendar.
1003 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) )