24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <kcharsets.h>
41 #include <QtCore/QList>
42 #include <QtCore/QString>
43 #include <QtCore/QTextCodec>
49 #include <boost/concept_check.hpp>
51 using namespace KMime;
// File-scope state shared by the helpers in this file.
// Name caches: entries are appended upper-cased (see the append( name.toUpper() ) calls further down).
55 QList<QByteArray> c_harsetCache;
56 QList<QByteArray> l_anguageCache;
// Name of the fallback character encoding; empty until something assigns it.
57 QString f_allbackCharEnc;
// Flag that is later assigned from a `violateStandard` argument; starts out false.
58 bool u_seOutlookEncoding =
false;
// Excerpt of a charset-name cache lookup (the enclosing signature and some lines are not visible here).
// Each cached entry is compared with `name` case-insensitively.
62 foreach (
const QByteArray& charset, c_harsetCache ) {
63 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
// Presumably the cache-miss path: store the upper-cased name and return the stored copy.
68 c_harsetCache.append( name.toUpper() );
70 return c_harsetCache.last();
// Excerpt of a language-name cache lookup (the enclosing signature and some lines are not visible here).
// Each cached entry is compared with `name` case-insensitively.
75 foreach (
const QByteArray& language, l_anguageCache ) {
76 if ( qstricmp( name.data(), language.data() ) == 0 ) {
// Presumably the cache-miss path: store the upper-cased name and return the stored copy.
81 l_anguageCache.append( name.toUpper() );
83 return l_anguageCache.last();
// Walks every character of `s` (the enclosing signature is not visible in this excerpt).
88 uint sLength = s.length();
89 for ( uint i=0; i<sLength; i++ ) {
// Trips on any character whose toLatin1() value is zero or negative.
90 if ( s.at( i ).toLatin1() <= 0 ) {
// Maps each Headers content-encoding enumerator to its textual name;
// any value without its own case yields "unknown".
100 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
101 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
102 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
103 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
104 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
105 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
106 default:
return QString::fromLatin1(
"unknown" );
// Collects content encodings into `allowed`, grouped by the value of cf.type().
// The case labels that select each group are not visible in this excerpt.
112 QList<Headers::contentEncoding> allowed;
115 switch ( cf.
type() ) {
117 allowed << Headers::CE7Bit;
119 allowed << Headers::CE8Bit;
// Quoted-printable is inserted before base64 here ...
125 allowed << Headers::CEquPr;
126 allowed << Headers::CEbase64;
// ... and after it here. NOTE(review): the order presumably expresses preference — confirm with callers.
128 allowed << Headers::CEbase64;
129 allowed << Headers::CEquPr;
133 allowed << Headers::CEbase64;
// Five 16-byte lookup tables. Presumably 128-bit bitmaps with one bit per 7-bit
// ASCII character (16 * 8 = 128) — TODO confirm against the lookup code, which is
// not part of this excerpt. The closing braces of the initializers are not visible here either.
144 const uchar specialsMap[16] = {
145 0x00, 0x00, 0x00, 0x00,
146 0x20, 0xCA, 0x00, 0x3A,
147 0x80, 0x00, 0x00, 0x1C,
148 0x00, 0x00, 0x00, 0x00
152 const uchar tSpecialsMap[16] = {
153 0x00, 0x00, 0x00, 0x00,
154 0x20, 0xC9, 0x00, 0x3F,
155 0x80, 0x00, 0x00, 0x1C,
156 0x00, 0x00, 0x00, 0x00
160 const uchar aTextMap[16] = {
161 0x00, 0x00, 0x00, 0x00,
162 0x5F, 0x35, 0xFF, 0xC5,
163 0x7F, 0xFF, 0xFF, 0xE3,
164 0xFF, 0xFF, 0xFF, 0xFE
168 const uchar tTextMap[16] = {
169 0x00, 0x00, 0x00, 0x00,
170 0x5F, 0x36, 0xFF, 0xC0,
171 0x7F, 0xFF, 0xFF, 0xE3,
172 0xFF, 0xFF, 0xFF, 0xFE
176 const uchar eTextMap[16] = {
177 0x00, 0x00, 0x00, 0x00,
178 0x40, 0x35, 0xFF, 0xC0,
179 0x7F, 0xFF, 0xFF, 0xE0,
180 0x7F, 0xFF, 0xFF, 0xE0
// Stores the caller-supplied fallback encoding name in the file-scope variable.
185 f_allbackCharEnc = fallbackCharEnc;
// Reads the stored fallback encoding name back.
190 return f_allbackCharEnc;
// Stores the caller-supplied flag in the file-scope variable.
195 u_seOutlookEncoding = violateStandard;
// Reads the stored flag back.
200 return u_seOutlookEncoding;
205 const QByteArray &defaultCS,
bool forceCS )
// Excerpt of the encoded-word decoder; the start of the signature and several lines are not visible here.
// spaceBuffer collects whitespace seen directly after an encoded word.
208 QByteArray spaceBuffer;
209 const char *scursor = src.constData();
210 const char *send = scursor + src.length();
211 bool onlySpacesSinceLastWord =
false;
// Single pass over the raw bytes of `src`.
213 while ( scursor != send ) {
// Whitespace that follows an encoded word is held back instead of being emitted right away.
215 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
216 spaceBuffer += *scursor++;
// '=' may open an encoded word; `start` remembers the position before the parse attempt.
221 if ( *scursor ==
'=' ) {
225 const char *start = scursor;
226 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
// Successfully decoded words are accumulated as UTF-8.
227 result += decoded.toUtf8();
228 onlySpacesSinceLastWord =
true;
// The held-back whitespace is emitted and the flag cleared.
231 if ( onlySpacesSinceLastWord ) {
232 result += spaceBuffer;
233 onlySpacesSinceLastWord =
false;
241 if ( onlySpacesSinceLastWord ) {
242 result += spaceBuffer;
243 onlySpacesSinceLastWord =
false;
// If interpreting the bytes as UTF-8 yields the replacement character U+FFFD and a
// fallback encoding is configured, the bytes are decoded with the fallback codec instead.
251 const QString tryUtf8 = QString::fromUtf8( result );
252 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
253 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
254 return codec->toUnicode( result );
// Characters treated as reserved; looked up with strchr() by the encoder code further down.
266 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
269 bool addressHeader,
bool allow8BitHeaders )
// Excerpt of the encoded-word encoder; the start of the signature and several lines are not visible here.
// Flags that steer the encoding decision.
273 bool nonAscii=
false, ok=
true, useQEncoding=
false;
// Resolve the requested charset to a codec; `ok` receives the lookup outcome.
276 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
// Charset taken from the locale encoding — presumably the path used when the lookup above fails
// (the guarding condition is not visible here).
281 usedCS = KGlobal::locale()->encoding();
282 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
285 if ( charset.isEmpty() ) {
286 usedCS = codec->name();
// Convert to the 8-bit charset while counting characters the codec cannot represent.
292 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
293 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
// With unrepresentable characters the codec is looked up again from usedCS and the text converted anew
// (the line that changes usedCS is not visible here).
294 if ( converterState.invalidChars > 0 ) {
296 codec = QTextCodec::codecForName( usedCS );
297 encoded8Bit = codec->fromUnicode( src );
300 if ( usedCS.contains(
"8859-" ) ) {
304 if ( allow8BitHeaders ) {
// Scan the converted bytes for spaces and for bytes that require encoding.
308 uint encoded8BitLength = encoded8Bit.length();
309 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
310 if ( encoded8Bit[i] ==
' ' ) {
// A byte requires encoding when it is negative as a signed char, is ESC (\033),
// or — for address headers — is one of the reserved characters.
315 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
316 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
// `end` is advanced up to the next space (or the end of the data).
324 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Same test applied to the bytes from `end` onwards.
329 for (
int x=end; x<encoded8Bit.length(); x++ ) {
330 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
331 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
334 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
// Output begins with the untouched prefix, then "=?" and the charset name.
341 result = encoded8Bit.left( start ) +
"=?" + usedCS;
343 if ( useQEncoding ) {
347 for (
int i=start; i<end; i++ ) {
// ASCII letters and digits are recognised by this test.
352 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
353 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
354 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
// High nibble to a hex digit: 48 is '0'; results of 58 and above lie past '9'
// (the adjustment for them is on a line not visible here).
358 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
359 if ( hexcode >= 58 ) {
// Same for the low nibble.
363 hexcode = ( c & 0x0F ) + 48;
364 if ( hexcode >= 58 ) {
// Base64 variant: "?B?" marker followed by the base64 of bytes [start, end).
372 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
// Append everything after the encoded section.
376 result += encoded8Bit.right( encoded8Bit.length() - end );
// Here the converted text is used unchanged (the selecting condition is not visible here).
378 result = encoded8Bit;
// Encodes `src` piecewise (excerpt: braces and several lines are not visible here).
// The visible code walks the string and, at each ASCII character found in
// reservedCharacters, cuts off the word before it and appends that character itself.
384 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
// NOTE(review): no use of splitChars is visible in this excerpt — check whether it is still needed.
387 QList<QChar> splitChars;
388 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
389 const QChar *ch = src.constData();
390 const int length = src.length();
397 while ( pos < length ) {
// Code points below 127 count as ASCII here, so 127 itself does not.
399 const bool isAscii = ch->unicode() < 127;
400 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
401 if ( isAscii && isReserved ) {
// Word accumulated since wordStart, if any.
402 const int wordSize = pos - wordStart;
403 if ( wordSize > 0 ) {
404 const QString word = src.mid( wordStart, wordSize );
// The reserved character is copied to the output as is.
408 result += ch->toLatin1();
// A word still pending after the scan (presumably after the loop — placement not visible here).
416 const int wordSize = pos - wordStart;
417 if ( wordSize > 0 ) {
418 const QString word = src.mid( wordStart, pos - wordStart );
// Excerpt of a percent-encoder for parameter values (enclosing signature and several lines not visible here).
// Guard for empty input.
430 if ( str.isEmpty() ) {
434 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
// Pick the 8-bit representation: Latin-1 for "us-ascii", the charset's codec when
// one was found, otherwise the local 8-bit encoding.
436 if ( charset ==
"us-ascii" ) {
437 latin = str.toLatin1();
438 }
else if ( codec ) {
439 latin = codec->fromUnicode( str );
441 latin = str.toLocal8Bit();
// First pass (stops at the first NUL byte): looks for bytes below 0x20 or with the high bit set.
445 for ( l = latin.data(); *l; ++l ) {
446 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
// Result starts with the charset name and two apostrophes.
455 QByteArray result = charset +
"''";
// Second pass: a byte needs quoting when its high bit is set, when it is '%',
// or when it appears in the especials list below.
456 for ( l = latin.data(); *l; ++l ) {
457 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
458 if ( !needsQuoting ) {
459 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
460 int len = especials.length();
461 for (
int i = 0; i < len; i++ ) {
462 if ( *l == especials[i] ) {
468 if ( needsQuoting ) {
// Nibbles to hex digits: 48 is '0'; results of 58 and above lie past '9'
// (the adjustment for them is on lines not visible here).
470 unsigned char hexcode;
471 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
472 if ( hexcode >= 58 ) {
476 hexcode = ( *l & 0x0F ) + 48;
477 if ( hexcode >= 58 ) {
// Excerpt of the matching percent-decoder (enclosing signature and several lines not visible here).
// Position of the first apostrophe in `str`.
493 int p = str.indexOf(
'\'' );
// Decodes the whole input with the default charset's codec — presumably when no
// apostrophe was found (the condition is not visible here).
495 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
// Charset name: everything before the first apostrophe.
499 QByteArray charset = str.left( p );
// Payload: everything after the last apostrophe.
501 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
// Replace each "%XY" sequence in place; 37 is '%'.
505 while ( p < (
int)st.length() ) {
506 if ( st.at( p ) == 37 ) {
509 if ( p + 2 < st.length() ) {
// Digit value relative to '0' (48); the correction for hex letters is on lines not visible here.
510 ch = st.at( p + 1 ) - 48;
514 ch2 = st.at( p + 2 ) - 48;
// The '%' becomes the decoded byte and the two digit characters are dropped.
518 st[p] = ch * 16 + ch2;
519 st.remove( p + 1, 2 );
524 kDebug() <<
"Got pre-decoded:" << st;
// Use the codec of the embedded charset unless none was found or forceCS is set;
// in that case the default charset's codec is used.
526 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
527 if ( !charsetcodec || forceCS ) {
528 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
// Report the codec actually used, then decode.
531 usedCS = charsetcodec->name();
532 return charsetcodec->toUnicode( st );
// Excerpt of a pseudo-random string generator (enclosing signature and several lines not visible here).
// The table holds 61 characters (lowercase 'w' is absent), which matches the 61.0 scale factor below.
543 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
547 unsigned int timeval;
// ran lies in 1..1000, so the division below never divides by zero.
551 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
552 timeval = ( now / ran ) + getpid();
// Ten iterations, each drawing a table index in 0..60.
554 for (
int i = 0; i < 10; i++ ) {
555 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
// The numeric part is rendered as decimal text into `ret`.
561 ret.setNum( timeval );
// Excerpt of the header unfolding routine (enclosing signature and several lines not visible here).
// Guard for empty input.
576 if ( header.isEmpty() ) {
// Handle every '\n' found from `pos` onwards.
580 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
581 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
582 foldBegin = foldEnd = foldMid;
// Look backwards from the newline for the first non-whitespace character.
584 while ( foldBegin > 0 ) {
585 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
// Look forwards over whitespace; a "=09" or "=20" sequence directly after a
// newline is also recognised.
591 while ( foldEnd <= header.length() - 1 ) {
592 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
594 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
595 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
596 ( ( header[foldEnd + 1] ==
'0' &&
597 header[foldEnd + 2] ==
'9' ) ||
598 ( header[foldEnd + 1] ==
'2' &&
599 header[foldEnd + 2] ==
'0' ) ) ) {
// Text in front of the fold goes to the result.
608 result += header.mid( pos, foldBegin - pos );
609 if ( foldEnd < header.length() - 1 ) {
// Whatever remains from `pos` to the end is appended.
614 const int len = header.length();
616 result += header.mid( pos, len - pos );
// Locates the end of a header line in `src`, starting from dataBegin (excerpt:
// braces and several lines are not visible here). dataBegin is taken by reference
// and `folded` is an output flag; how they are written is on lines not shown.
621 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
// `len` is the index of the last byte, not the byte count.
624 int len = src.length() - 1;
// Range checks on dataBegin.
630 if ( dataBegin < 0 ) {
635 if ( dataBegin > len ) {
// A newline directly followed by a space or tab.
643 if ( src.at( end ) ==
'\n' && end + 1 < len &&
644 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
651 if ( src.at( end ) !=
'\n' ) {
// Advance to the next newline; none found, or a newline as the very last byte, is tested first.
653 end = src.indexOf(
'\n', end + 1 );
654 if ( end == -1 || end == len ) {
// The next line starts with a space, a tab, or the sequence "=09" / "=20".
657 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
658 ( src[end + 1] ==
'=' && end + 3 <= len &&
659 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
660 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
// Searches `src` for the header `name` (excerpt: braces and several lines are not
// visible here). `end` and `dataBegin` are written by the visible code; `folded`
// is passed on to findHeaderLineEnd().
678 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
// Case-insensitive match at the very start of src (`n` is built on a line not visible here).
684 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
// Otherwise a case-insensitive substring search.
688 const char *p = strcasestr( src.constData(), n.constData() );
692 begin = p - src.constData();
// Skip the name plus one more character (presumably the ':' — TODO confirm).
698 dataBegin = begin + name.length() + 1;
// NOTE(review): no bounds check on dataBegin is visible before this at() call.
700 if ( src.at( dataBegin ) ==
' ' ) {
703 end = findHeaderLineEnd( src, dataBegin, folded );
// Excerpt of a single-header extraction (enclosing signature and several lines not visible here).
// Empty input, or a header that indexOfHeader() does not find, is handled first.
719 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
// The value is the byte range [begin, end) of src.
725 result = src.mid( begin, end - begin );
728 QByteArray hdrValue = src.mid( begin, end - begin );
// Collects the values of the headers called `name` found in `src` (excerpt: braces
// and several lines are not visible here).
736 QList<QByteArray>
extractHeaders(
const QByteArray &src,
const QByteArray &name )
740 QList<QByteArray> result;
// Work on a copy so it can be cut down after each match.
741 QByteArray copySrc( src );
743 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
747 while ( begin >= 0 ) {
749 result.append( copySrc.mid( begin, end - begin ) );
751 QByteArray hdrValue = copySrc.mid( begin, end - begin );
// Continue the search in the text after the header just handled.
756 copySrc = copySrc.mid( end );
757 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
// Removes the header `name` from `header` in place (excerpt: braces and at least
// one line are not visible here).
764 void removeHeader( QByteArray &header,
const QByteArray &name )
766 int begin, end, dummy;
767 begin = indexOfHeader( header, name, end, dummy );
// Removes end - begin + 1 bytes starting at `begin`.
// NOTE(review): confirm that the line not shown here guards against begin < 0.
769 header.remove( begin, end - begin + 1 );
// Line-ending conversion fragments (surrounding signatures and bodies are mostly not visible here).
// Every "\r\n" in ret becomes "\n".
776 ret.replace(
"\r\n",
"\n" );
// Every '\n' in ret becomes "\r\n".
789 ret.replace(
'\n',
"\r\n" );
// Overload taking a C string; its body is not visible in this excerpt.
793 QByteArray
LFtoCRLF(
const char *s )
// Generic quote handling over a string type (excerpt: braces and several lines are
// not visible here). The visible code tests each character for '"' and, while
// inQuote is set, for a backslash.
800 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
802 bool inQuote =
false;
803 for (
int i = 0; i < str.length(); ++i ) {
804 if ( str[i] == CharType(
'"' ) ) {
809 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
// Instantiation for byte arrays.
819 removeQuotesGeneric<QByteArray, char>( str );
// Instantiation for QString.
824 removeQuotesGeneric<QString, QLatin1Char>( str );
// Generic quoting helper (excerpt: braces and several lines are not visible here).
// The visible code inserts a backslash in front of each backslash or double quote
// and wraps `str` in double quotes when needsQuotes or forceQuotes is set.
827 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
828 void addQuotes_impl( StringType &str,
bool forceQuotes )
830 bool needsQuotes=
false;
831 for (
int i=0; i < str.length(); i++ ) {
832 const CharType cur = str.at( i );
// NOTE(review): this test inspects the whole string, not `cur`, yet sits inside the
// per-character loop; the pattern also lists ',' twice.
833 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
836 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
837 str.insert( i, CharConverterType(
'\\' ) );
842 if ( needsQuotes || forceQuotes ) {
843 str.insert( 0, CharConverterType(
'\"' ) );
844 str.append( StringConverterType(
"\"" ) );
// Instantiation for byte arrays.
850 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
// Instantiation for QString.
855 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
// Excerpt of a routine that balances bidirectional control characters in `input`
// (enclosing signature and several lines not visible here).
// Code points: LRO/RLO = left-to-right/right-to-left override, LRE/RLE = embedding,
// PDF = pop directional formatting.
860 const int LRO = 0x202D;
861 const int RLO = 0x202E;
862 const int LRE = 0x202A;
863 const int RLE = 0x202B;
864 const int PDF = 0x202C;
// Edits are made on a copy; the scan itself reads `input`.
866 QString result = input;
868 int openDirChangers = 0;
869 int numPDFsRemoved = 0;
870 for (
int i = 0; i < input.length(); i++ ) {
871 const ushort &code = input.at( i ).unicode();
872 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
874 }
else if ( code == PDF ) {
875 if ( openDirChangers > 0 ) {
// A PDF with nothing open: warn and drop it from result. The index is shifted by
// numPDFsRemoved because result may already be shorter than input.
879 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
880 result.remove( i - numPDFsRemoved, 1 );
// Direction changes left open: warn, then add one PDF per open change.
886 if ( openDirChangers > 0 ) {
887 kWarning() <<
"Possible Unicode spoofing detected in" << input;
892 for (
int i = openDirChangers; i > 0; i-- ) {
// When result ends with a double quote the PDF goes in front of that quote.
893 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
894 result.insert( result.length() - 1, QChar( PDF ) );
896 result += QChar( PDF );
// Excerpt of a routine that strips the four direction-changing control characters
// from a copy of `input` (enclosing signature not visible here).
// NOTE(review): PDF (0x202C) is not among the characters removed in the visible lines.
906 const int LRO = 0x202D;
907 const int RLO = 0x202E;
908 const int LRE = 0x202A;
909 const int RLE = 0x202B;
910 QString result = input;
911 result.remove( LRO );
912 result.remove( RLO );
913 result.remove( LRE );
914 result.remove( RLE );
// Reports whether `content` has an "application" media type with one of the
// subtypes listed below (excerpt: `contentType` is obtained on lines not visible
// here, and the "octet-stream" case carries a further condition that is cut off).
918 static bool isCryptoPart(
Content* content )
// Both media type and subtype are compared in lower case.
930 const QByteArray lowerSubType = contentType->
subType().toLower();
931 return ( contentType->
mediaType().toLower() ==
"application" &&
932 ( lowerSubType ==
"pgp-encrypted" ||
933 lowerSubType ==
"pgp-signature" ||
934 lowerSubType ==
"pkcs7-mime" ||
935 lowerSubType ==
"pkcs7-signature" ||
936 lowerSubType ==
"x-pkcs7-signature" ||
937 ( lowerSubType ==
"octet-stream" &&
// Excerpt of a check built around a filename (enclosing signature and several lines not visible here).
// emptyFilename starts true and is cleared at two places whose conditions are cut off.
947 bool emptyFilename =
true;
950 emptyFilename =
false;
953 if ( emptyFilename &&
956 emptyFilename =
false;
// Continues only for content that has a filename and is not a crypto part.
960 if ( !emptyFilename && !isCryptoPart( content ) ) {
// Excerpt of a signature check (enclosing signature and several lines not visible here).
// The condition holds when the content type has one of these subtypes, or when the
// message has a main body part of one of the listed signature types.
982 if ( contentType->
isSubtype(
"signed" ) ||
983 contentType->
isSubtype(
"pgp-signature" ) ||
984 contentType->
isSubtype(
"pkcs7-signature" ) ||
985 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
988 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
989 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
// Excerpt of an encryption check (enclosing signature and several lines not visible here).
// The visible operands test the content type's subtype and look for an
// "application/pgp-encrypted" main body part.
1002 if ( contentType->
isSubtype(
"encrypted" ) ||
1003 contentType->
isSubtype(
"pgp-encrypted" ) ||
1004 contentType->
isSubtype(
"pkcs7-mime" ) ||
1006 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
// Holds for a non-null content type of media type "text" and subtype "calendar"
// (enclosing signature and body not visible here).
1022 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {