• Skip to content
  • Skip to link menu
KDE 4.6 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • KDE Home
  • Contact Us
 

KMIME Library

kmime_util.cpp

00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 
00027 #include "kmime_charfreq.h"
00028 #include "kmime_codecs.h"
00029 #include "kmime_header_parsing.h"
00030 #include "kmime_message.h"
00031 #include "kmime_warning.h"
00032 
00033 #include <config-kmime.h>
00034 #include <kdefakes.h> // for strcasestr
00035 #include <kglobal.h>
00036 #include <klocale.h>
00037 #include <kcharsets.h>
00038 #include <kcodecs.h>
00039 #include <kdebug.h>
00040 
00041 #include <QtCore/QList>
00042 #include <QtCore/QString>
00043 #include <QtCore/QTextCodec>
00044 
00045 #include <ctype.h>
00046 #include <time.h>
00047 #include <stdlib.h>
00048 #include <unistd.h>
00049 #include <boost/concept_check.hpp>
00050 
00051 using namespace KMime;
00052 
00053 namespace KMime {
00054 
// Spellings handed out by cachedCharset(); new entries are stored upper-cased.
00055 QList<QByteArray> c_harsetCache;
// Spellings handed out by cachedLanguage(); new entries are stored upper-cased.
00056 QList<QByteArray> l_anguageCache;
// Charset name decodeRFC2047String() falls back to when its result is not
// valid UTF-8; an empty string disables that fallback.
00057 QString f_allbackCharEnc;
// Flag behind setUseOutlookAttachmentEncoding()/useOutlookAttachmentEncoding().
00058 bool u_seOutlookEncoding = false;
00059 
00060 QByteArray cachedCharset( const QByteArray &name )
00061 {
00062   foreach ( const QByteArray& charset, c_harsetCache ) {
00063     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00064       return charset;
00065     }
00066   }
00067 
00068   c_harsetCache.append( name.toUpper() );
00069   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00070   return c_harsetCache.last();
00071 }
00072 
00073 QByteArray cachedLanguage( const QByteArray &name )
00074 {
00075   foreach ( const QByteArray& language, l_anguageCache ) {
00076     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00077       return language;
00078     }
00079   }
00080 
00081   l_anguageCache.append( name.toUpper() );
00082   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00083   return l_anguageCache.last();
00084 }
00085 
00086 bool isUsAscii( const QString &s )
00087 {
00088   uint sLength = s.length();
00089   for ( uint i=0; i<sLength; i++ ) {
00090     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00091       return false;
00092     }
00093   }
00094   return true;
00095 }
00096 
00097 QString nameForEncoding( Headers::contentEncoding enc )
00098 {
00099   switch( enc ) {
00100     case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00101     case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00102     case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00103     case Headers::CEbase64: return QString::fromLatin1( "base64" );
00104     case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00105     case Headers::CEbinary: return QString::fromLatin1( "binary" );
00106     default: return QString::fromLatin1( "unknown" );
00107   }
00108 }
00109 
00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00111 {
00112   QList<Headers::contentEncoding> allowed;
00113   CharFreq cf( data );
00114 
00115   switch ( cf.type() ) {
00116     case CharFreq::SevenBitText:
00117       allowed << Headers::CE7Bit;
00118     case CharFreq::EightBitText:
00119       allowed << Headers::CE8Bit;
00120     case CharFreq::SevenBitData:
00121       if ( cf.printableRatio() > 5.0/6.0 ) {
00122         // let n the length of data and p the number of printable chars.
00123         // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
00124         // => qp < base64 iff p > 5n/6.
00125         allowed << Headers::CEquPr;
00126         allowed << Headers::CEbase64;
00127       } else {
00128         allowed << Headers::CEbase64;
00129         allowed << Headers::CEquPr;
00130       }
00131       break;
00132     case CharFreq::EightBitData:
00133       allowed << Headers::CEbase64;
00134       break;
00135     case CharFreq::None:
00136     default:
00137       Q_ASSERT( false );
00138   }
00139 
00140   return allowed;
00141 }
00142 
// Bitmap with one bit per 7-bit character code; the bits set here are the
// "specials" listed below. Judging by the row comments and the values, byte k
// covers codes 8k..8k+7 with the most significant bit standing for the lowest
// code (e.g. 0x20 in byte 4 marks '"', 0x80 in byte 8 marks '@').
// NOTE(review): the lookup code is not part of this file - confirm the bit
// order against it.
00143 // "(),.:;<>@[\]
00144 const uchar specialsMap[16] = {
00145   0x00, 0x00, 0x00, 0x00, // CTLs
00146   0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
00147   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00148   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00149 };
00150 
// Bitmap with one bit per 7-bit character code, laid out like specialsMap.
// Compared to specialsMap, '.' is not set while '/', '=' and '?' are.
00151 // "(),:;<>@[\]/=?
00152 const uchar tSpecialsMap[16] = {
00153   0x00, 0x00, 0x00, 0x00, // CTLs
00154   0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
00155   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00156   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00157 };
00158 
// Bitmap with one bit per 7-bit character code, laid out like specialsMap.
// Set for every printable character that is not marked in specialsMap.
00159 // all except specials, CTLs, SPACE.
00160 const uchar aTextMap[16] = {
00161   0x00, 0x00, 0x00, 0x00, // CTLs
00162   0x5F, 0x35, 0xFF, 0xC5, // SPACE ... '?'
00163   0x7F, 0xFF, 0xFF, 0xE3, // '@' ... '_'
00164   0xFF, 0xFF, 0xFF, 0xFE  // '`' ... DEL
00165 };
00166 
// Bitmap with one bit per 7-bit character code, laid out like specialsMap.
// Set for every printable character that is not marked in tSpecialsMap.
00167 // all except tspecials, CTLs, SPACE.
00168 const uchar tTextMap[16] = {
00169   0x00, 0x00, 0x00, 0x00, // CTLs
00170   0x5F, 0x36, 0xFF, 0xC0, // SPACE ... '?'
00171   0x7F, 0xFF, 0xFF, 0xE3, // '@' ... '_'
00172   0xFF, 0xFF, 0xFF, 0xFE  // '`' ... DEL
00173 };
00174 
// Bitmap with one bit per 7-bit character code, laid out like specialsMap.
// Only letters, digits and the five punctuation characters below are set.
00175 // none except a-zA-Z0-9!*+-/
00176 const uchar eTextMap[16] = {
00177   0x00, 0x00, 0x00, 0x00, // CTLs
00178   0x40, 0x35, 0xFF, 0xC0, // '!', '*', '+', '-', '/', '0'-'9'
00179   0x7F, 0xFF, 0xFF, 0xE0, // 'A'-'Z'
00180   0x7F, 0xFF, 0xFF, 0xE0  // 'a'-'z'
00181 };
00182 
00183 void setFallbackCharEncoding(const QString& fallbackCharEnc)
00184 {
00185   f_allbackCharEnc = fallbackCharEnc;
00186 }
00187 
00188 QString fallbackCharEncoding()
00189 {
00190   return f_allbackCharEnc;
00191 }
00192 
00193 void setUseOutlookAttachmentEncoding( bool violateStandard )
00194 {
00195   u_seOutlookEncoding = violateStandard;
00196 }
00197 
00198 bool useOutlookAttachmentEncoding()
00199 {
00200   return u_seOutlookEncoding;
00201 }
00202 
00203 
00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00205                              const QByteArray &defaultCS, bool forceCS )
00206 {
00207   QByteArray result;
00208   QByteArray spaceBuffer;
00209   const char *scursor = src.constData();
00210   const char *send = scursor + src.length();
00211   bool onlySpacesSinceLastWord = false;
00212 
00213   while ( scursor != send ) {
00214      // space
00215     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00216       spaceBuffer += *scursor++;
00217       continue;
00218     }
00219 
00220     // possible start of an encoded word
00221     if ( *scursor == '=' ) {
00222       QByteArray language;
00223       QString decoded;
00224       ++scursor;
00225       const char *start = scursor;
00226       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00227         result += decoded.toUtf8();
00228         onlySpacesSinceLastWord = true;
00229         spaceBuffer.clear();
00230       } else {
00231         if ( onlySpacesSinceLastWord ) {
00232           result += spaceBuffer;
00233           onlySpacesSinceLastWord = false;
00234         }
00235         result += '=';
00236         scursor = start; // reset cursor after parsing failure
00237       }
00238       continue;
00239     } else {
00240       // unencoded data
00241       if ( onlySpacesSinceLastWord ) {
00242         result += spaceBuffer;
00243         onlySpacesSinceLastWord = false;
00244       }
00245       result += *scursor;
00246       ++scursor;
00247     }
00248   }
00249   // If there are any chars that couldn't be decoded in UTF-8,
00250   //  use the fallback charset if it exists
00251   const QString tryUtf8 = QString::fromUtf8( result );
00252   if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
00253     QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
00254     return codec->toUnicode( result );
00255   } else {
00256     return tryUtf8;
00257   }
00258 }
00259 
00260 QString decodeRFC2047String( const QByteArray &src )
00261 {
00262   QByteArray usedCS;
00263   return decodeRFC2047String( src, usedCS, "utf-8", false );
00264 }
00265 
00266 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00267                                 bool addressHeader, bool allow8BitHeaders )
00268 {
00269   QByteArray encoded8Bit, result;
00270   int start=0, end=0;
00271   bool nonAscii=false, ok=true, useQEncoding=false;
00272 
00273   // fromLatin1() is safe here, codecForName() uses toLatin1() internally
00274   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
00275 
00276   QByteArray usedCS;
00277   if ( !ok ) {
00278     //no codec available => try local8Bit and hope the best ;-)
00279     usedCS = KGlobal::locale()->encoding();
00280     codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
00281   }
00282   else {
00283     Q_ASSERT( codec );
00284     if ( charset.isEmpty() )
00285       usedCS = codec->name();
00286     else
00287       usedCS = charset;
00288   }
00289 
00290   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00291     useQEncoding = true;
00292   }
00293 
00294   encoded8Bit = codec->fromUnicode( src );
00295 
00296   if ( allow8BitHeaders ) {
00297     return encoded8Bit;
00298   }
00299 
00300   uint encoded8BitLength = encoded8Bit.length();
00301   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00302     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00303       start = i + 1;
00304     }
00305 
00306     // encode escape character, for japanese encodings...
00307     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00308          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00309       end = start;   // non us-ascii char found, now we determine where to stop encoding
00310       nonAscii = true;
00311       break;
00312     }
00313   }
00314 
00315   if ( nonAscii ) {
00316     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00317       // we encode complete words
00318       end++;
00319     }
00320 
00321     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00322       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00323            ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00324         end = x;     // we found another non-ascii word
00325 
00326         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00327           // we encode complete words
00328           end++;
00329         }
00330       }
00331     }
00332 
00333     result = encoded8Bit.left( start ) + "=?" + usedCS;
00334 
00335     if ( useQEncoding ) {
00336       result += "?Q?";
00337 
00338       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00339       for ( int i=start; i<end; i++ ) {
00340         c = encoded8Bit[i];
00341         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00342           result += '_';
00343         } else {
00344           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00345               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00346               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00347             result += c;
00348           } else {
00349             result += '=';                 // "stolen" from KMail ;-)
00350             hexcode = ((c & 0xF0) >> 4) + 48;
00351             if ( hexcode >= 58 ) {
00352               hexcode += 7;
00353             }
00354             result += hexcode;
00355             hexcode = (c & 0x0F) + 48;
00356             if ( hexcode >= 58 ) {
00357               hexcode += 7;
00358             }
00359             result += hexcode;
00360           }
00361         }
00362       }
00363     } else {
00364       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00365     }
00366 
00367     result +="?=";
00368     result += encoded8Bit.right( encoded8Bit.length() - end );
00369   } else {
00370     result = encoded8Bit;
00371   }
00372 
00373   return result;
00374 }
00375 
00376 
00377 //-----------------------------------------------------------------------------
00378 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
00379 {
00380   if ( str.isEmpty() )
00381     return QByteArray();
00382 
00383   
00384   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00385   QByteArray latin;
00386   if ( charset == "us-ascii" )
00387     latin = str.toAscii();
00388   else if ( codec )
00389     latin = codec->fromUnicode( str );
00390   else
00391     latin = str.toLocal8Bit();
00392 
00393   char *l;
00394   for ( l = latin.data(); *l; ++l ) {
00395     if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
00396       // *l is control character or 8-bit char
00397       break;
00398   }
00399   if ( !*l )
00400     return latin;
00401 
00402   QByteArray result = charset + "''";
00403   for ( l = latin.data(); *l; ++l ) {
00404     bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
00405     if( !needsQuoting ) {
00406       const QByteArray especials = "()<>@,;:\"/[]?.= \033";
00407       int len = especials.length();
00408       for ( int i = 0; i < len; i++ )
00409         if ( *l == especials[i] ) {
00410           needsQuoting = true;
00411           break;
00412         }
00413     }
00414     if ( needsQuoting ) {
00415       result += '%';
00416       unsigned char hexcode;
00417       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00418       if ( hexcode >= 58 )
00419         hexcode += 7;
00420       result += hexcode;
00421       hexcode = ( *l & 0x0F ) + 48;
00422       if ( hexcode >= 58 )
00423         hexcode += 7;
00424       result += hexcode;
00425     } else {
00426       result += *l;
00427     }
00428   }
00429   return result;
00430 }
00431 
00432 
00433 //-----------------------------------------------------------------------------
00434 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
00435   bool forceCS )
00436 {
00437   int p = str.indexOf('\'');
00438   if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS  ))->toUnicode( str );
00439 
00440   
00441   QByteArray charset = str.left(p);
00442 
00443   QByteArray st = str.mid( str.lastIndexOf('\'') + 1 );
00444   
00445   char ch, ch2;
00446   p = 0;
00447   while (p < (int)st.length())
00448   {
00449     if (st.at(p) == 37)
00450     {
00451       // Only try to decode the percent-encoded character if the percent sign
00452       // is really followed by two other characters, see testcase at bug 163024
00453       if ( p + 2 < st.length() ) {
00454         ch = st.at(p+1) - 48;
00455         if (ch > 16)
00456           ch -= 7;
00457         ch2 = st.at(p+2) - 48;
00458         if (ch2 > 16)
00459           ch2 -= 7;
00460         st[p] = ch * 16 + ch2;
00461         st.remove( p+1, 2 );
00462       }
00463     }
00464     p++;
00465   }
00466   kDebug() << "Got pre-decoded:" << st;
00467   QString result;
00468   const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00469   if ( !charsetcodec || forceCS )
00470     charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
00471 
00472   usedCS = charsetcodec->name();
00473   return charsetcodec->toUnicode( st );
00474 }
00475 
00476 QString decodeRFC2231String( const QByteArray &src )
00477 {
00478   QByteArray usedCS;
00479   return decodeRFC2231String( src, usedCS, "utf-8", false );
00480 }
00481 
00482 QByteArray uniqueString()
00483 {
00484   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00485   time_t now;
00486   char p[11];
00487   int pos, ran;
00488   unsigned int timeval;
00489 
00490   p[10] = '\0';
00491   now = time( 0 );
00492   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00493   timeval = (now / ran) + getpid();
00494 
00495   for ( int i=0; i<10; i++ ) {
00496     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00497     //kDebug() << pos;
00498     p[i] = chars[pos];
00499   }
00500 
00501   QByteArray ret;
00502   ret.setNum( timeval );
00503   ret += '.';
00504   ret += p;
00505 
00506   return ret;
00507 }
00508 
00509 QByteArray multiPartBoundary()
00510 {
00511   return "nextPart" + uniqueString();
00512 }
00513 
00514 QByteArray unfoldHeader( const QByteArray &header )
00515 {
00516   QByteArray result;
00517   int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00518   while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00519     foldBegin = foldEnd = foldMid;
00520     // find the first space before the line-break
00521     while ( foldBegin > 0 ) {
00522       if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
00523         break;
00524       }
00525       --foldBegin;
00526     }
00527     // find the first non-space after the line-break
00528     while ( foldEnd <= header.length() - 1 ) {
00529       if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
00530         ++foldEnd;
00531       }
00532       else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
00533                 header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
00534                 ( ( header[foldEnd + 1] == '0' &&
00535                     header[foldEnd + 2] == '9' ) ||
00536                   ( header[foldEnd + 1] == '2' &&
00537                     header[foldEnd + 2] == '0' ) ) ) {
00538         // bug #86302: malformed header continuation starting with =09/=20
00539         foldEnd += 3;
00540       }
00541       else {
00542         break;
00543       }
00544     }
00545 
00546     result += header.mid( pos, foldBegin - pos );
00547     if ( foldEnd < header.length() -1 )
00548       result += ' ';
00549     pos = foldEnd;
00550   }
00551   result += header.mid( pos, header.length() - pos );
00552   return result;
00553 }
00554 
00555 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
00556 {
00557   int end = dataBegin;
00558   int len = src.length() - 1;
00559 
00560   if ( folded )
00561     *folded = false;
00562 
00563   if ( dataBegin < 0 ) {
00564     // Not found
00565     return -1;
00566   }
00567 
00568   if ( dataBegin > len ) {
00569     // No data available
00570     return len + 1;
00571   }
00572 
00573   // If the first line contains nothing, but the next line starts with a space
00574   // or a tab, that means a stupid mail client has made the first header field line
00575   // entirely empty, and has folded the rest to the next line(s).
00576   if ( src.at(end) == '\n' && end + 1 < len &&
00577        ( src[end+1] == ' ' || src[end+1] == '\t' ) ) {
00578 
00579     // Skip \n and first whitespace
00580     dataBegin += 2;
00581     end += 2;
00582   }
00583 
00584   if ( src.at(end) != '\n' ) {  // check if the header is not empty
00585     while ( true ) {
00586       end = src.indexOf( '\n', end + 1 );
00587       if ( end == -1 || end == len ) {
00588         // end of string
00589         break;
00590       }
00591       else if ( src[end+1] == ' ' || src[end+1] == '\t' ||
00592                 ( src[end+1] == '=' && end+3 <= len &&
00593                   ( ( src[end+2] == '0' && src[end+3] == '9' ) ||
00594                     ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) {
00595         // next line is header continuation or starts with =09/=20 (bug #86302)
00596         if ( folded )
00597           *folded = true;
00598       } else {
00599         // end of header (no header continuation)
00600         break;
00601       }
00602     }
00603   }
00604 
00605   if ( end < 0 ) {
00606     end = len + 1; //take the rest of the string
00607   }
00608   return end;
00609 }
00610 
00611 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00612 {
00613   QByteArray n = name;
00614   n.append( ':' );
00615   int begin = -1;
00616 
00617   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00618     begin = 0;
00619   } else {
00620     n.prepend('\n');
00621     const char *p = strcasestr( src.constData(), n.constData() );
00622     if ( !p ) {
00623       begin = -1;
00624     } else {
00625       begin = p - src.constData();
00626       ++begin;
00627     }
00628   }
00629 
00630   if ( begin > -1) {     //there is a header with the given name
00631     dataBegin = begin + name.length() + 1; //skip the name
00632     // skip the usual space after the colon
00633     if ( src.at( dataBegin ) == ' ' ) {
00634       ++dataBegin;
00635     }
00636     end = findHeaderLineEnd( src, dataBegin, folded );
00637     return begin;
00638 
00639   } else {
00640     dataBegin = -1;
00641     return -1; //header not found
00642   }
00643 }
00644 
00645 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00646 {
00647   int begin, end;
00648   bool folded;
00649   indexOfHeader( src, name, end, begin, &folded );
00650 
00651   if ( begin >= 0 ) {
00652     if ( !folded ) {
00653       return src.mid( begin, end - begin );
00654     } else {
00655       QByteArray hdrValue = src.mid( begin, end - begin );
00656       return unfoldHeader( hdrValue );
00657     }
00658   } else {
00659     return QByteArray(); //header not found
00660   }
00661 }
00662 
00663 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00664 {
00665   int begin, end;
00666   bool folded;
00667   QList<QByteArray> result;
00668   QByteArray copySrc( src );
00669 
00670   indexOfHeader( copySrc, name, end, begin, &folded );
00671   while ( begin >= 0 ) {
00672     if ( !folded ) {
00673       result.append( copySrc.mid( begin, end - begin ) );
00674     } else {
00675       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00676       result.append( unfoldHeader( hdrValue ) );
00677     }
00678 
00679     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00680     copySrc = copySrc.mid( end );
00681     indexOfHeader( copySrc, name, end, begin, &folded );
00682   }
00683 
00684   return result;
00685 }
00686 
00687 void removeHeader( QByteArray &header, const QByteArray &name )
00688 {
00689   int begin, end, dummy;
00690   begin = indexOfHeader( header, name, end, dummy );
00691   if ( begin >= 0 ) {
00692     header.remove( begin, end - begin + 1 );
00693   }
00694 }
00695 
00696 QByteArray CRLFtoLF( const QByteArray &s )
00697 {
00698   QByteArray ret = s;
00699   ret.replace( "\r\n", "\n" );
00700   return ret;
00701 }
00702 
00703 QByteArray CRLFtoLF( const char *s )
00704 {
00705   QByteArray ret = s;
00706   return CRLFtoLF( ret );
00707 }
00708 
00709 QByteArray LFtoCRLF( const QByteArray &s )
00710 {
00711   QByteArray ret = s;
00712   ret.replace( '\n', "\r\n" );
00713   return ret;
00714 }
00715 
00716 QByteArray LFtoCRLF( const char *s )
00717 {
00718   QByteArray ret = s;
00719   return LFtoCRLF( ret );
00720 }
00721 
namespace {
// Strips the quoting from @p str in place: every double quote is removed and,
// inside a quoted section, every backslash is removed while the character
// following it is kept as it is (so an escaped quote survives).
// Backslashes outside of quoted sections are left alone.
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  const CharType quote( '"' );
  const CharType backslash( '\\' );

  bool insideQuotes = false;
  int i = 0;
  while ( i < str.length() ) {
    if ( str[i] == quote ) {
      // drop the quote and look at the character that moved into its place
      str.remove( i, 1 );
      insideQuotes = !insideQuotes;
      continue;
    }
    if ( insideQuotes && str[i] == backslash ) {
      // drop the backslash; the increment below steps over the escaped character
      str.remove( i, 1 );
    }
    ++i;
  }
}
}
00739 
00740 void removeQuots( QByteArray &str )
00741 {
00742   removeQuotesGeneric<QByteArray,char>( str );
00743 }
00744 
00745 void removeQuots( QString &str )
00746 {
00747   removeQuotesGeneric<QString,QLatin1Char>( str );
00748 }
00749 
// Escapes and, where needed, quotes @p str in place.
//
// Every backslash and double quote gets a backslash put in front of it. The
// string is then wrapped in double quotes if @p forceQuotes is set or if it
// contains one of the characters  " \ = ] [ : ; , . @ < > ) (
//
// Template parameters:
//   StringType          the string class (needs length(), at(), insert(), append())
//   CharType            type returned by StringType::at()
//   CharConverterType   type used to turn a char literal into something that
//                       can be compared with CharType and inserted
//   StringConverterType type used to turn a string literal into something
//                       that can be appended
//   ToString            not used any more, kept so that existing
//                       instantiations stay valid
//
// Each character is checked while the string is walked anyway, instead of
// converting the whole string and running a regular expression over it once
// per character; the outcome is the same.
template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
void addQuotes_impl( StringType &str, bool forceQuotes )
{
  // characters whose presence makes quoting necessary
  static const char quoteTriggers[] = "\"\\=][:;,.@<>)(";

  bool needsQuotes = false;
  for ( int i = 0; i < str.length(); i++ ) {
    const CharType cur = str.at( i );

    for ( const char *trigger = quoteTriggers; !needsQuotes && *trigger; ++trigger ) {
      if ( cur == CharConverterType( *trigger ) ) {
        needsQuotes = true;
      }
    }

    if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) {
      str.insert( i, CharConverterType( '\\' ) );
      i++; // step over the character that has just been escaped
    }
  }

  if ( needsQuotes || forceQuotes ) {
    str.insert( 0, CharConverterType( '\"' ) );
    str.append( StringConverterType( "\"" ) );
  }
}
00770 
00771 void addQuotes( QByteArray &str, bool forceQuotes )
00772 {
00773   addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes );
00774 }
00775 
00776 void addQuotes( QString &str, bool forceQuotes )
00777 {
00778   addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes );
00779 }
00780 
00781 KMIME_EXPORT QString balanceBidiState( const QString &input )
00782 {
00783   const int LRO = 0x202D;
00784   const int RLO = 0x202E;
00785   const int LRE = 0x202A;
00786   const int RLE = 0x202B;
00787   const int PDF = 0x202C;
00788 
00789   QString result = input;
00790 
00791   int openDirChangers = 0;
00792   int numPDFsRemoved = 0;
00793   for ( int i = 0; i < input.length(); i++ ) {
00794     const ushort &code = input.at( i ).unicode();
00795     if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00796       openDirChangers++;
00797     }
00798     else if ( code == PDF ) {
00799       if ( openDirChangers > 0 ) {
00800         openDirChangers--;
00801       }
00802       else {
00803         // One PDF too much, remove it
00804         kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00805         result.remove( i - numPDFsRemoved, 1 );
00806         numPDFsRemoved++;
00807       }
00808     }
00809   }
00810 
00811   if ( openDirChangers > 0 ) {
00812     kWarning() << "Possible Unicode spoofing detected in" << input;
00813 
00814     // At PDF chars to the end until the correct state is restored.
00815     // As a special exception, when encountering quoted strings, place the PDF before
00816     // the last quote.
00817     for ( int i = openDirChangers; i > 0; i-- ) {
00818       if ( result.endsWith( QLatin1Char( '"' ) ) )
00819         result.insert( result.length() - 1, QChar( PDF ) );
00820       else
00821         result += QChar( PDF );
00822     }
00823   }
00824 
00825   return result;
00826 }
00827 
00828 QString removeBidiControlChars( const QString &input )
00829 {
00830   const int LRO = 0x202D;
00831   const int RLO = 0x202E;
00832   const int LRE = 0x202A;
00833   const int RLE = 0x202B;
00834   QString result = input;
00835   result.remove( LRO );
00836   result.remove( RLO );
00837   result.remove( LRE );
00838   result.remove( RLE );
00839   return result;
00840 }
00841 
00842 static bool isCryptoPart( Content* content )
00843 {
00844   if( !content->contentType( false ) )
00845     return false;
00846 
00847   if( content->contentType()->subType().toLower() == "octet-stream" &&
00848       !content->contentDisposition( false ) )
00849     return false;
00850 
00851   const Headers::ContentType *contentType = content->contentType();
00852   const QByteArray lowerSubType = contentType->subType().toLower();
00853   return ( contentType->mediaType().toLower() == "application" &&
00854          ( lowerSubType == "pgp-encrypted" ||
00855            lowerSubType == "pgp-signature" ||
00856            lowerSubType == "pkcs7-mime" ||
00857            lowerSubType == "pkcs7-signature" ||
00858            lowerSubType == "x-pkcs7-signature" ||
00859            ( lowerSubType == "octet-stream" &&
00860              content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) );
00861 }
00862 
00863 bool hasAttachment( Content* content )
00864 {
00865   if( !content )
00866     return false;
00867 
00868   bool emptyFilename = true;
00869   if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() )
00870     emptyFilename = false;
00871 
00872   if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() )
00873     emptyFilename = false;
00874 
00875   // ignore crypto parts
00876   if( !emptyFilename && !isCryptoPart( content ) )
00877     return true;
00878 
00879   // Ok, content itself is not an attachment. now we deal with multiparts
00880   if( content->contentType()->isMultipart() ) {
00881     Q_FOREACH( Content* child, content->contents() ) {
00882       if( hasAttachment( child ) )
00883         return true;
00884     }
00885   }
00886   
00887   return false;
00888 }
00889 
00890 bool isSigned( Message *message )
00891 {
00892   if ( !message )
00893     return false;
00894 
00895   const KMime::Headers::ContentType* const contentType = message->contentType();
00896   if ( contentType->isSubtype( "signed" ) ||
00897        contentType->isSubtype( "pgp-signature" ) ||
00898        contentType->isSubtype( "pkcs7-signature" ) ||
00899        contentType->isSubtype( "x-pkcs7-signature" ) ||
00900        message->mainBodyPart( "multipart/signed" ) ||
00901        message->mainBodyPart( "application/pgp-signature" ) ||
00902        message->mainBodyPart( "application/pkcs7-signature" ) ||
00903        message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
00904     return true;
00905   }
00906 
00907   return false;
00908 }
00909 
00910 bool isEncrypted( Message *message )
00911 {
00912   if ( !message )
00913     return false;
00914 
00915   const KMime::Headers::ContentType* const contentType = message->contentType();
00916   if ( contentType->isSubtype( "encrypted" ) ||
00917        contentType->isSubtype( "pgp-encrypted" ) ||
00918        contentType->isSubtype( "pkcs7-mime" ) || 
00919        message->mainBodyPart( "multipart/encrypted" ) ||
00920        message->mainBodyPart( "application/pgp-encrypted" ) ||
00921        message->mainBodyPart( "application/pkcs7-mime" ) ) {
00922     return true;
00923   }
00924 
00925   return false;
00926 }
00927 
00928 bool isInvitation( Content *content )
00929 {
00930   if ( !content )
00931     return false;
00932 
00933   const KMime::Headers::ContentType* const contentType = content->contentType( false );
00934 
00935   if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) )
00936     return true;
00937 
00938   return false;
00939 }
00940 
00941 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.3
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal