• Skip to content
  • Skip to link menu
KDE 4.2 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KIMAP Library

rfccodecs.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  *
00003  *   rfccodecs.cpp - handler for various rfc/mime encodings
00004  *   Copyright (C) 2000 s.carstens@gmx.de
00005  *
00006  *   This library is free software; you can redistribute it and/or
00007  *   modify it under the terms of the GNU Library General Public
00008  *   License as published by the Free Software Foundation; either
00009  *   version 2 of the License, or (at your option) any later version.
00010  *
00011  *   This library is distributed in the hope that it will be useful,
00012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *   Library General Public License for more details.
00015  *
00016  *   You should have received a copy of the GNU Library General Public License
00017  *   along with this library; see the file COPYING.LIB.  If not, write to
00018  *   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *   Boston, MA 02110-1301, USA.
00020  *
00021  *********************************************************************/
00033 #include "rfccodecs.h"
00034 
00035 #include <ctype.h>
00036 #include <sys/types.h>
00037 
00038 #include <stdio.h>
00039 #include <stdlib.h>
00040 
00041 #include <QtCore/QTextCodec>
00042 #include <QtCore/QBuffer>
00043 #include <QtCore/QRegExp>
00044 #include <QtCore/QByteArray>
00045 #include <QtCore/QLatin1Char>
00046 #include <kcodecs.h>
00047 
00048 using namespace KIMAP;
00049 
00050 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
00051 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
00052 
//@cond PRIVATE

/* Alphabet of the modified base64 used for IMAP folder names; the index of a
   character in this string is its 6-bit value. */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/* Characters that get escaped by the RFC 2047 / RFC 2231 encoders below
   (16 characters plus the terminating NUL). */
static const char especials[17] = "()<>@,;:\"/[]?.= ";

enum {
  UNDEFINED = 64,   /* decoding-table entry for bytes outside the alphabet */
  MAXLINE = 76,
  UTF16SHIFT = 10   /* number of payload bits carried by each surrogate */
};

/* UTF16 definitions */
static const unsigned long UTF16MASK = 0x03FFUL;
static const unsigned long UTF16BASE = 0x10000UL;
static const unsigned long UTF16HIGHSTART = 0xD800UL;
static const unsigned long UTF16HIGHEND = 0xDBFFUL;
static const unsigned long UTF16LOSTART = 0xDC00UL;
static const unsigned long UTF16LOEND = 0xDFFFUL;

//@endcond
00069 
00070 //-----------------------------------------------------------------------------
00071 QString KIMAP::decodeImapFolderName( const QString &inSrc )
00072 {
00073   unsigned char c, i, bitcount;
00074   unsigned long ucs4, utf16, bitbuf;
00075   unsigned char base64[256], utf8[6];
00076   unsigned int srcPtr = 0;
00077   QByteArray dst;
00078   QByteArray src = inSrc.toAscii ();
00079   uint srcLen = inSrc.length();
00080 
00081   /* initialize modified base64 decoding table */
00082   memset( base64, UNDEFINED, sizeof( base64 ) );
00083   for ( i = 0; i < sizeof( base64chars ); ++i ) {
00084     base64[(int)base64chars[i]] = i;
00085   }
00086 
00087   /* loop until end of string */
00088   while ( srcPtr < srcLen ) {
00089     c = src[srcPtr++];
00090     /* deal with literal characters and &- */
00091     if ( c != '&' || src[srcPtr] == '-' ) {
00092       /* encode literally */
00093       dst += c;
00094       /* skip over the '-' if this is an &- sequence */
00095       if ( c == '&' ) {
00096         srcPtr++;
00097       }
00098     } else {
00099       /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
00100       bitbuf = 0;
00101       bitcount = 0;
00102       ucs4 = 0;
00103       while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
00104         ++srcPtr;
00105         bitbuf = ( bitbuf << 6 ) | c;
00106         bitcount += 6;
00107         /* enough bits for a UTF-16 character? */
00108         if ( bitcount >= 16 ) {
00109           bitcount -= 16;
00110           utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
00111           /* convert UTF16 to UCS4 */
00112           if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
00113             ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
00114             continue;
00115           } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
00116             ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
00117           } else {
00118             ucs4 = utf16;
00119           }
00120           /* convert UTF-16 range of UCS4 to UTF-8 */
00121           if ( ucs4 <= 0x7fUL ) {
00122             utf8[0] = ucs4;
00123             i = 1;
00124           } else if ( ucs4 <= 0x7ffUL ) {
00125             utf8[0] = 0xc0 | ( ucs4 >> 6 );
00126             utf8[1] = 0x80 | ( ucs4 & 0x3f );
00127             i = 2;
00128           } else if ( ucs4 <= 0xffffUL ) {
00129             utf8[0] = 0xe0 | ( ucs4 >> 12 );
00130             utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00131             utf8[2] = 0x80 | ( ucs4 & 0x3f );
00132             i = 3;
00133           } else {
00134             utf8[0] = 0xf0 | ( ucs4 >> 18 );
00135             utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
00136             utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00137             utf8[3] = 0x80 | ( ucs4 & 0x3f );
00138             i = 4;
00139           }
00140           /* copy it */
00141           for ( c = 0; c < i; ++c ) {
00142             dst += utf8[c];
00143           }
00144         }
00145       }
00146       /* skip over trailing '-' in modified UTF-7 encoding */
00147       if ( src[srcPtr] == '-' ) {
00148         ++srcPtr;
00149       }
00150     }
00151   }
00152   return QString::fromUtf8( dst.data () );
00153 }
00154 
00155 //-----------------------------------------------------------------------------
00156 QString KIMAP::quoteIMAP( const QString &src )
00157 {
00158   uint len = src.length();
00159   QString result;
00160   result.reserve( 2 * len );
00161   for ( unsigned int i = 0; i < len; i++ ) {
00162     if ( src[i] == '"' || src[i] == '\\' ) {
00163       result += '\\';
00164     }
00165     result += src[i];
00166   }
00167   //result.squeeze(); - unnecessary and slow
00168   return result;
00169 }
00170 
00171 //-----------------------------------------------------------------------------
00172 QString KIMAP::encodeImapFolderName( const QString &inSrc )
00173 {
00174   unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
00175   unsigned int ucs4, bitbuf;
00176   QByteArray src = inSrc.toUtf8 ();
00177   QString dst;
00178 
00179   int srcPtr = 0;
00180   utf7mode = 0;
00181   utf8total = 0;
00182   bitstogo = 0;
00183   utf8pos = 0;
00184   bitbuf = 0;
00185   ucs4 = 0;
00186   while ( srcPtr < src.length () ) {
00187     c = (unsigned char)src[srcPtr++];
00188     /* normal character? */
00189     if ( c >= ' ' && c <= '~' ) {
00190       /* switch out of UTF-7 mode */
00191       if ( utf7mode ) {
00192         if ( bitstogo ) {
00193           dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00194           bitstogo = 0;
00195         }
00196         dst += '-';
00197         utf7mode = 0;
00198       }
00199       dst += c;
00200       /* encode '&' as '&-' */
00201       if ( c == '&' ) {
00202         dst += '-';
00203       }
00204       continue;
00205     }
00206     /* switch to UTF-7 mode */
00207     if ( !utf7mode ) {
00208       dst += '&';
00209       utf7mode = 1;
00210     }
00211     /* Encode US-ASCII characters as themselves */
00212     if ( c < 0x80 ) {
00213       ucs4 = c;
00214       utf8total = 1;
00215     } else if ( utf8total ) {
00216       /* save UTF8 bits into UCS4 */
00217       ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
00218       if ( ++utf8pos < utf8total ) {
00219         continue;
00220       }
00221     } else {
00222       utf8pos = 1;
00223       if ( c < 0xE0 ) {
00224         utf8total = 2;
00225         ucs4 = c & 0x1F;
00226       } else if ( c < 0xF0 ) {
00227         utf8total = 3;
00228         ucs4 = c & 0x0F;
00229       } else {
00230         /* NOTE: can't convert UTF8 sequences longer than 4 */
00231         utf8total = 4;
00232         ucs4 = c & 0x03;
00233       }
00234       continue;
00235     }
00236     /* loop to split ucs4 into two utf16 chars if necessary */
00237     utf8total = 0;
00238     do
00239     {
00240       if ( ucs4 >= UTF16BASE ) {
00241         ucs4 -= UTF16BASE;
00242         bitbuf =
00243           ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
00244         ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
00245         utf16flag = 1;
00246       } else {
00247         bitbuf = ( bitbuf << 16 ) | ucs4;
00248         utf16flag = 0;
00249       }
00250       bitstogo += 16;
00251       /* spew out base64 */
00252       while ( bitstogo >= 6 ) {
00253         bitstogo -= 6;
00254         dst +=
00255           base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
00256       }
00257     }
00258     while ( utf16flag );
00259   }
00260   /* if in UTF-7 mode, finish in ASCII */
00261   if ( utf7mode ) {
00262     if ( bitstogo ) {
00263       dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00264     }
00265     dst += '-';
00266   }
00267   return quoteIMAP( dst );
00268 }
00269 
00270 //-----------------------------------------------------------------------------
00271 QTextCodec *KIMAP::codecForName( const QString &str )
00272 {
00273   if ( str.isEmpty () ) {
00274     return 0;
00275   }
00276   return QTextCodec::codecForName ( str.toLower ().
00277                                     replace ( "windows", "cp" ).toLatin1 () );
00278 }
00279 
00280 //-----------------------------------------------------------------------------
00281 const QString KIMAP::decodeRFC2047String( const QString &str )
00282 {
00283   QString throw_away;
00284 
00285   return decodeRFC2047String( str, throw_away );
00286 }
00287 
00288 //-----------------------------------------------------------------------------
00289 const QString KIMAP::decodeRFC2047String( const QString &str,
00290                                           QString &charset )
00291 {
00292   QString throw_away;
00293 
00294   return decodeRFC2047String( str, charset, throw_away );
00295 }
00296 
00297 //-----------------------------------------------------------------------------
00298 const QString KIMAP::decodeRFC2047String( const QString &str,
00299                                           QString &charset,
00300                                           QString &language )
00301 {
00302   //do we have a rfc string
00303   if ( !str.contains( "=?" ) ) {
00304     return str;
00305   }
00306 
00307   // FIXME get rid of the conversion?
00308   QByteArray aStr = str.toAscii ();  // QString.length() means Unicode chars
00309   QByteArray result;
00310   char *pos, *beg, *end, *mid = 0;
00311   QByteArray cstr;
00312   char encoding = 0, ch;
00313   bool valid;
00314   const int maxLen = 200;
00315   int i;
00316 
00317 //  result.truncate(aStr.length());
00318   for ( pos = aStr.data (); *pos; pos++ ) {
00319     if ( pos[0] != '=' || pos[1] != '?' ) {
00320       result += *pos;
00321       continue;
00322     }
00323     beg = pos + 2;
00324     end = beg;
00325     valid = true;
00326     // parse charset name
00327     for ( i = 2, pos += 2;
00328           i < maxLen &&
00329               ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) );
00330           i++ )
00331       pos++;
00332     if ( *pos != '?' || i < 4 || i >= maxLen ) {
00333       valid = false;
00334     } else {
00335       charset = QByteArray( beg, i - 1 );  // -2 + 1 for the zero
00336       int pt = charset.lastIndexOf( '*' );
00337       if ( pt != -1 ) {
00338         // save language for later usage
00339         language = charset.right( charset.length () - pt - 1 );
00340 
00341         // tie off language as defined in rfc2047
00342         charset.truncate( pt );
00343       }
00344       // get encoding and check delimiting question marks
00345       encoding = toupper( pos[1] );
00346       if ( pos[2] != '?' ||
00347            ( encoding != 'Q' && encoding != 'B' &&
00348              encoding != 'q' && encoding != 'b' ) ) {
00349         valid = false;
00350       }
00351       pos += 3;
00352       i += 3;
00353 //  kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
00354     }
00355     if ( valid ) {
00356       mid = pos;
00357       // search for end of encoded part
00358       while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) {
00359         i++;
00360         pos++;
00361       }
00362       end = pos + 2;//end now points to the first char after the encoded string
00363       if ( i >= maxLen || !*pos ) {
00364         valid = false;
00365       }
00366     }
00367     if ( valid ) {
00368       ch = *pos;
00369       *pos = '\0';
00370       cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) );
00371       if ( encoding == 'Q' ) {
00372         // decode quoted printable text
00373         for ( i = cstr.length () - 1; i >= 0; i-- ) {
00374           if ( cstr[i] == '_' ) {
00375             cstr[i] = ' ';
00376           }
00377         }
00378 //    kDebug() << "before QP '"
00379 //    << cstr << "'";
00380         cstr = KCodecs::quotedPrintableDecode( cstr );
00381 //    kDebug() << "after QP '"
00382 //    << cstr << "'";
00383       } else {
00384         // decode base64 text
00385         cstr = QByteArray::fromBase64( cstr );
00386       }
00387       *pos = ch;
00388       int len = cstr.length();
00389       for ( i = 0; i < len; i++ ) {
00390         result += cstr[i];
00391       }
00392 
00393       pos = end - 1;
00394     } else {
00395 //    kDebug() << "invalid";
00396       //result += "=?";
00397       //pos = beg -1; // because pos gets increased shortly afterwards
00398       pos = beg - 2;
00399       result += *pos++;
00400       result += *pos;
00401     }
00402   }
00403   if ( !charset.isEmpty () ) {
00404     QTextCodec *aCodec = codecForName( charset.toAscii () );
00405     if ( aCodec ) {
00406 //    kDebug() << "Codec is" << aCodec->name();
00407       return aCodec->toUnicode( result );
00408     }
00409   }
00410   return result;
00411 }
00412 
00413 //-----------------------------------------------------------------------------
00414 const QString KIMAP::encodeRFC2047String( const QString &str )
00415 {
00416   return encodeRFC2047String( str.toLatin1() );
00417 }
00418 
00419 //-----------------------------------------------------------------------------
00420 const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
00421 {
00422   if ( str.isEmpty () ) {
00423     return str;
00424   }
00425 
00426   const signed char *latin =
00427     reinterpret_cast<const signed char *>
00428     ( str.data() ), *l, *start, *stop;
00429   char hexcode;
00430   int numQuotes, i;
00431   int rptr = 0;
00432   // My stats show this number results in 12 resize() out of 73,000
00433   int resultLen = 3 * str.length() / 2;
00434   QByteArray result( resultLen, '\0' );
00435 
00436   while ( *latin ) {
00437     l = latin;
00438     start = latin;
00439     while ( *l ) {
00440       if ( *l == 32 ) {
00441         start = l + 1;
00442       }
00443       if ( *l < 0 ) {
00444         break;
00445       }
00446       l++;
00447     }
00448     if ( *l ) {
00449       numQuotes = 1;
00450       while ( *l ) {
00451         /* The encoded word must be limited to 75 character */
00452         for ( i = 0; i < 16; i++ ) {
00453           if ( *l == especials[i] ) {
00454             numQuotes++;
00455           }
00456         }
00457         if ( *l < 0 ) {
00458           numQuotes++;
00459         }
00460         /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
00461         if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
00462           break;
00463         }
00464         l++;
00465       }
00466       if ( *l ) {
00467         stop = l - 1;
00468         while ( stop >= start && *stop != 32 ) {
00469           stop--;
00470         }
00471         if ( stop <= start ) {
00472           stop = l;
00473         }
00474       } else {
00475         stop = l;
00476       }
00477       if ( resultLen - rptr - 1 <= start -  latin + 1 + 16 ) {
00478         // =?iso-88...
00479         resultLen += ( start - latin + 1 ) * 2 + 20; // more space
00480         result.resize( resultLen );
00481       }
00482       while ( latin < start ) {
00483         result[rptr++] = *latin;
00484         latin++;
00485       }
00486       result.replace( rptr, 15, "=?iso-8859-1?q?" );
00487       rptr += 15;
00488       if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) {
00489         resultLen += ( stop - latin + 1 ) * 4 + 20; // more space
00490         result.resize( resultLen );
00491       }
00492       while ( latin < stop ) {
00493         // can add up to 3 chars/iteration
00494         numQuotes = 0;
00495         for ( i = 0; i < 16; i++ ) {
00496           if ( *latin == especials[i] ) {
00497             numQuotes = 1;
00498           }
00499         }
00500         if ( *latin < 0 ) {
00501           numQuotes = 1;
00502         }
00503         if ( numQuotes ) {
00504           result[rptr++] = '=';
00505           hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
00506           if ( hexcode >= 58 ) {
00507             hexcode += 7;
00508           }
00509           result[rptr++] = hexcode;
00510           hexcode = ( *latin & 0x0F ) + 48;
00511           if ( hexcode >= 58 ) {
00512             hexcode += 7;
00513           }
00514           result[rptr++] = hexcode;
00515         } else {
00516           result[rptr++] = *latin;
00517         }
00518         latin++;
00519       }
00520       result[rptr++] = '?';
00521       result[rptr++] = '=';
00522     } else {
00523       while ( *latin ) {
00524         if ( rptr == resultLen - 1 ) {
00525           resultLen += 30;
00526           result.resize( resultLen );
00527         }
00528         result[rptr++] = *latin;
00529         latin++;
00530       }
00531     }
00532   }
00533   result[rptr] = 0;
00534   return result;
00535 }
00536 
00537 //-----------------------------------------------------------------------------
00538 const QString KIMAP::encodeRFC2231String( const QString &str )
00539 {
00540   if ( str.isEmpty () ) {
00541     return str;
00542   }
00543 
00544   signed char *latin = (signed char *)calloc( 1, str.length () + 1 );
00545   char *latin_us = (char *)latin;
00546   strcpy( latin_us, str.toLatin1 () );
00547   signed char *l = latin;
00548   char hexcode;
00549   int i;
00550   bool quote;
00551   while ( *l ) {
00552     if ( *l < 0 ) {
00553       break;
00554     }
00555     l++;
00556   }
00557   if ( !*l ) {
00558     free( latin );
00559     return str;
00560   }
00561   QByteArray result;
00562   l = latin;
00563   while ( *l ) {
00564     quote = *l < 0;
00565     for ( i = 0; i < 16; i++ ) {
00566       if ( *l == especials[i] ) {
00567         quote = true;
00568       }
00569     }
00570     if ( quote ) {
00571       result += '%';
00572       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00573       if ( hexcode >= 58 ) {
00574         hexcode += 7;
00575       }
00576       result += hexcode;
00577       hexcode = ( *l & 0x0F ) + 48;
00578       if ( hexcode >= 58 ) {
00579         hexcode += 7;
00580       }
00581       result += hexcode;
00582     } else {
00583       result += *l;
00584     }
00585     l++;
00586   }
00587   free( latin );
00588   return result;
00589 }
00590 
00591 //-----------------------------------------------------------------------------
00592 const QString KIMAP::decodeRFC2231String( const QString &str )
00593 {
00594   int p = str.indexOf ( '\'' );
00595 
00596   //see if it is an rfc string
00597   if ( p < 0 ) {
00598     return str;
00599   }
00600 
00601   int l = str.lastIndexOf( '\'' );
00602 
00603   //second is language
00604   if ( p >= l ) {
00605     return str;
00606   }
00607 
00608   //first is charset or empty
00609   QString charset = str.left ( p );
00610   QString st = str.mid ( l + 1 );
00611   QString language = str.mid ( p + 1, l - p - 1 );
00612 
00613   //kDebug() << "Charset:" << charset << "Language:" << language;
00614 
00615   char ch, ch2;
00616   p = 0;
00617   while ( p < (int) st.length () ) {
00618     if ( st.at( p ) == 37 ) {
00619       ch = st.at( p + 1 ).toLatin1 () - 48;
00620       if ( ch > 16 ) {
00621         ch -= 7;
00622       }
00623       ch2 = st.at( p + 2 ).toLatin1 () - 48;
00624       if ( ch2 > 16 ) {
00625         ch2 -= 7;
00626       }
00627       st.replace( p, 1, ch * 16 + ch2 );
00628       st.remove ( p + 1, 2 );
00629     }
00630     p++;
00631   }
00632   return st;
00633 }

KIMAP Library

Skip menu "KIMAP Library"
  • Main Page
  • File List
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.5.7.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal