• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.10.5 API Reference
  • KDE Home
  • Contact Us
 

kpimutils

  • kpimutils
linklocator.cpp
Go to the documentation of this file.
1 /*
2  Copyright (c) 2002 Dave Corrie <kde@davecorrie.com>
3 
4  This library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Library General Public
6  License as published by the Free Software Foundation; either
7  version 2 of the License, or (at your option) any later version.
8 
9  This library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Library General Public License for more details.
13 
14  You should have received a copy of the GNU Library General Public License
15  along with this library; see the file COPYING.LIB. If not, write to
16  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  Boston, MA 02110-1301, USA.
18 */
29 #include "linklocator.h"
30 
31 #include <KEmoticons>
32 
33 #include <QtCore/QCoreApplication>
34 #include <QtCore/QFile>
35 #include <QtCore/QRegExp>
36 #include <QTextDocument>
37 
38 #include <climits>
39 
40 using namespace KPIMUtils;
41 
46 //@cond PRIVATE
47 class KPIMUtils::LinkLocator::Private
48 {
49  public:
50  int mMaxUrlLen;
51  int mMaxAddressLen;
52 };
53 //@endcond
54 
55 // One shared KEmoticons instance for the whole file: calls to the KEmoticons constructor are expensive, so it is not constructed per convertToHtml() call.
56 K_GLOBAL_STATIC( KEmoticons, sEmoticons )
57 
58 LinkLocator::LinkLocator( const QString &text, int pos )
59  : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private )
60 {
61  d->mMaxUrlLen = 4096;
62  d->mMaxAddressLen = 255;
63 
64  // If you change either of the above values for maxUrlLen or
65  // maxAddressLen, then please also update the documentation for
66  // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
67  // default values used for the maxUrlLen/maxAddressLen parameters
68  // of convertToHtml().
69 }
70 
71 LinkLocator::~LinkLocator()
72 {
73  delete d;
74 }
75 
76 void LinkLocator::setMaxUrlLen( int length )
77 {
78  d->mMaxUrlLen = length;
79 }
80 
81 int LinkLocator::maxUrlLen() const
82 {
83  return d->mMaxUrlLen;
84 }
85 
86 void LinkLocator::setMaxAddressLen( int length )
87 {
88  d->mMaxAddressLen = length;
89 }
90 
91 int LinkLocator::maxAddressLen() const
92 {
93  return d->mMaxAddressLen;
94 }
95 
96 QString LinkLocator::getUrl()
97 {
98  QString url;
99  if ( atUrl() ) {
100  // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C
101  // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall
102  // be allowed and should be ignored when the URI is extracted.
103 
104  // This implementation follows this recommendation and
105  // allows the URL to be enclosed within different kind of brackets/quotes
106  // If an URL is enclosed, whitespace characters are allowed and removed, otherwise
107  // the URL ends with the first whitespace
108  // Also, if the URL is enclosed in brackets, the URL itself is not allowed
109  // to contain the closing bracket, as this would be detected as the end of the URL
110 
111  QChar beforeUrl, afterUrl;
112 
113  // detect if the url has been surrounded by brackets or quotes
114  if ( mPos > 0 ) {
115  beforeUrl = mText[mPos - 1];
116 
117  /*if ( beforeUrl == '(' ) {
118  afterUrl = ')';
119  } else */if ( beforeUrl == '[' ) {
120  afterUrl = ']';
121  } else if ( beforeUrl == '<' ) {
122  afterUrl = '>';
123  } else if ( beforeUrl == '>' ) { // for e.g. <link>http://.....</link>
124  afterUrl = '<';
125  } else if ( beforeUrl == '"' ) {
126  afterUrl = '"';
127  }
128  }
129 
130  url.reserve( maxUrlLen() ); // avoid allocs
131  int start = mPos;
132  while ( ( mPos < (int)mText.length() ) &&
133  ( mText[mPos].isPrint() || mText[mPos].isSpace() ) &&
134  ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) ||
135  ( !afterUrl.isNull() && mText[mPos] != afterUrl ) ) ) {
136  if ( !mText[mPos].isSpace() ) { // skip whitespace
137  url.append( mText[mPos] );
138  if ( url.length() > maxUrlLen() ) {
139  break;
140  }
141  }
142 
143  mPos++;
144  }
145 
146  if ( isEmptyUrl( url ) || ( url.length() > maxUrlLen() ) ) {
147  mPos = start;
148  url = "";
149  } else {
150  --mPos;
151  }
152  }
153 
154  // HACK: This is actually against the RFC. However, most people don't properly escape the URL in
155  // their text with "" or <>. That leads to people writing an url, followed immediatley by
156  // a dot to finish the sentence. That would lead the parser to include the dot in the url,
157  // even though that is not wanted. So work around that here.
158  // Most real-life URLs hopefully don't end with dots or commas.
159  QList<QChar> wordBoundaries;
160  wordBoundaries << '.' << ',' << ':' << '!' << '?' << ')' << '>';
161  if ( url.length() > 1 ) {
162  do {
163  if ( wordBoundaries.contains( url.at( url.length() - 1 ) ) ) {
164  url.chop( 1 );
165  --mPos;
166  } else {
167  break;
168  }
169  } while( url.length() > 1 );
170  }
171 
172  return url;
173 }
174 
175 // keep this in sync with KMMainWin::slotUrlClicked()
176 bool LinkLocator::atUrl() const
177 {
178  // the following characters are allowed in a dot-atom (RFC 2822):
179  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
180  const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
181 
182  // the character directly before the URL must not be a letter, a number or
183  // any other character allowed in a dot-atom (RFC 2822).
184  if ( ( mPos > 0 ) &&
185  ( mText[mPos-1].isLetterOrNumber() ||
186  ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) {
187  return false;
188  }
189 
190  QChar ch = mText[mPos];
191  return
192  ( ch == 'h' && ( mText.mid( mPos, 7 ) == QLatin1String( "http://" ) ||
193  mText.mid( mPos, 8 ) == QLatin1String( "https://" ) ) ) ||
194  ( ch == 'v' && mText.mid( mPos, 6 ) == QLatin1String( "vnc://" ) ) ||
195  ( ch == 'f' && ( mText.mid( mPos, 7 ) == QLatin1String( "fish://" ) ||
196  mText.mid( mPos, 6 ) == QLatin1String( "ftp://" ) ||
197  mText.mid( mPos, 7 ) == QLatin1String( "ftps://" ) ) ) ||
198  ( ch == 's' && ( mText.mid( mPos, 7 ) == QLatin1String( "sftp://" ) ||
199  mText.mid( mPos, 6 ) == QLatin1String( "smb://" ) ) ) ||
200  ( ch == 'm' && mText.mid( mPos, 7 ) == QLatin1String( "mailto:" ) ) ||
201  ( ch == 'w' && mText.mid( mPos, 4 ) == QLatin1String( "www." ) ) ||
202  ( ch == 'f' && ( mText.mid( mPos, 4 ) == QLatin1String( "ftp." ) ||
203  mText.mid( mPos, 7 ) == QLatin1String( "file://" ) ) )||
204  ( ch == 'n' && mText.mid( mPos, 5 ) == QLatin1String( "news:" ) );
205 }
206 
207 bool LinkLocator::isEmptyUrl( const QString &url ) const
208 {
209  return url.isEmpty() ||
210  url == QLatin1String( "http://" ) ||
211  url == QLatin1String( "https://" ) ||
212  url == QLatin1String( "fish://" ) ||
213  url == QLatin1String( "ftp://" ) ||
214  url == QLatin1String( "ftps://" ) ||
215  url == QLatin1String( "sftp://" ) ||
216  url == QLatin1String( "smb://" ) ||
217  url == QLatin1String( "vnc://" ) ||
218  url == QLatin1String( "mailto" ) ||
219  url == QLatin1String( "www" ) ||
220  url == QLatin1String( "ftp" ) ||
221  url == QLatin1String( "news" ) ||
222  url == QLatin1String( "news://" );
223 }
224 
225 QString LinkLocator::getEmailAddress()
226 {
227  QString address;
228 
229  if ( mText[mPos] == '@' ) {
230  // the following characters are allowed in a dot-atom (RFC 2822):
231  // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
232  const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" );
233 
234  // determine the local part of the email address
235  int start = mPos - 1;
236  while ( start >= 0 && mText[start].unicode() < 128 &&
237  ( mText[start].isLetterOrNumber() ||
238  mText[start] == '@' || // allow @ to find invalid email addresses
239  allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) {
240  if ( mText[start] == '@' ) {
241  return QString(); // local part contains '@' -> no email address
242  }
243  --start;
244  }
245  ++start;
246  // we assume that an email address starts with a letter or a digit
247  while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) {
248  ++start;
249  }
250  if ( start == mPos ) {
251  return QString(); // local part is empty -> no email address
252  }
253 
254  // determine the domain part of the email address
255  int dotPos = INT_MAX;
256  int end = mPos + 1;
257  while ( end < (int)mText.length() &&
258  ( mText[end].isLetterOrNumber() ||
259  mText[end] == '@' || // allow @ to find invalid email addresses
260  mText[end] == '.' ||
261  mText[end] == '-' ) ) {
262  if ( mText[end] == '@' ) {
263  return QString(); // domain part contains '@' -> no email address
264  }
265  if ( mText[end] == '.' ) {
266  dotPos = qMin( dotPos, end ); // remember index of first dot in domain
267  }
268  ++end;
269  }
270  // we assume that an email address ends with a letter or a digit
271  while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) {
272  --end;
273  }
274  if ( end == mPos ) {
275  return QString(); // domain part is empty -> no email address
276  }
277  if ( dotPos >= end ) {
278  return QString(); // domain part doesn't contain a dot
279  }
280 
281  if ( end - start > maxAddressLen() ) {
282  return QString(); // too long -> most likely no email address
283  }
284  address = mText.mid( start, end - start );
285 
286  mPos = end - 1;
287  }
288  return address;
289 }
290 
291 QString LinkLocator::convertToHtml( const QString &plainText, int flags,
292  int maxUrlLen, int maxAddressLen )
293 {
294  LinkLocator locator( plainText );
295  locator.setMaxUrlLen( maxUrlLen );
296  locator.setMaxAddressLen( maxAddressLen );
297 
298  QString str;
299  QString result( (QChar*)0, (int)locator.mText.length() * 2 );
300  QChar ch;
301  int x;
302  bool startOfLine = true;
303  QString emoticon;
304 
305  for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length();
306  locator.mPos++, x++ ) {
307  ch = locator.mText[locator.mPos];
308  if ( flags & PreserveSpaces ) {
309  if ( ch == ' ' ) {
310  if ( locator.mPos + 1 < locator.mText.length() ) {
311  if ( locator.mText[locator.mPos + 1] != ' ' ) {
312 
313  // A single space, make it breaking if not at the start or end of the line
314  const bool endOfLine = locator.mText[locator.mPos + 1] == '\n';
315  if ( !startOfLine && !endOfLine ) {
316  result += ' ';
317  } else {
318  result += "&nbsp;";
319  }
320  } else {
321 
322  // Whitespace of more than one space, make it all non-breaking
323  while ( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) {
324  result += "&nbsp;";
325  locator.mPos++;
326  x++;
327  }
328 
329  // We incremented once to often, undo that
330  locator.mPos--;
331  x--;
332  }
333  } else {
334  // Last space in the text, it is non-breaking
335  result += "&nbsp;";
336  }
337 
338  if ( startOfLine ) {
339  startOfLine = false;
340  }
341  continue;
342  } else if ( ch == '\t' ) {
343  do {
344  result += "&nbsp;";
345  x++;
346  } while ( ( x & 7 ) != 0 );
347  x--;
348  startOfLine = false;
349  continue;
350  }
351  }
352  if ( ch == '\n' ) {
353  result += "<br />\n"; // Keep the \n, so apps can figure out the quoting levels correctly.
354  startOfLine = true;
355  x = -1;
356  continue;
357  }
358 
359  startOfLine = false;
360  if ( ch == '&' ) {
361  result += "&amp;";
362  } else if ( ch == '"' ) {
363  result += "&quot;";
364  } else if ( ch == '<' ) {
365  result += "&lt;";
366  } else if ( ch == '>' ) {
367  result += "&gt;";
368  } else {
369  const int start = locator.mPos;
370  if ( !( flags & IgnoreUrls ) ) {
371  str = locator.getUrl();
372  if ( !str.isEmpty() ) {
373  QString hyperlink;
374  if ( str.left( 4 ) == "www." ) {
375  hyperlink = "http://" + str;
376  } else if ( str.left( 4 ) == "ftp." ) {
377  hyperlink = "ftp://" + str;
378  } else {
379  hyperlink = str;
380  }
381 
382  result += "<a href=\"" + hyperlink + "\">" + Qt::escape( str ) + "</a>";
383  x += locator.mPos - start;
384  continue;
385  }
386  str = locator.getEmailAddress();
387  if ( !str.isEmpty() ) {
388  // len is the length of the local part
389  int len = str.indexOf( '@' );
390  QString localPart = str.left( len );
391 
392  // remove the local part from the result (as '&'s have been expanded to
393  // &amp; we have to take care of the 4 additional characters per '&')
394  result.truncate( result.length() -
395  len - ( localPart.count( '&' ) * 4 ) );
396  x -= len;
397 
398  result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
399  x += str.length() - 1;
400  continue;
401  }
402  }
403  if ( flags & HighlightText ) {
404  str = locator.highlightedText();
405  if ( !str.isEmpty() ) {
406  result += str;
407  x += locator.mPos - start;
408  continue;
409  }
410  }
411  result += ch;
412  }
413  }
414 
415  if ( flags & ReplaceSmileys ) {
416  QStringList exclude;
417  exclude << "(c)" << "(C)" << "&gt;:-(" << "&gt;:(" << "(B)" << "(b)" << "(P)" << "(p)";
418  exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)";
419  exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)";
420  exclude << "(g)" << "(F)" << "(f)" << "(H)";
421  exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)";
422  static QString cachedEmoticonsThemeName;
423  if ( cachedEmoticonsThemeName.isEmpty() ) {
424  cachedEmoticonsThemeName = KEmoticons::currentThemeName();
425  }
426  result =
427  sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons(
428  result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude );
429  }
430 
431  return result;
432 }
433 
434 QString LinkLocator::pngToDataUrl( const QString &iconPath )
435 {
436  if ( iconPath.isEmpty() ) {
437  return QString();
438  }
439 
440  QFile pngFile( iconPath );
441  if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) {
442  return QString();
443  }
444 
445  QByteArray ba = pngFile.readAll();
446  pngFile.close();
447  return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() );
448 }
449 
450 QString LinkLocator::highlightedText()
451 {
452  // formating symbols must be prepended with a whitespace
453  if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) {
454  return QString();
455  }
456 
457  const QChar ch = mText[mPos];
458  if ( ch != '/' && ch != '*' && ch != '_' && ch != '-' ) {
459  return QString();
460  }
461 
462  QRegExp re =
463  QRegExp( QString( "\\%1((\\w+)([\\s-']\\w+)*( ?[,.:\\?!;])?)\\%2" ).arg( ch ).arg( ch ) );
464  re.setMinimal( true );
465  if ( re.indexIn( mText, mPos ) == mPos ) {
466  int length = re.matchedLength();
467  // there must be a whitespace after the closing formating symbol
468  if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) {
469  return QString();
470  }
471  mPos += length - 1;
472  switch ( ch.toLatin1() ) {
473  case '*':
474  return "<b>*" + re.cap( 1 ) + "*</b>";
475  case '_':
476  return "<u>_" + re.cap( 1 ) + "_</u>";
477  case '/':
478  return "<i>/" + re.cap( 1 ) + "/</i>";
479  case '-':
480  return "<strike>-" + re.cap( 1 ) + "-</strike>";
481  }
482  }
483  return QString();
484 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2013 The KDE developers.
Generated on Sat Jul 13 2013 01:26:05 by doxygen 1.8.3.1 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

kpimutils

Skip menu "kpimutils"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Modules

kdepimlibs-4.10.5 API Reference

Skip menu "kdepimlibs-4.10.5 API Reference"
  • akonadi
  •   contact
  •   kmime
  •   socialutils
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal