00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "loader.h"
00012 #include "dataretriever.h"
00013 #include "documentsource.h"
00014 #include "feed.h"
00015 #include "global.h"
00016 #include "parsercollection.h"
00017
00018 #include <kio/global.h>
00019 #include <kurl.h>
00020
00021 #include <QtCore/QBuffer>
00022 #include <QtCore/QRegExp>
00023 #include <QtCore/QStringList>
00024
00025 #include <boost/shared_ptr.hpp>
00026
00027
00028 #include <iostream>
00029
00030 namespace Syndication {
00031
00032 struct Loader::LoaderPrivate
00033 {
00034 LoaderPrivate() : retriever(0), lastError(Success),
00035 retrieverError(0)
00036 {
00037 }
00038
00039 ~LoaderPrivate()
00040 {
00041 delete retriever;
00042 }
00043
00044 DataRetriever* retriever;
00045 Syndication::ErrorCode lastError;
00046 int retrieverError;
00047 KUrl discoveredFeedURL;
00048 KUrl url;
00049 };
00050
00051 Loader* Loader::create()
00052 {
00053 return new Loader;
00054 }
00055
00056 Loader *Loader::create(QObject* object, const char* slot)
00057 {
00058 Loader *loader = create();
00059 connect(loader, SIGNAL(loadingComplete(Syndication::Loader*,
00060 Syndication::FeedPtr, Syndication::ErrorCode)),
00061 object, slot);
00062 return loader;
00063 }
00064
00065 Loader::Loader() : d(new LoaderPrivate)
00066 {
00067 }
00068
00069 Loader::~Loader()
00070 {
00071 delete d;
00072 }
00073
00074 void Loader::loadFrom(const KUrl& url)
00075 {
00076 loadFrom(url, new FileRetriever);
00077 }
00078
00079 void Loader::loadFrom(const KUrl &url, DataRetriever *retriever)
00080 {
00081 if (d->retriever != 0L)
00082 return;
00083
00084 d->url = url;
00085 d->retriever = retriever;
00086
00087 connect(d->retriever, SIGNAL(dataRetrieved(const QByteArray&, bool)),
00088 this, SLOT(slotRetrieverDone(const QByteArray&, bool)));
00089
00090 d->retriever->retrieveData(url);
00091 }
00092
00093 int Loader::retrieverError() const
00094 {
00095 return d->retrieverError;
00096 }
00097
00098 Syndication::ErrorCode Loader::errorCode() const
00099 {
00100 return d->lastError;
00101 }
00102
00103 void Loader::abort()
00104 {
00105 if (d && d->retriever)
00106 {
00107 d->retriever->abort();
00108 delete d->retriever;
00109 d->retriever = 0L;
00110 }
00111
00112 emit loadingComplete(this, FeedPtr(), Aborted);
00113 delete this;
00114 }
00115
00116 KUrl Loader::discoveredFeedURL() const
00117 {
00118 return d->discoveredFeedURL;
00119 }
00120
00121 void Loader::slotRetrieverDone(const QByteArray& data, bool success)
00122 {
00123 d->retrieverError = d->retriever->errorCode();
00124 ErrorCode status = Success;
00125 FeedPtr feed;
00126 bool isFileRetriever = dynamic_cast<FileRetriever*>(d->retriever) != 0;
00127 delete d->retriever;
00128 d->retriever = 0;
00129
00130 if (success)
00131 {
00132 DocumentSource src(data, d->url.url());
00133 feed = parserCollection()->parse(src);
00134
00135 if (parserCollection()->lastError() != Syndication::Success)
00136 {
00137 status = parserCollection()->lastError();
00138 discoverFeeds(data);
00139 }
00140 }
00141 else
00142 {
00143 if (isFileRetriever)
00144 {
00145
00146
00147 status = FileNotFound;
00148 std::cout << "file retriever error: " << d->retrieverError << std::endl;
00149 }
00150 else
00151 {
00152
00153 status = OtherRetrieverError;
00154 }
00155 }
00156
00157 emit loadingComplete(this, feed, status);
00158
00159 delete this;
00160 }
00161
00162 void Loader::discoverFeeds(const QByteArray &data)
00163 {
00164 QString str = QString(data).simplified();
00165 QString s2;
00166
00167
00168
00169
00170
00171
00172 QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", Qt::CaseInsensitive );
00173 if (rx.indexIn(str)!=-1)
00174 s2=rx.cap(1);
00175 else{
00176
00177 int pos=0;
00178 QStringList feeds;
00179 QString host=d->url.host();
00180 rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)");
00181 while ( pos >= 0 ) {
00182 pos = rx.indexIn( str, pos );
00183 s2=rx.cap(1);
00184 if (s2.endsWith(QLatin1String(".rdf")) ||
00185 s2.endsWith(QLatin1String(".rss")) ||
00186 s2.endsWith(QLatin1String(".xml")))
00187 feeds.append(s2);
00188 if ( pos >= 0 ) {
00189 pos += rx.matchedLength();
00190 }
00191 }
00192
00193 KUrl testURL;
00194
00195 QStringList::const_iterator end( feeds.constEnd() );
00196 for ( QStringList::const_iterator it = feeds.constBegin(); it != end; ++it ) {
00197 testURL=*it;
00198 if (testURL.host()==host)
00199 {
00200 s2=*it;
00201 break;
00202 }
00203 }
00204 }
00205
00206 if (s2.isNull())
00207 {
00208 return;
00209 }
00210
00211 if (KUrl::isRelativeUrl(s2))
00212 {
00213 if (s2.startsWith(QLatin1String("//")))
00214 {
00215 s2=s2.prepend(d->url.protocol()+':');
00216 d->discoveredFeedURL=s2;
00217 }
00218 else if (s2.startsWith('/'))
00219 {
00220 d->discoveredFeedURL=d->url;
00221 d->discoveredFeedURL.setPath(s2);
00222 }
00223 else
00224 {
00225 d->discoveredFeedURL=d->url;
00226 d->discoveredFeedURL.addPath(s2);
00227 }
00228 d->discoveredFeedURL.cleanPath();
00229 }
00230 else
00231 d->discoveredFeedURL=s2;
00232
00233 d->discoveredFeedURL.cleanPath();
00234 }
00235
00236 }
00237
00238 #include "loader.moc"