Version: (using KDE 3.5.2) Installed from: Ubuntu Packages. When dragging the URL of an HTML page to Akregator, it won't always find the RSS feed. I found this bug while trying to add both http://softiesonrails.com/ and http://podcast.rubyonrails.org/. The link tags look fine; I don't see any reason why Akregator would fail to find the feed.
Confirmed. The reason is that the regexp only matches when the attributes "rel" and "href" appear in that order (src/librss/loader.cpp).
SVN commit 575039 by osterfeld: use the more robust FeedDetector class from the konq plugin instead of the old and unmaintained "feed discovery" code in loader.cpp BUG: 126128 M +1 -1 Makefile.am A feeddetector.cpp [License: GPL (v2+) (+Qt exception)] A feeddetector.h [License: GPL (v2+) (+Qt exception)] M +25 -63 loader.cpp --- branches/KDE/3.5/kdepim/akregator/src/librss/Makefile.am #575038:575039 @@ -9,7 +9,7 @@ loader.h librss.h enclosure.h librsslocal_la_SOURCES = article.cpp document.cpp image.cpp textinput.cpp \ - tools_p.cpp loader.cpp enclosure.cpp category.cpp + tools_p.cpp loader.cpp enclosure.cpp category.cpp feeddetector.cpp librsslocal_la_METASOURCES = AUTO --- branches/KDE/3.5/kdepim/akregator/src/librss/loader.cpp #575038:575039 @@ -10,6 +10,7 @@ */ #include "loader.h" #include "document.h" +#include "feeddetector.h" #include <kio/job.h> #include <kprocess.h> @@ -377,74 +378,35 @@ void Loader::discoverFeeds(const QByteArray &data) { QString str = QString(data).simplifyWhiteSpace(); - QString s2; - //QTextStream ts( &str, IO_WriteOnly ); - //ts << data.data(); - - // "<[\\s]link[^>]*rel[\\s]=[\\s]\\\"[\\s]alternate[\\s]\\\"[^>]*>" - // "type[\\s]=[\\s]\\\"application/rss+xml\\\"" - // "href[\\s]=[\\s]\\\"application/rss+xml\\\"" - QRegExp rx( "(?:REL)[^=]*=[^sAa]*(?:service.feed|ALTERNATE)[\\s]*[^s][^s](?:[^>]*)(?:HREF)[^=]*=[^A-Z0-9-_~,./$]*([^'\">\\s]*)", false); - if (rx.search(str)!=-1) - s2=rx.cap(1); - else{ - // does not support Atom/RSS autodiscovery.. try finding feeds by brute force.... 
- int pos=0; - QStringList feeds; - QString host=d->url.host(); - rx.setPattern("(?:<A )[^H]*(?:HREF)[^=]*=[^A-Z0-9-_~,./]*([^'\">\\s]*)"); - while ( pos >= 0 ) { - pos = rx.search( str, pos ); - s2=rx.cap(1); - if (s2.endsWith(".rdf") || s2.endsWith(".rss") || s2.endsWith(".xml")) - feeds.append(s2); - if ( pos >= 0 ) { - pos += rx.matchedLength(); - } - } - - s2=feeds.first(); - KURL testURL; - // loop through, prefer feeds on same host - QStringList::Iterator end( feeds.end() ); - for ( QStringList::Iterator it = feeds.begin(); it != end; ++it ) { - testURL=*it; - if (testURL.host()==host) - { - s2=*it; - break; - } - } - } - - if (s2.isNull()) { - //kdDebug() << "No feed found for a site" << endl; - return; - } - - if (KURL::isRelativeURL(s2)) + + QStringList feeds; + + FeedDetectorEntryList list = FeedDetector::extractFromLinkTags(str); + + for (FeedDetectorEntryList::ConstIterator it = list.begin(); it != list.end(); ++it) { - if (s2.startsWith("//")) + feeds += (*it).url(); + } + + if (list.isEmpty()) + feeds = FeedDetector::extractBruteForce(str); + + QString feed = feeds.first(); + QString host = d->url.host(); + KURL testURL; + // loop through, prefer feeds on same host + QStringList::Iterator end( feeds.end() ); + for ( QStringList::Iterator it = feeds.begin(); it != end; ++it) + { + testURL=*it; + if (testURL.host() == host) { - s2=s2.prepend(d->url.protocol()+":"); - d->discoveredFeedURL=s2; + feed = *it; + break; } - else if (s2.startsWith("/")) - { - d->discoveredFeedURL=d->url; - d->discoveredFeedURL.setPath(s2); - } - else - { - d->discoveredFeedURL=d->url; - d->discoveredFeedURL.addPath(s2); - } - d->discoveredFeedURL.cleanPath(); } - else - d->discoveredFeedURL=s2; - d->discoveredFeedURL.cleanPath(); + d->discoveredFeedURL = feed.isNull() ? QString() : FeedDetector::fixRelativeURL(feed, d->url); } #include "loader.moc"