Version: 1.2 (using KDE 3.5.0) Installed from: Gentoo Packages Compiler: 3.3.6 (Gentoo 3.3.6, ssp-3.3.6-1.0, pie-8.7.8) OS: Linux An example feed that shows the behaviour: http://mostlyharmful.blogdns.com/atom/ If an article is loaded from this Atom feed, HTML tags are visible. The feed is definitely correct; Akregator has the wrong behaviour, as it displays the HTML markup as text, e.g. "<p>", rather than either ignoring the tag or parsing it. Atom validator results: http://feedvalidator.org/check.cgi?url=http%3A%2F%2Fmostlyharmful.blogdns.com%2Fatom%2F Section 4.1.3.3 of the Atom spec (http://www.ietf.org/rfc/rfc4287.txt) states that all HTML markup within <content type="html"> should be escaped and that the markup may be used by Atom processors.
SVN commit 498704 by osterfeld: Fix atom:content parsing: don't show tags for Atom 1.0 feeds that contain escaped HTML BUG: 112491, 117938 M +36 -15 tools_p.cpp --- branches/KDE/3.5/kdepim/akregator/src/librss/tools_p.cpp #498703:498704 @@ -47,21 +47,42 @@ QDomElement e = node.toElement(); QString result; - if (elemName == "content" && ((e.hasAttribute("mode") && e.attribute("mode") == "xml") || !e.hasAttribute("mode"))) - result = childNodesAsXML(node); - else - result = e.text(); - - bool hasPre = result.contains("<pre>",false); - bool hasHtml = hasPre || result.contains("<"); // FIXME: test if we have html, should be more clever -> regexp - if(!isInlined && !hasHtml) // perform nl2br if not a inline elt and it has no html elts - result = result = result.replace(QChar('\n'), "<br />"); - if(!hasPre) // strip white spaces if no <pre> - result = result.simplifyWhiteSpace(); - - if (result.isEmpty()) - return QString::null; - + bool doHTMLCheck = true; + + if (elemName == "content") // we have Atom here + { + doHTMLCheck = false; + // the first line is always the Atom 0.3, the second Atom 1.0 + if (( e.hasAttribute("mode") && e.attribute("mode") == "escaped" && e.attribute("type") == "text/html" ) + || (!e.hasAttribute("mode") && e.attribute("type") == "html")) + { + result = KCharsets::resolveEntities(e.text().simplifyWhiteSpace()); // escaped html + } + else if (( e.hasAttribute("mode") && e.attribute("mode") == "escaped" && e.attribute("type") == "text/plain" ) + || (!e.hasAttribute("mode") && e.attribute("type") == "text")) + { + result = e.text().stripWhiteSpace(); // plain text + } + else if (( e.hasAttribute("mode") && e.attribute("mode") == "xml" ) + || (!e.hasAttribute("mode") && e.attribute("type") == "xhtml")) + { + result = childNodesAsXML(e); // embedded XHMTL + } + + } + + if (doHTMLCheck) // check for HTML; not necessary for Atom:content + { + bool hasPre = result.contains("<pre>",false); + bool hasHtml = hasPre || result.contains("<"); // FIXME: 
test if we have html, should be more clever -> regexp + if(!isInlined && !hasHtml) // perform nl2br if not a inline elt and it has no html elts + result = result = result.replace(QChar('\n'), "<br />"); + if(!hasPre) // strip white spaces if no <pre> + result = result.simplifyWhiteSpace(); + + if (result.isEmpty()) + return QString::null; + } return result; }