Version: 3.3 (using KDE 3.3.1, (3.1)) Compiler: gcc version 3.3.5 (Debian 1:3.3.5-2) OS: Linux (i686) release 2.4.22 When I move the mouse pointer over an HTML file and the popup window shows information about the file, the encoding of the HTML header is wrong (tried with CP1251 and KOI8-R).
Created attachment 9872 [details] The header of the HTML file in the popup window has the wrong encoding
Eeek. It just handles everything as latin1. How broken
SVN commit 607813 by mkoller: BUG: 100126 Handle charset definition to correctly display the HTML page title M +26 -4 kfile_html.cpp --- branches/KDE/3.5/kdeaddons/kfile-plugins/html/kfile_html.cpp #607812:607813 @@ -28,6 +28,7 @@ #include <qcstring.h> #include <qfile.h> #include <qregexp.h> +#include <qtextcodec.h> typedef KGenericFactory<KHtmlPlugin> HtmlFactory; @@ -73,7 +74,7 @@ f.readBlock(data.data(), maxBufSize); data[maxBufSize]='\0'; - QString s(data.data()); + QString s(data); int start=0, last=0; QRegExp exp; @@ -90,10 +91,11 @@ last += exp.matchedLength(); } + QString title; exp.setPattern("<\\s*title\\s*>\\s*(.*)\\s*<\\s*/\\s*title\\s*>"); if (exp.search(s, last) != -1) { - appendItem(group, "Title", exp.cap(1)); + title = exp.cap(1); last += exp.matchedLength(); } @@ -101,8 +103,10 @@ QString meta, name, content; exp.setPattern("<\\s*meta\\s*([^>]*)\\s*>"); - QRegExp rxName = QRegExp("(?:name|http-equiv)\\s*=\\s*\"([^\"]+)\""); - QRegExp rxContent = QRegExp("content\\s*=\\s*\"([^\"]+)\""); + QRegExp rxName("(?:name|http-equiv)\\s*=\\s*\"([^\"]+)\"", false); + QRegExp rxContent("content\\s*=\\s*\"([^\"]+)\"", false); + QRegExp rxCharset("charset\\s*=\\s*(.*)", false); + QTextCodec *codec = 0; // find the meta tags last = 0; @@ -124,8 +128,26 @@ content = rxContent.cap(1); appendItem(metatags, name, content.left(50)); + + // check if it has a charset defined + if ( rxCharset.search(content) != -1 ) + { + kdDebug(7034) << "CodecForName : " << rxCharset.cap(1) << endl; + codec = QTextCodec::codecForName(rxCharset.cap(1).ascii()); + } } + if ( ! title.isEmpty() ) + { + if ( codec ) + { + title = codec->toUnicode(title.ascii()); + kdDebug(7034) << "Codec : " << codec->name() << endl; + } + + appendItem(group, "Title", title); + } + // find out if it contains javascript exp.setPattern("<script>");
*** Bug 102710 has been marked as a duplicate of this bug. ***