Bug 122047

Summary: ∉ is displayed as ¬in;
Product: [Applications] konqueror Reporter: Marijn Schouten <hkBst>
Component: khtml parsing Assignee: Konqueror Developers <konq-bugs>
Status: RESOLVED FIXED    
Severity: normal    
Priority: NOR    
Version: unspecified   
Target Milestone: ---   
Platform: Gentoo Packages   
OS: Linux   
Latest Commit: Version Fixed In:
Attachments: test case

Description Marijn Schouten 2006-02-15 18:00:14 UTC
Version:           3.5.1 (using KDE KDE 3.5.1)
Installed from:    Gentoo Packages
Compiler:          gcc-3.4.5-vanilla 
OS:                Linux

The &notin; html entity is displayed as &not;in;
Comment 1 Allan Sandfeld 2006-10-31 18:03:21 UTC
Created attachment 18337 [details]
test case

test case.

The problem is that we support an IE quirk where we parse the beginning of an entity
Comment 2 Allan Sandfeld 2006-11-01 19:09:11 UTC
SVN commit 601019 by carewolf:

Don't let an IE quirk get in the way of parsing longer entities.
BUG: 122047


 M  +19 -7     htmltokenizer.cpp  
 M  +3 -2      htmltokenizer.h  


--- branches/KDE/3.5/kdelibs/khtml/html/htmltokenizer.cpp #601018:601019
@@ -395,7 +395,7 @@
         CachedScript* cs = 0;
 
         // forget what we just got, load from src url instead
-        if ( !currentScriptSrc.isEmpty() && javascript && 
+        if ( !currentScriptSrc.isEmpty() && javascript &&
              (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) {
             cachedScript.enqueue(cs);
         }
@@ -601,6 +601,7 @@
     if( start )
     {
         cBufferPos = 0;
+        entityLen = 0;
         Entity = SearchEntity;
     }
 
@@ -696,8 +697,8 @@
                 if ( tag == NoTag ) {
                     const entity* e = kde_findEntity(cBuffer, cBufferPos);
                     if ( e && e->code < 256 ) {
-                        Entity = SearchSemicolon;
-                        break;
+                        EntityChar = e->code;
+                        entityLen = cBufferPos;
                     }
                 }
             }
@@ -705,8 +706,10 @@
             if(Entity == SearchSemicolon) {
                 if(cBufferPos > 1) {
                     const entity *e = kde_findEntity(cBuffer, cBufferPos);
-                    if(e && ( e->code < 256 || *src == ';' ))
+                    if(e && ( e->code < 256 || *src == ';' )) {
                         EntityChar = e->code;
+                        entityLen = cBufferPos;
+                    }
                 }
             }
             break;
@@ -723,7 +726,17 @@
             if ( !EntityChar.isNull() ) {
                 checkBuffer();
                 // Just insert it
-                src.push( EntityChar );
+                *dest++ = EntityChar;
+                if (entityLen > 0 && entityLen < cBufferPos) {
+                    int rem = cBufferPos - entityLen;
+                    for(int i = 0; i < rem; i++)
+                        dest[i] = cBuffer[i+entityLen];
+                    dest += rem;
+                    if (pre)
+                        prePos += rem;
+                }
+                if (pre)
+                    prePos++;
             } else {
 #ifdef TOKEN_DEBUG
                 kdDebug( 6036 ) << "unknown entity!" << endl;
@@ -734,7 +747,6 @@
                 for(unsigned int i = 0; i < cBufferPos; i++)
                     dest[i] = cBuffer[i];
                 dest += cBufferPos;
-                Entity = NoEntity;
                 if (pre)
                     prePos += cBufferPos+1;
             }
@@ -1154,7 +1166,7 @@
                         type.compare("text/livescript") != 0 &&
 			type.compare("application/x-javascript") != 0 &&
 			type.compare("application/x-ecmascript") != 0 &&
-			type.compare("application/javascript") != 0 && 
+			type.compare("application/javascript") != 0 &&
 			type.compare("application/ecmascript") != 0 )
                         javascript = false;
                 } else if( a ) {
--- branches/KDE/3.5/kdelibs/khtml/html/htmltokenizer.h #601018:601019
@@ -133,10 +133,10 @@
     void timerEvent( QTimerEvent *e );
     virtual void setOnHold(bool _onHold);
     void abort() { m_abort = true; }
-    virtual void setAutoClose(bool b=true);    
+    virtual void setAutoClose(bool b=true);
     virtual bool isWaitingForScripts() const;
     virtual bool isExecutingScript() const;
-      
+
 protected:
     void reset();
     void addPending();
@@ -342,6 +342,7 @@
 #define CBUFLEN 1024
     char cBuffer[CBUFLEN+2];
     unsigned int cBufferPos;
+    unsigned int entityLen;
 
     khtml::TokenizerString src;