<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<!DOCTYPE bugzilla SYSTEM "https://bugs.kde.org/page.cgi?id=bugzilla.dtd">

<bugzilla version="5.0.6"
          urlbase="https://bugs.kde.org/"
          
          maintainer="sysadmin@kde.org"
>

    <bug>
          <bug_id>122047</bug_id>
          
          <creation_ts>2006-02-15 18:00:14 +0000</creation_ts>
          <short_desc>&amp;notin; is displayed as &amp;not;in;</short_desc>
          <delta_ts>2006-11-01 19:09:12 +0000</delta_ts>
          <reporter_accessible>1</reporter_accessible>
          <cclist_accessible>1</cclist_accessible>
          <classification_id>2</classification_id>
          <classification>Applications</classification>
          <product>konqueror</product>
          <component>khtml parsing</component>
          <version>unspecified</version>
          <rep_platform>Gentoo Packages</rep_platform>
          <op_sys>Linux</op_sys>
          <bug_status>RESOLVED</bug_status>
          <resolution>FIXED</resolution>
          
          
          <bug_file_loc></bug_file_loc>
          <status_whiteboard></status_whiteboard>
          <keywords></keywords>
          <priority>NOR</priority>
          <bug_severity>normal</bug_severity>
          <target_milestone>---</target_milestone>
          
          
          <everconfirmed>1</everconfirmed>
          <reporter name="Marijn Schouten">hkBst</reporter>
          <assigned_to name="Konqueror Bugs">konqueror-bugs-null</assigned_to>
          
          
          <cf_commitlink></cf_commitlink>
          <cf_versionfixedin></cf_versionfixedin>
          <cf_sentryurl></cf_sentryurl>
          <votes>0</votes>

      

      

      

          <comment_sort_order>oldest_to_newest</comment_sort_order>  
          <long_desc isprivate="0" >
    <commentid>414454</commentid>
    <comment_count>0</comment_count>
    <who name="Marijn Schouten">hkBst</who>
    <bug_when>2006-02-15 18:00:14 +0000</bug_when>
    <thetext>Version:           3.5.1 (using KDE KDE 3.5.1)
Installed from:    Gentoo Packages
Compiler:          gcc-3.4.5-vanilla 
OS:                Linux

The &amp;notin; HTML entity is displayed as &amp;not;in;</thetext>
  </long_desc><long_desc isprivate="0" >
    <commentid>481794</commentid>
    <comment_count>1</comment_count>
      <attachid>18337</attachid>
    <who name="Allan Sandfeld">kde</who>
    <bug_when>2006-10-31 18:03:21 +0000</bug_when>
    <thetext>Created attachment 18337
test case

test case.

The problem is that we support an IE quirk where we parse the beginning of an entity</thetext>
  </long_desc><long_desc isprivate="0" >
    <commentid>482168</commentid>
    <comment_count>2</comment_count>
    <who name="Allan Sandfeld">kde</who>
    <bug_when>2006-11-01 19:09:11 +0000</bug_when>
    <thetext>SVN commit 601019 by carewolf:

Don't let an IE quirk get in the way of parsing longer entities.
BUG: 122047


 M  +19 -7     htmltokenizer.cpp  
 M  +3 -2      htmltokenizer.h  


--- branches/KDE/3.5/kdelibs/khtml/html/htmltokenizer.cpp #601018:601019
@@ -395,7 +395,7 @@
         CachedScript* cs = 0;
 
         // forget what we just got, load from src url instead
-        if ( !currentScriptSrc.isEmpty() &amp;&amp; javascript &amp;&amp; 
+        if ( !currentScriptSrc.isEmpty() &amp;&amp; javascript &amp;&amp;
              (cs = parser-&gt;doc()-&gt;docLoader()-&gt;requestScript(currentScriptSrc, scriptSrcCharset) )) {
             cachedScript.enqueue(cs);
         }
@@ -601,6 +601,7 @@
     if( start )
     {
         cBufferPos = 0;
+        entityLen = 0;
         Entity = SearchEntity;
     }
 
@@ -696,8 +697,8 @@
                 if ( tag == NoTag ) {
                     const entity* e = kde_findEntity(cBuffer, cBufferPos);
                     if ( e &amp;&amp; e-&gt;code &lt; 256 ) {
-                        Entity = SearchSemicolon;
-                        break;
+                        EntityChar = e-&gt;code;
+                        entityLen = cBufferPos;
                     }
                 }
             }
@@ -705,8 +706,10 @@
             if(Entity == SearchSemicolon) {
                 if(cBufferPos &gt; 1) {
                     const entity *e = kde_findEntity(cBuffer, cBufferPos);
-                    if(e &amp;&amp; ( e-&gt;code &lt; 256 || *src == &apos;;&apos; ))
+                    if(e &amp;&amp; ( e-&gt;code &lt; 256 || *src == &apos;;&apos; )) {
                         EntityChar = e-&gt;code;
+                        entityLen = cBufferPos;
+                    }
                 }
             }
             break;
@@ -723,7 +726,17 @@
             if ( !EntityChar.isNull() ) {
                 checkBuffer();
                 // Just insert it
-                src.push( EntityChar );
+                *dest++ = EntityChar;
+                if (entityLen &gt; 0 &amp;&amp; entityLen &lt; cBufferPos) {
+                    int rem = cBufferPos - entityLen;
+                    for(int i = 0; i &lt; rem; i++)
+                        dest[i] = cBuffer[i+entityLen];
+                    dest += rem;
+                    if (pre)
+                        prePos += rem;
+                }
+                if (pre)
+                    prePos++;
             } else {
 #ifdef TOKEN_DEBUG
                 kdDebug( 6036 ) &lt;&lt; &quot;unknown entity!&quot; &lt;&lt; endl;
@@ -734,7 +747,6 @@
                 for(unsigned int i = 0; i &lt; cBufferPos; i++)
                     dest[i] = cBuffer[i];
                 dest += cBufferPos;
-                Entity = NoEntity;
                 if (pre)
                     prePos += cBufferPos+1;
             }
@@ -1154,7 +1166,7 @@
                         type.compare(&quot;text/livescript&quot;) != 0 &amp;&amp;
 			type.compare(&quot;application/x-javascript&quot;) != 0 &amp;&amp;
 			type.compare(&quot;application/x-ecmascript&quot;) != 0 &amp;&amp;
-			type.compare(&quot;application/javascript&quot;) != 0 &amp;&amp; 
+			type.compare(&quot;application/javascript&quot;) != 0 &amp;&amp;
 			type.compare(&quot;application/ecmascript&quot;) != 0 )
                         javascript = false;
                 } else if( a ) {
--- branches/KDE/3.5/kdelibs/khtml/html/htmltokenizer.h #601018:601019
@@ -133,10 +133,10 @@
     void timerEvent( QTimerEvent *e );
     virtual void setOnHold(bool _onHold);
     void abort() { m_abort = true; }
-    virtual void setAutoClose(bool b=true);    
+    virtual void setAutoClose(bool b=true);
     virtual bool isWaitingForScripts() const;
     virtual bool isExecutingScript() const;
-      
+
 protected:
     void reset();
     void addPending();
@@ -342,6 +342,7 @@
 #define CBUFLEN 1024
     char cBuffer[CBUFLEN+2];
     unsigned int cBufferPos;
+    unsigned int entityLen;
 
     khtml::TokenizerString src;
 
</thetext>
  </long_desc>
      
          <attachment
              isobsolete="0"
              ispatch="0"
              isprivate="0"
          >
            <attachid>18337</attachid>
            <date>2006-10-31 18:03:21 +0000</date>
            <delta_ts>2006-10-31 18:03:21 +0000</delta_ts>
            <desc>test case</desc>
            <filename>122047.html</filename>
            <type>text/html</type>
            <size>530</size>
            <attacher name="Allan Sandfeld">kde</attacher>
            
              <data encoding="base64">PGh0bWw+CjxzdHlsZT4KCXRhYmxlIHsgZm9udC1mYW1pbHk6IGZpeGVkfQo8L3N0eWxlPgo8Ym9k
eT4KVGVzdCBvZiBIVE1MIGVudGl0aWVzIGluIHF1aXJreSBtb2RlOgo8dGFibGUgd2lkdGg9MjAw
Pgo8dHI+PHRkPgogICAgJmFtcDthbXA7CTx0ZD4mYW1wOwo8dHI+PHRkPgogICAgJmFtcDthbXAJ
PHRkPiZhbXAKPHRyPjx0ZD4KICAgICZhbXA7YW1wbGUJPHRkPiZhbXBsZQo8dHI+PHRkPgogICAg
JmFtcDtub3Q7CTx0ZD4mbm90Owo8dHI+PHRkPgogICAgJmFtcDtub3QJPHRkPiZub3QKPHRyPjx0
ZD4KICAgICZhbXA7bm90YXQJPHRkPiZub3RhdAo8dHI+PHRkPgogICAgJmFtcDtub3RpbjsJPHRk
PiZub3RpbjsKPHRyPjx0ZD4KICAgICZhbXA7bm90aW4JPHRkPiZub3Rpbgo8dHI+PHRkPgogICAg
JmFtcDtub3RpbmEJPHRkPiZub3RpbmEKPHRyPjx0ZD4KICAgICZhbXA7Z2U7CTx0ZD4mZ2U7Cjx0
cj48dGQ+CiAgICAmYW1wO2dlCTx0ZD4mZ2UKPHRyPjx0ZD4KICAgICZhbXA7Z2VsCTx0ZD4mZ2Vs
CjwvdGFibGU+CjwvYm9keT4=
</data>

          </attachment>
      

    </bug>

</bugzilla>