Bug 132472

Summary: java import : spaces in strings cause next member var to be ignored
Product: [Applications] umbrello Reporter: JP Fournier <jfournier121>
Component: general Assignee: Umbrello Development Group <umbrello-devel>
Status: RESOLVED FIXED    
Severity: normal    
Priority: NOR    
Version: unspecified   
Target Milestone: ---   
Platform: Slackware   
OS: Linux   
Latest Commit: Version Fixed In:
Sentry Crash Report:
Attachments: patch that fixes this problem

Description JP Fournier 2006-08-16 02:29:38 UTC
Version:           1.5.4+ : svn 571261 (using KDE 3.5.0)
Installed from:    Slackware Packages
Compiler:          gcc 
OS:                Linux

Importing the class below results in the variable named string3 being ignored.



public class Test12 {
	
	public String string2 = " a";
	public String string3 = "String3";
	public String string4 = "String4";

}
Comment 1 JP Fournier 2006-08-16 02:33:31 UTC
Created attachment 17386 [details]
patch that fixes this problem 

The attached patch fixes this issue, but I'd suspect there may be a better way.

diff svn/kdesdk/umbrello/umbrello/codeimport/nativeimportbase.cpp
kdesdk/umbrello/umbrello/codeimport/nativeimportbase.cpp > spaces.diff
Comment 2 Oliver Kellogg 2006-08-16 11:38:36 UTC
Hi JP, thanks for the patch - I'll apply as soon as I have svn connectivity.
Comment 3 Oliver Kellogg 2006-08-23 06:59:09 UTC
SVN commit 576104 by okellogg:

NativeImportBase::split(): New function to deal with string and char constants.
The fillSource() of other importers needs updating too (will be done shortly.)
BUG:132472


 M  +2 -2      ChangeLog  
 M  +6 -12     umbrello/codeimport/javaimport.cpp  
 M  +53 -6     umbrello/codeimport/nativeimportbase.cpp  
 M  +7 -0      umbrello/codeimport/nativeimportbase.h  


--- branches/KDE/3.5/kdesdk/umbrello/ChangeLog #576103:576104
@@ -8,8 +8,8 @@
 * Java import: unable to import AzareusCore (131961)
 * Java import: error on multidimensional arrays (132017)
 * Java import - array types not resolved correctly (132035)
-* Java import - "final" and comments in method declaration
-  not parsed correctly (132174)
+* Java import - "final" and comments in method declaration not parsed correctly (132174)
+* Java import: spaces in strings cause next member var to be ignored (132472)
 * Java import - static member vars ignored in interfaces (132657)
 
 Version 1.5.4
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/javaimport.cpp #576103:576104
@@ -63,23 +63,17 @@
 }
 
 void JavaImport::fillSource(QString word) {
+    if (word[0] == '"' || word[0] == '\'') {
+        // string constants are handled by NativeImportBase::split()
+        m_source.append(word);
+        return;
+    }
     QString lexeme;
     const uint len = word.length();
-    bool inString = false;
     for (uint i = 0; i < len; i++) {
         const QChar& c = word[i];
-        if (c == '"') {
+        if (c.isLetterOrNumber() || c == '_' || c == '.') {
             lexeme += c;
-            if (i == 0 || word[i - 1] != '\\') {
-                if (inString) {
-                    m_source.append(lexeme);
-                    lexeme = QString::null;
-                }
-                inString = !inString;
-            }
-        } else if (inString ||
-                   c.isLetterOrNumber() || c == '_' || c == '.') {
-            lexeme += c;
         } else {
             if (!lexeme.isEmpty()) {
                 m_source.append(lexeme);
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/nativeimportbase.cpp #576103:576104
@@ -188,6 +188,56 @@
     return false;  // The input was not completely consumed by preprocessing.
 }
 
+/// Split the line so that a string is returned as a single element of the list,
+/// when not in a string then split at white space.
+QStringList NativeImportBase::split(QString line) {
+    QStringList list;
+    QString listElement;
+    bool inString = false, inCharConst = false;
+    bool seenSpace = false;
+    line = line.stripWhiteSpace();
+    for (uint i = 0; i < line.length(); i++) {
+        const QChar& c = line[i];
+        if (c == '"') {
+            listElement += c;
+            if (i > 0 && line[i - 1] == '\\')
+                continue;
+            if (inString) {
+                list.append(listElement);
+                listElement = QString::null;
+            }
+            inString = !inString;
+            seenSpace = false;
+        } else if (c == '\'') {
+            listElement += c;
+            if (i > 0 && line[i - 1] == '\\')
+                continue;
+            if (inCharConst) {
+                list.append(listElement);
+                listElement = QString::null;
+            }
+            inCharConst = !inCharConst;
+            seenSpace = false;
+        } else if (inString || inCharConst) {
+            listElement += c;
+        } else if (c == ' ' || c == '\t') {
+            if (seenSpace)
+                continue;
+            seenSpace = true;
+            if (!listElement.isEmpty()) {
+                list.append(listElement);
+                listElement = QString::null;
+            }
+        } else {
+            listElement += c;
+            seenSpace = false;
+        }
+    }
+    if (!listElement.isEmpty())
+        list.append(listElement);
+    return list;
+}
+
 /// The lexer. Tokenizes the given string and fills `m_source'.
 /// Stores possible comments in `m_comment'.
 void NativeImportBase::scan(QString line) {
@@ -202,14 +252,11 @@
             return;
         line = line.left(pos);
     }
-    line = line.simplifyWhiteSpace();
-    if (line.isEmpty())
+    if (line.contains(QRegExp("^\\s*$")))
         return;
-    QStringList words = QStringList::split( QRegExp("\\s+"), line );
+    QStringList words = split(line);
     for (QStringList::Iterator it = words.begin(); it != words.end(); ++it) {
-        QString word = (*it).stripWhiteSpace();
-        if (word.isEmpty())
-            continue;
+        QString word = *it;
         fillSource(word);
     }
 }
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/nativeimportbase.h #576103:576104
@@ -111,6 +111,13 @@
     virtual bool preprocess(QString& line);
 
     /**
+     * Split the line so that a string is returned as a single element of the list.
+     * When not in a string then split at white space.
+     * The default implementation is suitable for C style strings and char constants.
+     */
+    virtual QStringList split(QString line);
+
+    /**
      * Analyze the given word and fill `m_source'.
      * A "word" is a whitespace delimited item from the input line.
      * To be provided by the specific importer class.
Comment 4 Oliver Kellogg 2006-08-23 23:09:14 UTC
SVN commit 576339 by okellogg:

Adapt native importers to the changed string processing logic which is moved
out of fillSource() to the new virtual method NativeImportBase::split().
CCBUG:132472


 M  +56 -10    adaimport.cpp  
 M  +10 -1     adaimport.h  
 M  +0 -5      javaimport.cpp  
 M  +15 -19    nativeimportbase.cpp  
 M  +1 -10     pascalimport.cpp  
 M  +1 -18     pythonimport.cpp  


--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/adaimport.cpp #576338:576339
@@ -37,22 +37,68 @@
     m_inGenericFormalPart = false;
 }
 
+/// Split the line so that a string is returned as a single element of the list,
+/// when not in a string then split at white space.
+QStringList AdaImport::split(QString line) {
+    QStringList list;
+    QString listElement;
+    bool inString = false;
+    bool seenSpace = false;
+    line = line.stripWhiteSpace();
+    uint len = line.length();
+    for (uint i = 0; i < len; i++) {
+        const QChar& c = line[i];
+        if (inString) {
+            listElement += c;
+            if (i > 0 && line[i - 1] == '"')
+                continue;   // escaped quotation mark
+            list.append(listElement);
+            listElement = QString::null;
+            inString = false;
+        } else if (c == '"') {
+            inString = true;
+            if (!listElement.isEmpty())
+                list.append(listElement);
+            listElement = QString(c);
+            seenSpace = false;
+        } else if (c == '\'') {
+            if (i < len - 2 && line[i + 2] == '\'') {
+                // character constant
+                if (!listElement.isEmpty())
+                    list.append(listElement);
+                listElement = line.mid(i, 3);
+                i += 2;
+                list.append(listElement);
+                listElement = QString::null;
+                continue;
+            }
+            listElement += c;
+            seenSpace = false;
+        } else if (c.isSpace()) {
+            if (seenSpace)
+                continue;
+            seenSpace = true;
+            if (!listElement.isEmpty()) {
+                list.append(listElement);
+                listElement = QString::null;
+            }
+        } else {
+            listElement += c;
+            seenSpace = false;
+        }
+    }
+    if (!listElement.isEmpty())
+        list.append(listElement);
+    return list;
+}
+
 void AdaImport::fillSource(QString word) {
     QString lexeme;
     const uint len = word.length();
-    bool inString = false;
     for (uint i = 0; i < len; i++) {
         QChar c = word[i];
-        if (c == '"') {
+        if (c.isLetterOrNumber() || c == '_' || c == '.' || c == '#') {
             lexeme += c;
-            if (inString) {
-                m_source.append(lexeme);
-                lexeme = QString::null;
-            }
-            inString = !inString;
-        } else if (inString ||
-                   c.isLetterOrNumber() || c == '_' || c == '.' || c == '#') {
-            lexeme += c;
         } else {
             if (!lexeme.isEmpty()) {
                 m_source.append(lexeme);
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/adaimport.h #576338:576339
@@ -5,7 +5,7 @@
  *   the Free Software Foundation; either version 2 of the License, or     *
  *   (at your option) any later version.                                   *
  *                                                                         *
- *  copyright (C) 2005                                                     *
+ *  copyright (C) 2005-2006                                                *
  *  Umbrello UML Modeller Authors <uml-devel@ uml.sf.net>                  *
  ***************************************************************************/
 
@@ -36,6 +36,15 @@
     bool parseStmt();
 
     /**
+     * Split the line so that a string is returned as a single element of the list.
+     * When not in a string then split at white space.
+     * Reimplementation of method from NativeImportBase is required because of
+     * Ada's tic which is liable to be confused with the beginning of a character
+     * constant.
+     */
+    QStringList split(QString line);
+
+    /**
      * Implement abstract operation from NativeImportBase.
      */
     void fillSource(QString word);
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/javaimport.cpp #576338:576339
@@ -63,11 +63,6 @@
 }
 
 void JavaImport::fillSource(QString word) {
-    if (word[0] == '"' || word[0] == '\'') {
-        // string constants are handled by NativeImportBase::split()
-        m_source.append(word);
-        return;
-    }
     QString lexeme;
     const uint len = word.length();
     for (uint i = 0; i < len; i++) {
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/nativeimportbase.cpp #576338:576339
@@ -193,33 +193,26 @@
 QStringList NativeImportBase::split(QString line) {
     QStringList list;
     QString listElement;
-    bool inString = false, inCharConst = false;
+    QChar stringIntro = 0;  // buffers the string introducer character
     bool seenSpace = false;
     line = line.stripWhiteSpace();
     for (uint i = 0; i < line.length(); i++) {
         const QChar& c = line[i];
-        if (c == '"') {
+        if (stringIntro) {        // we are in a string
             listElement += c;
-            if (i > 0 && line[i - 1] == '\\')
-                continue;
-            if (inString) {
-                list.append(listElement);
-                listElement = QString::null;
+            if (c == stringIntro) {
+                if (line[i - 1] != '\\') {
+                    list.append(listElement);
+                    listElement = QString::null;
+                    stringIntro = 0;  // we are no longer in a string
+                }
             }
-            inString = !inString;
-            seenSpace = false;
-        } else if (c == '\'') {
-            listElement += c;
-            if (i > 0 && line[i - 1] == '\\')
-                continue;
-            if (inCharConst) {
+        } else if (c == '"' || c == '\'') {
+            if (!listElement.isEmpty()) {
                 list.append(listElement);
-                listElement = QString::null;
             }
-            inCharConst = !inCharConst;
+            listElement = stringIntro = c;
             seenSpace = false;
-        } else if (inString || inCharConst) {
-            listElement += c;
         } else if (c == ' ' || c == '\t') {
             if (seenSpace)
                 continue;
@@ -257,7 +250,10 @@
     QStringList words = split(line);
     for (QStringList::Iterator it = words.begin(); it != words.end(); ++it) {
         QString word = *it;
-        fillSource(word);
+        if (word[0] == '"' || word[0] == '\'')
+            m_source.append(word);  // string constants are handled by split()
+        else
+            fillSource(word);
     }
 }
 
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/pascalimport.cpp #576338:576339
@@ -44,19 +44,10 @@
 void PascalImport::fillSource(QString word) {
     QString lexeme;
     const uint len = word.length();
-    bool inString = false;
     for (uint i = 0; i < len; i++) {
         QChar c = word[i];
-        if (c == '"') {
+        if (c.isLetterOrNumber() || c == '_' || c == '.' || c == '#') {
             lexeme += c;
-            if (inString) {
-                m_source.append(lexeme);
-                lexeme = QString::null;
-            }
-            inString = !inString;
-        } else if (inString ||
-                   c.isLetterOrNumber() || c == '_' || c == '.' || c == '#') {
-            lexeme += c;
         } else {
             if (!lexeme.isEmpty()) {
                 m_source.append(lexeme);
--- branches/KDE/3.5/kdesdk/umbrello/umbrello/codeimport/pythonimport.cpp #576338:576339
@@ -93,27 +93,10 @@
 void PythonImport::fillSource(QString word) {
     QString lexeme;
     const uint len = word.length();
-    QChar stringIntro = 0;  // buffers the string introducer character
     for (uint i = 0; i < len; i++) {
         const QChar& c = word[i];
-        if (stringIntro) {        // we are in a string
+        if (c.isLetterOrNumber() || c == '_' || c == '.') {
             lexeme += c;
-            if (c == stringIntro) {
-                if (word[i - 1] != '\\') {
-                    m_source.append(lexeme);
-                    m_srcIndex++;
-                    lexeme = QString::null;
-                }
-                stringIntro = 0;  // we are no longer in a string
-            }
-        } else if (c == '"' || c == '\'') {
-            if (!lexeme.isEmpty()) {
-                m_source.append(lexeme);
-                m_srcIndex++;
-            }
-            lexeme = stringIntro = c;
-        } else if (c.isLetterOrNumber() || c == '_' || c == '.') {
-            lexeme += c;
         } else {
             if (!lexeme.isEmpty()) {
                 m_source.append(lexeme);