Bug 105764 - Man2html does not handle the mdoc \& escape
Summary: Man2html does not handle the mdoc \& escape
Status: RESOLVED FIXED
Alias: None
Product: kio
Classification: Frameworks and Libraries
Component: man (show other bugs)
Version: unspecified
Platform: Compiled Sources Linux
Importance: NOR normal
Target Milestone: ---
Assignee: Stephan Kulow
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2005-05-16 15:20 UTC by Nicolas Goutte
Modified: 2005-05-19 11:47 UTC (History)
0 users

See Also:
Latest Commit:
Version Fixed In:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Nicolas Goutte 2005-05-16 15:20:36 UTC
Version:            (using KDE Devel)
Installed from:    Compiled sources
Compiler:          gcc 3.3.1 -march=pentium2
OS:                Linux

In mdoc, the sequence \& indicates that the characters following it must not be recognized as a sequence. However, man2html does not honor this and still interprets the characters after \& as a sequence.

This breaks the display of mdoc.samples(7).
Comment 1 Nicolas Goutte 2005-05-19 09:57:51 UTC
SVN commit 415646 by goutte:

- a few special characters more
- try to improve code for handling \&
  (However, this is more of a hack, as I have tried to change the meaning
  of \& . I would say that the problem is not the processing of \& itself
  but the recognition of sequences, especially in mdoc pages.)
CCBUG:105764


 M  +27 -9     trunk/KDE/kdebase/kioslave/man/man2html.cpp  


--- trunk/KDE/kdebase/kioslave/man/man2html.cpp #415645:415646
@@ -250,7 +250,6 @@
 #endif
 
 // Missing characters from man(7):
-// \*R ®
 // \*S "Change to default font size"
 
 //used in expand_char, e.g. for "\(bu"
@@ -522,13 +521,12 @@
     { V('c','u'), 1, "∪" },
     { V('g','r'), 1, "V" }, // gradient ### TODO Where in Unicode?
     { V('C','R'), 1, "↵" },
-    { V('s','t'), 1, "-)" }, // "such that" ### TODO Where in Unicode?
+    { V('s','t'), 2, "-)" }, // "such that" ### TODO Where in Unicode?
     { V('/','_'), 1, "∠" },
     { V('w','p'), 1, "℘" },
     { V('l','z'), 1, "◊" },
     { V('a','n'), 1, "-" }, // "horizontal arrow extension"  ### TODO Where in Unicode?
     // mdoc-only, see mdoc.samples(7)
-    // ### TODO: the same sequence can start by \*
     { V('R','q'), 1, "”" },
     { V('L','q'), 1, "“" },
     { V('L','e'), 1, "≤" },
@@ -539,9 +537,11 @@
     { V('I','f'), 1, "∞" },
     { V('N','a'), 3, "NaN" }, // Not a Number ### TODO: does it exist in Unicode?
     { V('B','a'), 1, "|" },
+    { V('q',' '), 1, "&quot;" },
     // end mdoc-only
     // man(7)
-    { V('T','m'), 1, "™" }
+    { V('T','m'), 1, "™" }, // \*(TM
+    { V('R',' '), 1, "®" }  // \*R
     // end man(7)
 };
 
@@ -987,10 +987,13 @@
 static int skip_escape=0;
 static int single_escape=0;
 
+// ### TODO known missing escapes from groff(7):
+// ### TODO \& \! \) \:
+
 static char *scan_escape(char *c)
 {
-    const char *h=NULL;
-    char b[32];
+    const char *h=NULL; // help pointer
+    char b[32]; // help array
     INTDEF *intd;
     int exoutputp,exskipescape;
     int i,j;
@@ -1005,6 +1008,7 @@
     case ' ':
 	h=" ";curpos++; break;
     case '"': SKIPEOL; c--; h=""; break;
+    // ### TODO \# like \" but does not ignore the end of line (groff(7))
     case '$':
 	if (argument) {
 	    c++;
@@ -1029,7 +1033,17 @@
     case 'r':
     case 'u':
     case '\n':
-    case '&': h=""; break;
+        h=""; break;
+    case '&': // ### FIXME
+    {
+        // We need to print the next character, to skip its special meaning
+        c++;
+        // As we need a zero-terminated string we use the array b
+        b[0]=*c;
+        b[1]=0;
+        h=b;
+        break;
+    }
     case '(':
        c++;
        i= c[0]*256+c[1];
@@ -1044,7 +1058,7 @@
 	    c++;
 	} else
 	    i= *c *256+' ';
-	h = expand_string(i);
+        h = expand_string(i);  // ### TODO \*S has probably to done in another way, man(7)
 	break;
     case 'f':
 	c++;
@@ -1161,7 +1175,11 @@
 		default: b[0]=i; b[1]=0; h=b; curpos++; break;
 	}
 	break;
-    default: b[0]=*c; b[1]=0; h=b; curpos++; break;
+     case '\'': h="´";curpos++; break; // groff(7) ### TODO verify
+     case '`': h="`";curpos++; break; // groff(7)
+     case '-': h="-";curpos++; break; // groff(7)
+     case '.': h=".";curpos++; break; // groff(7)
+     default: b[0]=*c; b[1]=0; h=b; curpos++; break;
     }
     c++;
     if (!skip_escape) out_html(h);
Comment 2 Nicolas Goutte 2005-05-19 11:47:14 UTC
SVN commit 415679 by goutte:

- ignore correctly sequences preceded by \& (mdoc)
- \& is now again treated as a normal zero-width character (i.e. empty string)
BUG:105764
This is a big step toward fully supporting the display of mdoc.samples(7).
CCBUG:105765


 M  +7 -15     trunk/KDE/kdebase/kioslave/man/man2html.cpp  


--- trunk/KDE/kdebase/kioslave/man/man2html.cpp #415678:415679
@@ -1033,17 +1033,8 @@
     case 'r':
     case 'u':
     case '\n':
+    case '&':
         h=""; break;
-    case '&': // ### FIXME
-    {
-        // We need to print the next character, to skip its special meaning
-        c++;
-        // As we need a zero-terminated string we use the array b
-        b[0]=*c;
-        b[1]=0;
-        h=b;
-        break;
-    }
     case '(':
        c++;
        i= c[0]*256+c[1];
@@ -3976,7 +3967,7 @@
 }
 */
 static int contained_tab=0;
-static int mandoc_line=0;	/* Signals whether to look for embedded mandoc
+static bool mandoc_line=false;	/* Signals whether to look for embedded mandoc
 				 * commands.
 				 */
 
@@ -4024,11 +4015,12 @@
 	    FLUSHIBP;
 	    h = scan_request(h);
 	    if (h && san && h[-1]=='\n') h--;
-	} else if (mandoc_line
+	} else if (mandoc_line // ### FIXME: a mdoc request must directly start after a space
+	           && *(h-1) && isspace(*(h-1)) // We can always go back, as there is at least the sequence at the start of line
 		   && *(h) && isupper(*(h))
 		   && *(h+1) && islower(*(h+1))
 		   && *(h+2) && isspace(*(h+2))) {
-	    /* BSD imbedded command eg ".It Fl Ar arg1 Fl Ar arg2" */
+	    // mdoc(7) embedded command eg ".It Fl Ar arg1 Fl Ar arg2"
 	    FLUSHIBP;
 	    h = scan_request(h);
 	    if (san && h[-1]=='\n') h--;
@@ -4172,8 +4164,8 @@
 {
     char *ret;
     char *end = c;
-    int oldval = mandoc_line;
-    mandoc_line = 1;
+    bool oldval = mandoc_line;
+    mandoc_line = true;
     while (*end && *end != '\n') {
         end++;
     }