[GRASS-SVN] r32990 - grass/trunk/tools/g.html2man

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Aug 21 22:02:44 EDT 2008


Author: glynn
Date: 2008-08-21 22:02:44 -0400 (Thu, 21 Aug 2008)
New Revision: 32990

Modified:
   grass/trunk/tools/g.html2man/g.html2man.py
Log:
Better support for tables, lists, and pre-formatted sections


Modified: grass/trunk/tools/g.html2man/g.html2man.py
===================================================================
--- grass/trunk/tools/g.html2man/g.html2man.py	2008-08-22 02:01:53 UTC (rev 32989)
+++ grass/trunk/tools/g.html2man/g.html2man.py	2008-08-22 02:02:44 UTC (rev 32990)
@@ -145,51 +145,84 @@
 
 excluded = setify(excluded)
 
-formats = {
+styles = {
     'b':        "\\fB@\\fR",
     'i':        "\\fI@\\fR",
     'em':       "\\fI@\\fR",
     'code':     "\\fC@\\fR",
     'span':     "\\fC@\\fR",
     'sup':      "\\u@\\d",
+    'hr':       ""
+    }
+
+formats = {
     'br':       "\n.br\n",
-    'hr':       "",
     'h2':       "\n.SH @",
     'h3':       "\n.SS @",
     'h4':       "\n.SS @",
     'dt':       ("\n.IP \"@\" 4m", 'no_nl'),
     'dd':       "\n.br\n@",
-    'ul':       ("\n.RS\n@\n.RE\n", 'in_ul'),
-    'menu':     ("\n.RS\n@\n.RE\n", 'in_ul'),
-    'dir':      ("\n.RS\n@\n.RE\n", 'in_ul'),
-    'ol':       ("\n..IP\n@\n.PP\n", 'index'),
+    'ul':       ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+    'menu':     ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+    'dir':      ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+    'ol':       ("\n.IP\n@\n.PP\n", 'index'),
     'p':        "\n.PP\n@",
-    'pre':      ("\n\\fC\n.DS\n@\n.DE\n\\fR\n", 'preformat'),
-    'tr':       ("@\n.br\n", 'no_nl'),
-    'td':       "@\t"
+    'pre':      ("\n.br\n.nf\n\\fC\n@\n\\fR\n.fi\n", 'preformat')
     }
 
+formats.update(styles)
+
+def is_string(x):
+    return isinstance(x, types.StringType)
+
+def is_tuple(x):
+    return isinstance(x, types.TupleType)
+
+def is_list(x):
+    return isinstance(x, types.ListType)
+
+def is_blank(s):
+    return is_string(s) and s.strip() == ""
+
+def clean(content):
+    return [item for item in content if not is_blank(item)]
+
 class Formatter:
     def __init__(self, stream = sys.stdout):
 	self.stream = stream
-	self.style = dict(preformat = False, in_ul = False, no_nl = False, index = [])
+	self.style = dict(preformat = False,
+			  in_ul = False,
+			  no_nl = False,
+			  in_table = False,
+			  in_tr = False,
+			  index = [])
 	self.stack = []
 	self.strip_re = re.compile("\n[ \t]+")
 
+    def warning(self, msg):
+	sys.stderr.write(msg + '\n')
+
     def set(self, var, val):
 	self.style[var] = val
 
     def get(self, var):
 	return self.style[var]
 
+    def push(self, **kwargs):
+	self.stack.append(self.style.copy())
+	self.style.update(**kwargs)
+
+    def pop(self):
+	self.style = self.stack.pop()
+
     def show(self, s):
 	self.stream.write(s)
 
     def pp_with(self, content, var, val):
-	self.stack.append(self.style.copy())
+	self.push()
 	self.set(var, val)
 	self.pp(content)
-	self.style = self.stack.pop()
+	self.pop()
 
     def fmt(self, format, content, var = None):
 	(pre,sep,post) = format.partition("@")
@@ -209,7 +242,7 @@
 
     def pp_li(self, content):
 	if self.get('in_ul'):
-	    self.fmt("\n.IP\n@", content)
+	    self.fmt("\n.IP \(bu 4n\n@", content)
 	else:
 	    idx = self.get('index')
 	    idx[-1] += 1
@@ -225,14 +258,80 @@
 		  version +
 		  "\" \"Grass User's Manual\"")
 
+    def pp_tr(self, content):
+	content = clean(content)
+	self.push(in_tr = True)
+	col = 0
+	for item in content:
+	    if not is_tuple(item):
+		self.warning("invalid item in table row: %s" % str(item))
+		continue
+	    (tag, tail) = item
+	    if tag not in ['td', 'th']:
+		self.warning("invalid tag in table row: %s" % tag)
+		continue
+	    if col > 0:
+		self.show("\t \t")
+	    self.show("T{\n")
+	    self.pp(tail)
+	    self.show("\nT}")
+	    col += 1
+	self.show("\n")
+	self.pop()
+
+    def pp_tbody(self, content):
+	for item in content:
+	    if is_tuple(item):
+		(tag, tail) = item
+		if tag in ['thead', 'tbody', 'tfoot']:
+		    self.pp_tbody(tail)
+		elif tag == 'tr':
+		    self.pp_tr(tail)
+		    self.show(".sp 1\n")
+
+    def count_cols(self, content):
+	cols = 0
+	for item in content:
+	    n = 0
+	    if is_blank(item):
+		pass
+	    elif is_tuple(item):
+		(tag, tail) = item
+		if tag in ['thead', 'tbody', 'tfoot']:
+		    n = self.count_cols(tail)
+		elif tag == 'tr':
+		    n = len(clean(tail))
+		cols = max(cols, n)
+	    else:
+		self.warning("invalid item in table: %s" % str(item))
+	return cols
+
+    def pp_table(self, content):
+	cols = self.count_cols(content)
+	if cols == 0:
+	    return
+	self.show("\n.TS\nexpand;\n")
+	self.show(" lw1 ".join(["lw60" for i in range(cols)]) + ".\n")
+	self.pp_tbody(content)
+	self.show("\n.TE\n")
+
     def pp_tag(self, tag, content):
-	if tag in formats:
+	if self.get('in_tr') and tag not in styles:
+	    self.pp(content)
+	elif tag in formats:
 	    spec = formats[tag]
-	    if isinstance(spec, types.StringType):
+	    if is_string(spec):
 		self.fmt(spec, content)
 	    else:
 		(fmt, var) = spec
 		self.fmt(fmt, content, var)
+	elif tag == 'table':
+	    if self.get('in_table'):
+		self.warning("cannot handle nested tables")
+		return
+	    self.push(in_table = True)
+	    self.pp_table(content)
+	    self.pop()
 	elif tag == 'li':
 	    self.pp_li(content)
 	elif tag == 'title':
@@ -253,10 +352,7 @@
     def pp_text(self, content):
 	if content != "":
 	    if self.get('preformat'):
-		for line in content.splitlines(True):
-		    self.pp_string(line)
-		    if line.endswith("\n"):
-			self.show("\n.br\n")
+		self.pp_string(content)
 	    else:
 		s = self.strip_re.sub('\n', content)
 		self.pp_string(s)
@@ -266,12 +362,12 @@
 	    self.pp(item)
 
     def pp(self, content):
-	if isinstance(content, types.ListType):
+	if is_list(content):
 	    self.pp_list(content)
-	elif isinstance(content, types.TupleType):
+	elif is_tuple(content):
 	    (head, tail) = content
 	    self.pp_tag(head, tail)
-	elif isinstance(content, types.StringType):
+	elif is_string(content):
 	    self.pp_text(content)
 
 class MyHTMLParser(HTMLParser):



More information about the grass-commit mailing list