[GRASS-SVN] r32982 - in grass/trunk: man tools/g.html2man

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Aug 21 12:23:12 EDT 2008


Author: glynn
Date: 2008-08-21 12:23:11 -0400 (Thu, 21 Aug 2008)
New Revision: 32982

Added:
   grass/trunk/tools/g.html2man/g.html2man.py
Modified:
   grass/trunk/man/Makefile
Log:
Add, use Python replacement for g.html2man


Modified: grass/trunk/man/Makefile
===================================================================
--- grass/trunk/man/Makefile	2008-08-21 15:33:16 UTC (rev 32981)
+++ grass/trunk/man/Makefile	2008-08-21 16:23:11 UTC (rev 32982)
@@ -6,7 +6,8 @@
 SECT = 1
 MANDIR  = $(GISBASE)/man/man$(SECT)
 HTMLDIR = $(GISBASE)/docs/html
-HTML2MAN = GRASS_PERL=${PERL} VERSION_NUMBER=${GRASS_VERSION_NUMBER} sh $(GRASS_HOME)/tools/g.html2man/g.html2man
+#HTML2MAN = GRASS_PERL=${PERL} VERSION_NUMBER=${GRASS_VERSION_NUMBER} sh $(GRASS_HOME)/tools/g.html2man/g.html2man
+HTML2MAN = $(GRASS_HOME)/tools/g.html2man/g.html2man.py
 
 MANPAGES := $(patsubst $(HTMLDIR)/%.html,$(MANDIR)/%.$(SECT),$(wildcard $(HTMLDIR)/*.html))
 

Added: grass/trunk/tools/g.html2man/g.html2man.py
===================================================================
--- grass/trunk/tools/g.html2man/g.html2man.py	                        (rev 0)
+++ grass/trunk/tools/g.html2man/g.html2man.py	2008-08-21 16:23:11 UTC (rev 32982)
@@ -0,0 +1,361 @@
+#!/usr/bin/env python
+# coding=iso-8859-1
+import sys
+import types
+import os
+import re
+from HTMLParser import HTMLParser
+from htmlentitydefs import entitydefs
+from StringIO import StringIO
+
+# Version string shown in the man page header.  Read from the environment;
+# an empty string is used when GRASS_VERSION_NUMBER is not set.
+version = os.environ.get('GRASS_VERSION_NUMBER', "")
+
+# Entity references that are replaced by hand-picked text; the parser
+# consults this table before falling back to htmlentitydefs.
+entities = dict(
+    lt="<",
+    gt=">",
+    amp="&",
+    nbsp=" ",
+    copy="©",
+    quot="\"",
+    bull="*"
+    )
+
+# Tags that the parser records as childless (tag, None) items instead of
+# pushing them on the element stack.
+single = frozenset([
+    "area", "base", "basefont", "br", "col", "frame",
+    "hr", "img", "input", "isindex", "link", "meta", "param"])
+
+# Element groups used to build the content-model tables (allowed and
+# excluded).  The grouping appears to follow the HTML 4 DTD entities of
+# the same names.
+heading = ["h1", "h2", "h3", "h4", "h5", "h6"]
+fontstyle = ["tt", "i", "b", "u", "s", "strike", "big", "small"]
+phrase = ["em", "strong", "dfn", "code", "samp", "kbd", "var", "cite",
+          "abbr", "acronym"]
+special = ["a", "img", "applet", "object", "font", "basefont", "br",
+           "script", "map", "q", "sub", "sup", "span", "bdo", "iframe"]
+formctrl = ["input", "select", "textarea", "label", "button"]
+# NOTE: this name shadows the builtin list() for the rest of the module;
+# it is kept unchanged because other code may refer to it.  Use
+# types.ListType for isinstance() checks.
+# The "dir" entry used to read " dir" (leading space), which kept <dir>
+# out of the block and flow groups.
+list = ["ul", "ol", "dir", "menu"]
+head_misc = ["script", "style", "meta", "link", "object"]
+pre_exclusion = ["img", "object", "applet", "big", "small", "sub", "sup",
+                 "font", "basefont"]
+block = ["p", "pre", "dl", "div", "center", "noscript", "noframes",
+         "blockquote", "form", "isindex", "hr", "table", "fieldset",
+         "address"] + heading + list
+inline = fontstyle + phrase + special + formctrl
+flow = block + inline
+html_content = ["head", "body"]
+head_content = ["title", "isindex", "base"]
+
+def setify(d):
+    """Return a new dict with the keys of d and each value as a frozenset."""
+    frozen = {}
+    for key in d:
+        frozen[key] = frozenset(d[key])
+    return frozen
+
+# Content model: element name -> elements that may appear directly inside
+# it.  Elements sharing a model are registered together; setify() then
+# turns every value into a frozenset.
+allowed = {}
+
+# inline content only
+allowed.update(dict.fromkeys([
+    "a", "abbr", "acronym", "b", "bdo", "big", "caption", "cite", "code",
+    "dfn", "dt", "em", "font", "h1", "h2", "h3", "h4", "h5", "h6", "i",
+    "kbd", "label", "legend", "p", "pre", "q", "s", "samp", "small",
+    "span", "strike", "strong", "sub", "sup", "tt", "u", "var"], inline))
+
+# block or inline content
+allowed.update(dict.fromkeys([
+    "blockquote", "button", "center", "dd", "del", "div", "form", "iframe",
+    "ins", "li", "noframes", "noscript", "td", "th"], flow))
+
+# list containers
+allowed.update(dict.fromkeys(["dir", "menu", "ol", "ul"], ["li"]))
+
+# table row groups
+allowed.update(dict.fromkeys(["tbody", "tfoot", "thead"], ["tr"]))
+
+# no child elements
+allowed.update(dict.fromkeys(
+    ["option", "script", "style", "textarea", "title"], []))
+
+# block or inline content plus <param>
+allowed.update(dict.fromkeys(["applet", "object"], flow + ["param"]))
+
+# one-off models
+allowed.update({
+    "address": inline + ["p"],
+    "body": flow + ["ins", "del"],
+    "colgroup": ["col"],
+    "dl": ["dt", "dd"],
+    "fieldset": flow + ["legend"],
+    "frameset": ["frameset", "frame", "noframes"],
+    "head": head_content + head_misc,
+    "html": html_content,
+    "map": block + ["area"],
+    "optgroup": ["option"],
+    "select": ["optgroup", "option"],
+    "table": ["caption", "col", "colgroup", "thead", "tfoot", "tbody",
+              "tr"], # to allow for <table>[implied <tbody>]<tr>
+    "tr": ["th", "td"]
+    })
+
+allowed = setify(allowed)
+
+# Elements that may not appear anywhere inside the keyed element, at any
+# nesting depth (the parser accumulates these sets while descending).
+excluded = setify(dict(
+    a=["a"],
+    button=formctrl + ["a", "form", "isindex", "fieldset", "iframe"],
+    dir=block,
+    form=["form"],
+    label=["label"],
+    menu=block,
+    pre=pre_exclusion
+    ))
+
+# groff templates, one per tag.  "@" marks where the rendered content of
+# the element is inserted; a template without "@" discards the content.
+# A (template, name) tuple also names the Formatter style variable that
+# is switched on while the content is rendered.
+formats = {
+    'b':        "\\fB@\\fR",
+    'i':        "\\fI@\\fR",
+    'em':       "\\fI@\\fR",
+    'code':     "\\fC@\\fR",
+    'span':     "\\fC@\\fR",
+    'sup':      "\\u@\\d",
+    'br':       "\n.br\n",
+    'hr':       "",
+    'h2':       "\n.SH @",
+    'h3':       "\n.SS @",
+    'h4':       "\n.SS @",
+    'dt':       ("\n.IP \"@\" 4m", 'no_nl'),
+    'dd':       "\n.br\n@",
+    'ul':       ("\n.RS\n@\n.RE\n", 'in_ul'),
+    'menu':     ("\n.RS\n@\n.RE\n", 'in_ul'),
+    'dir':      ("\n.RS\n@\n.RE\n", 'in_ul'),
+    # was "\n..IP\n..." -- the doubled dot is not the .IP macro
+    'ol':       ("\n.IP\n@\n.PP\n", 'index'),
+    'p':        "\n.PP\n@",
+    'pre':      ("\n\\fC\n.DS\n@\n.DE\n\\fR\n", 'preformat'),
+    'tr':       ("@\n.br\n", 'no_nl'),
+    'td':       "@\t"
+    }
+
+class Formatter:
+    """Write a parsed HTML tree to a stream as groff man-page source.
+
+    A tree node is a string (text), a (tag, content) tuple, or a list of
+    nodes.  Anything else, such as the None content of a childless tag,
+    is ignored by pp().
+    """
+
+    def __init__(self, stream = sys.stdout):
+        self.stream = stream
+        # Rendering state, saved and restored around nested elements:
+        #   preformat  keep line breaks of the text as they are
+        #   in_ul      the innermost list is an unnumbered one
+        #   no_nl      replace newlines in text with spaces
+        #   index      counters of the enclosing numbered lists
+        self.style = dict(preformat = False, in_ul = False, no_nl = False, index = [])
+        self.stack = []
+        # whitespace following a newline is dropped outside preformat mode
+        self.strip_re = re.compile("\n[ \t]+")
+
+    def set(self, var, val):
+        """Set style variable var to val."""
+        self.style[var] = val
+
+    def get(self, var):
+        """Return the current value of style variable var."""
+        return self.style[var]
+
+    def show(self, s):
+        """Write s to the output stream unchanged."""
+        self.stream.write(s)
+
+    def pp_with(self, content, var, val):
+        """Render content with style variable var temporarily set to val."""
+        self.stack.append(self.style.copy())
+        self.set(var, val)
+        self.pp(content)
+        self.style = self.stack.pop()
+
+    def _pp_ordered(self, content):
+        """Render the content of a numbered list.
+
+        A fresh counter is appended to 'index', and 'in_ul' is cleared so
+        that items are numbered even when the list sits inside an
+        unnumbered one.
+        """
+        self.stack.append(self.style.copy())
+        self.set('index', self.get('index') + [0])
+        self.set('in_ul', False)
+        self.pp(content)
+        self.style = self.stack.pop()
+
+    def fmt(self, format, content, var = None):
+        """Emit a template, rendering content at its "@" placeholder.
+
+        When var is given it names the style variable that is active while
+        the content is rendered; 'index' starts a numbered list, any other
+        name is set to True.  Without "@" the content is not rendered.
+        """
+        (pre, sep, post) = format.partition("@")
+        if pre != "":
+            self.show(pre)
+        if sep != "":
+            if var == 'index':
+                self._pp_ordered(content)
+            elif var:
+                self.pp_with(content, var, True)
+            else:
+                self.pp(content)
+        if post != "":
+            self.show(post)
+
+    def pp_li(self, content):
+        """Render one list item, numbered or not depending on the style."""
+        idx = self.get('index')
+        if self.get('in_ul') or not idx:
+            # Unnumbered item.  An <li> outside any list has no counter
+            # to advance, so it is rendered the same way.
+            self.fmt("\n.IP\n@", content)
+        else:
+            # The counter list is shared with the saved style of the
+            # enclosing list, so the increment carries over to the next
+            # sibling item.
+            idx[-1] += 1
+            sec = ".".join(map(str, idx))
+            self.show("\n.IP \\fB%s\\fR\n" % sec)
+            self.pp(content)
+
+    def pp_title(self):
+        """Emit the .TH header line; the page name comes from sys.argv[1]."""
+        self.show("\n.TH " +
+                  os.path.basename(sys.argv[1]).replace(".html","") +
+                  " 1 \"\" \"GRASS " +
+                  version +
+                  "\" \"Grass User's Manual\"")
+
+    def pp_tag(self, tag, content):
+        """Render an element: by template, as list item, as title, or bare."""
+        if tag in formats:
+            spec = formats[tag]
+            if isinstance(spec, types.StringType):
+                self.fmt(spec, content)
+            else:
+                (fmt, var) = spec
+                self.fmt(fmt, content, var)
+        elif tag == 'li':
+            self.pp_li(content)
+        elif tag == 'title':
+            # the title text itself is not used
+            self.pp_title()
+        else:
+            # unknown tag: drop the markup, keep the content
+            self.pp(content)
+
+    def pp_string(self, content):
+        """Write text with backslashes and quote characters escaped."""
+        s = content
+        if self.get('no_nl'):
+            s = s.replace("\n"," ")
+        # The backslash must be handled first; the later replacements
+        # insert backslashes of their own.
+        s = s.replace("\\", "\\(rs")
+        s = s.replace("'", "\\(cq")
+        s = s.replace("\"", "\\(dq")
+        s = s.replace("`", "\\(ga")
+        self.show(s)
+
+    def pp_text(self, content):
+        """Render a text node; empty strings produce no output."""
+        if content != "":
+            if self.get('preformat'):
+                # force a line break after every complete input line
+                for line in content.splitlines(True):
+                    self.pp_string(line)
+                    if line.endswith("\n"):
+                        self.show("\n.br\n")
+            else:
+                s = self.strip_re.sub('\n', content)
+                self.pp_string(s)
+
+    def pp_list(self, content):
+        """Render every node of a list in order."""
+        for item in content:
+            self.pp(item)
+
+    def pp(self, content):
+        """Render a node, dispatching on its Python type."""
+        # types.ListType rather than the name "list", which this module
+        # rebinds to a list of tag names.
+        if isinstance(content, types.ListType):
+            self.pp_list(content)
+        elif isinstance(content, types.TupleType):
+            (head, tail) = content
+            self.pp_tag(head, tail)
+        elif isinstance(content, types.StringType):
+            self.pp_text(content)
+
+class MyHTMLParser(HTMLParser):
+    """Build a nested (tag, content) tree from an HTML document.
+
+    After feed() and close(), the data attribute holds the top-level
+    nodes: strings for text, (tag, None) for tags listed in single, and
+    (tag, [children]) for all other elements.  Elements whose end tag
+    was omitted are closed implicitly, guided by the allowed and
+    excluded tables.
+    """
+
+    def __init__(self):
+        HTMLParser.__init__(self)
+        # The three stacks grow and shrink together, one entry per open
+        # element: its name, the exclusion set in force outside it, and
+        # the node list of its parent.
+        self.tag_stack = []
+        self.excluded = frozenset()
+        self.excluded_stack = []
+        self.data = []
+        self.data_stack = []
+
+    def top(self):
+        """Return the innermost open tag, or None when nothing is open."""
+        if self.tag_stack == []:
+            return None
+        else:
+            return self.tag_stack[-1]
+
+    def pop(self):
+        """Close the innermost element, add it to its parent, return its tag."""
+        self.excluded = self.excluded_stack.pop()
+        data = self.data
+        self.data = self.data_stack.pop()
+        tag = self.tag_stack.pop()
+        self.append((tag, data))
+        return tag
+
+    def push(self, tag):
+        """Open element tag; nodes are collected in a fresh list from now on."""
+        self.tag_stack.append(tag)
+        self.excluded_stack.append(self.excluded)
+        if tag in excluded:
+            self.excluded = self.excluded.union(excluded[tag])
+        self.data_stack.append(self.data)
+        self.data = []
+
+    def append(self, item):
+        """Add a node to the element that is currently open."""
+        self.data.append(item)
+
+    def is_allowed(self, tag):
+        """Tell whether tag may be opened inside the innermost element.
+
+        With nothing open, or with a parent that has no entry in the
+        allowed table, there is no content model to check against and
+        only the exclusions apply.
+        """
+        if tag in self.excluded:
+            return False
+        parent = self.top()
+        if parent not in allowed:
+            return True
+        return tag in allowed[parent]
+
+    def handle_starttag(self, tag, attrs):
+        # Implicitly close open elements that cannot contain this tag;
+        # stop once nothing is left to close.
+        while self.tag_stack != [] and not self.is_allowed(tag):
+            self.pop()
+        if tag not in single:
+            self.push(tag)
+        else:
+            self.append((tag,None))
+
+    def handle_entityref(self, name):
+        if name in entities:
+            self.handle_data(entities[name])
+        elif name in entitydefs:
+            self.handle_data(entitydefs[name])
+        else:
+            sys.stderr.write("unrecognized entity: %s\n" % name)
+
+    def handle_data(self, data):
+        self.append(data)
+
+    def handle_endtag(self, tag):
+        # An end tag without a matching open element is ignored.  This
+        # covers stray tags as well as <br/>-style tags, for which
+        # HTMLParser reports an end tag although the element was never
+        # pushed.
+        if tag not in self.tag_stack:
+            return
+        # close everything up to and including the matching element
+        while self.pop() != tag:
+            pass
+
+    def close(self):
+        """Finish parsing and close every element that is still open."""
+        HTMLParser.close(self)
+        # Without this, the content of unclosed elements would never
+        # reach the top-level node list.
+        while self.tag_stack != []:
+            self.pop()
+
+if __name__ == "__main__":
+    # usage: g.html2man.py <input.html> <output man page>
+    if len(sys.argv) < 3:
+        sys.stderr.write("Usage: %s input.html output\n" % sys.argv[0])
+        sys.exit(1)
+
+    # parse HTML
+    inf = file(sys.argv[1])
+    try:
+        p = MyHTMLParser()
+        p.feed(inf.read())
+        p.close()
+    finally:
+        # close the input even when parsing fails
+        inf.close()
+
+    # generate groff
+    sf = StringIO()
+    f = Formatter(sf)
+    f.pp(p.data)
+    s = sf.getvalue()
+    sf.close()
+
+    # strip excess whitespace: every run of blank space that contains a
+    # newline collapses to a single newline
+    blank_re = re.compile("[ \t\n]*\n[ \t\n]*")
+    s = blank_re.sub('\n', s)
+
+    # write groff
+    outf = file(sys.argv[2], 'w')
+    try:
+        outf.write(s)
+    finally:
+        outf.close()


Property changes on: grass/trunk/tools/g.html2man/g.html2man.py
___________________________________________________________________
Name: svn:executable
   + *
Name: svn:eol-style
   + native



More information about the grass-commit mailing list