[GRASS-SVN] r65544 - grass/trunk/tools/g.html2man
svn_grass at osgeo.org
svn_grass at osgeo.org
Tue Jul 7 21:13:49 PDT 2015
Author: zarch
Date: 2015-07-07 21:13:49 -0700 (Tue, 07 Jul 2015)
New Revision: 65544
Modified:
grass/trunk/tools/g.html2man/g.html2man.py
grass/trunk/tools/g.html2man/groff.py
grass/trunk/tools/g.html2man/html.py
grass/trunk/tools/g.html2man/rest.py
Log:
Clean up code according to PEP 8 rules; replace mixed tab/space indentation with spaces
Modified: grass/trunk/tools/g.html2man/g.html2man.py
===================================================================
--- grass/trunk/tools/g.html2man/g.html2man.py 2015-07-06 20:30:45 UTC (rev 65543)
+++ grass/trunk/tools/g.html2man/g.html2man.py 2015-07-08 04:13:49 UTC (rev 65544)
@@ -8,9 +8,11 @@
entities = {
'nbsp': " ",
'bull': "*"
- }
+}
# Remove ToC
+
+
def fix(content):
if isinstance(content, tuple):
tag, attrs, body = content
@@ -26,20 +28,25 @@
else:
return content
+
def main():
# parse HTML
infile = sys.argv[1]
inf = file(infile)
p = HTMLParser(entities)
for n, line in enumerate(inf):
- try:
- p.feed(line)
- except HTMLParseError as err:
- sys.stderr.write('%s:%d:%d: Parse error: %s\n' % (infile, err.lineno, err.offset, err.msg))
- sys.exit(1)
- except Exception as err:
- sys.stderr.write('%s:%d:0: Error (%s): %s\n' % (infile, n + 1, repr(err), line))
- sys.exit(1)
+ try:
+ p.feed(line)
+ except HTMLParseError as err:
+ sys.stderr.write(
+ '%s:%d:%d: Parse error: %s\n' %
+ (infile, err.lineno, err.offset, err.msg))
+ sys.exit(1)
+ except Exception as err:
+ sys.stderr.write(
+ '%s:%d:0: Error (%s): %s\n' %
+ (infile, n + 1, repr(err), line))
+ sys.exit(1)
p.close()
inf.close()
Modified: grass/trunk/tools/g.html2man/groff.py
===================================================================
--- grass/trunk/tools/g.html2man/groff.py 2015-07-06 20:30:45 UTC (rev 65543)
+++ grass/trunk/tools/g.html2man/groff.py 2015-07-08 04:13:49 UTC (rev 65544)
@@ -11,255 +11,261 @@
version = ""
styles = {
- 'b': "\\fB@\\fR",
- 'i': "\\fI@\\fR",
- 'em': "\\fI@\\fR",
- 'code': "\\fC@\\fR",
- 'span': "\\fC@\\fR",
- 'sup': "\\u@\\d",
- 'hr': ""
- }
+ 'b': "\\fB@\\fR",
+ 'i': "\\fI@\\fR",
+ 'em': "\\fI@\\fR",
+ 'code': "\\fC@\\fR",
+ 'span': "\\fC@\\fR",
+ 'sup': "\\u@\\d",
+ 'hr': ""
+}
formats = {
- 'br': "\n.br\n",
- 'h2': "\n.SH @",
- 'h3': "\n.SS @",
- 'h4': "\n.SS @",
- 'dt': ("\n.IP \"@\" 4m", 'no_nl'),
- 'dd': "\n.br\n@",
- 'ul': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
- 'menu': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
- 'dir': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
- 'ol': ("\n.IP\n@\n.PP\n", 'index'),
- 'p': "\n.PP\n@",
- 'pre': ("\n.br\n.nf\n\\fC\n@\n\\fR\n.fi\n", 'preformat')
- }
+ 'br': "\n.br\n",
+ 'h2': "\n.SH @",
+ 'h3': "\n.SS @",
+ 'h4': "\n.SS @",
+ 'dt': ("\n.IP \"@\" 4m", 'no_nl'),
+ 'dd': "\n.br\n@",
+ 'ul': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+ 'menu': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+ 'dir': ("\n.RS 4n\n@\n.RE\n", 'in_ul'),
+ 'ol': ("\n.IP\n@\n.PP\n", 'index'),
+ 'p': "\n.PP\n@",
+ 'pre': ("\n.br\n.nf\n\\fC\n@\n\\fR\n.fi\n", 'preformat')
+}
formats.update(styles)
+
def is_string(x):
return isinstance(x, str)
+
def is_tuple(x):
return isinstance(x, tuple)
+
def is_list(x):
return isinstance(x, list)
+
def is_blank(s):
return is_string(s) and s.strip() == ""
+
def clean(content):
return [item for item in content if not is_blank(item)]
+
class Formatter:
- def __init__(self, filename, stream = sys.stdout):
- self.stream = stream
- self.style = dict(preformat = False,
- in_ul = False,
- no_nl = False,
- in_table = False,
- in_tr = False,
- index = [])
- self.stack = []
- self.strip_re = re.compile("^[ \t]+")
- self.filename = filename
- self.at_bol = True
+ def __init__(self, filename, stream=sys.stdout):
+ self.stream = stream
+ self.style = dict(preformat=False,
+ in_ul=False,
+ no_nl=False,
+ in_table=False,
+ in_tr=False,
+ index=[])
+ self.stack = []
+ self.strip_re = re.compile("^[ \t]+")
+ self.filename = filename
+ self.at_bol = True
+
def warning(self, msg):
- sys.stderr.write(msg + '\n')
+ sys.stderr.write(msg + '\n')
def set(self, var, val):
- self.style[var] = val
+ self.style[var] = val
def get(self, var):
- return self.style[var]
+ return self.style[var]
def push(self, **kwargs):
- self.stack.append(self.style.copy())
- self.style.update(**kwargs)
+ self.stack.append(self.style.copy())
+ self.style.update(**kwargs)
def pop(self):
- self.style = self.stack.pop()
+ self.style = self.stack.pop()
def show(self, s):
- self.stream.write(s)
- if s != '':
- self.at_bol = s.endswith('\n')
+ self.stream.write(s)
+ if s != '':
+ self.at_bol = s.endswith('\n')
def pp_with(self, content, var, val):
- self.push()
- self.set(var, val)
- self.pp(content)
- self.pop()
+ self.push()
+ self.set(var, val)
+ self.pp(content)
+ self.pop()
- def fmt(self, format, content, var = None):
- # String.partition is only in 2.5+
- # (pre,sep,post) = format.partition("@")
- if self.get('no_nl') and '\n' in format:
- self.warning("can't handle line breaks in <dt>...</dt>")
- format = "@"
- f = format.split('@', 1)
- pre = f[0]
- if len(f) > 1:
- sep = '@'
- post = f[1]
- else:
- sep = ''
- post = ''
+ def fmt(self, format, content, var=None):
+ # String.partition is only in 2.5+
+ # (pre,sep,post) = format.partition("@")
+ if self.get('no_nl') and '\n' in format:
+ self.warning("can't handle line breaks in <dt>...</dt>")
+ format = "@"
+ f = format.split('@', 1)
+ pre = f[0]
+ if len(f) > 1:
+ sep = '@'
+ post = f[1]
+ else:
+ sep = ''
+ post = ''
- if pre != "":
- self.show(pre)
- if sep != "":
- if var:
- if var == 'index':
- val = self.get('index') + [0]
- else:
- val = True
- self.pp_with(content, var, val)
- else:
- self.pp(content)
- if post != "":
- self.show(post)
+ if pre != "":
+ self.show(pre)
+ if sep != "":
+ if var:
+ if var == 'index':
+ val = self.get('index') + [0]
+ else:
+ val = True
+ self.pp_with(content, var, val)
+ else:
+ self.pp(content)
+ if post != "":
+ self.show(post)
def pp_li(self, content):
- if self.get('in_ul'):
- self.fmt("\n.IP \(bu 4n\n@", content)
- else:
- idx = self.get('index')
- idx[-1] += 1
- sec = ".".join(map(str,idx))
- self.show("\n.IP \\fB%s\\fR\n" % sec)
- self.set('index', idx)
- self.pp(content)
+ if self.get('in_ul'):
+ self.fmt("\n.IP \(bu 4n\n@", content)
+ else:
+ idx = self.get('index')
+ idx[-1] += 1
+ sec = ".".join(map(str, idx))
+ self.show("\n.IP \\fB%s\\fR\n" % sec)
+ self.set('index', idx)
+ self.pp(content)
def pp_title(self):
- self.show("\n.TH " +
- os.path.basename(self.filename).replace(".html","") +
- " 1 \"\" \"GRASS " +
- version +
- "\" \"Grass User's Manual\"")
+ self.show("\n.TH " +
+ os.path.basename(self.filename).replace(".html", "") +
+ " 1 \"\" \"GRASS " +
+ version +
+ "\" \"Grass User's Manual\"")
def pp_tr(self, content):
- content = clean(content)
- self.push(in_tr = True)
- col = 0
- for item in content:
- if not is_tuple(item):
- self.warning("invalid item in table row: %s" % str(item))
- continue
- (tag, attrs, body) = item
- if tag not in ['td', 'th']:
- self.warning("invalid tag in table row: %s" % tag)
- continue
- if col > 0:
- self.show("\t \t")
- self.show("T{\n")
- self.pp(body)
- self.show("\nT}")
- col += 1
- self.show("\n")
- self.pop()
+ content = clean(content)
+ self.push(in_tr=True)
+ col = 0
+ for item in content:
+ if not is_tuple(item):
+ self.warning("invalid item in table row: %s" % str(item))
+ continue
+ (tag, attrs, body) = item
+ if tag not in ['td', 'th']:
+ self.warning("invalid tag in table row: %s" % tag)
+ continue
+ if col > 0:
+ self.show("\t \t")
+ self.show("T{\n")
+ self.pp(body)
+ self.show("\nT}")
+ col += 1
+ self.show("\n")
+ self.pop()
def pp_tbody(self, content):
- for item in content:
- if is_tuple(item):
- (tag, attrs, body) = item
- if tag in ['thead', 'tbody', 'tfoot']:
- self.pp_tbody(body)
- elif tag == 'tr':
- self.pp_tr(body)
- self.show(".sp 1\n")
+ for item in content:
+ if is_tuple(item):
+ (tag, attrs, body) = item
+ if tag in ['thead', 'tbody', 'tfoot']:
+ self.pp_tbody(body)
+ elif tag == 'tr':
+ self.pp_tr(body)
+ self.show(".sp 1\n")
def count_cols(self, content):
- cols = 0
- for item in content:
- n = 0
- if is_blank(item):
- pass
- elif is_tuple(item):
- (tag, attrs, body) = item
- if tag in ['thead', 'tbody', 'tfoot']:
- n = self.count_cols(body)
- elif tag == 'tr':
- n = len(clean(body))
- cols = max(cols, n)
- else:
- self.warning("invalid item in table: %s" % str(item))
- return cols
+ cols = 0
+ for item in content:
+ n = 0
+ if is_blank(item):
+ pass
+ elif is_tuple(item):
+ (tag, attrs, body) = item
+ if tag in ['thead', 'tbody', 'tfoot']:
+ n = self.count_cols(body)
+ elif tag == 'tr':
+ n = len(clean(body))
+ cols = max(cols, n)
+ else:
+ self.warning("invalid item in table: %s" % str(item))
+ return cols
def pp_table(self, content):
- cols = self.count_cols(content)
- if cols == 0:
- return
- self.show("\n.TS\nexpand;\n")
- self.show(" lw1 ".join(["lw60" for i in range(cols)]) + ".\n")
- self.pp_tbody(content)
- self.show("\n.TE\n")
+ cols = self.count_cols(content)
+ if cols == 0:
+ return
+ self.show("\n.TS\nexpand;\n")
+ self.show(" lw1 ".join(["lw60" for i in range(cols)]) + ".\n")
+ self.pp_tbody(content)
+ self.show("\n.TE\n")
def pp_tag(self, tag, content):
- if self.get('in_tr') and tag not in styles:
- self.pp(content)
- elif tag in formats:
- spec = formats[tag]
- if is_string(spec):
- self.fmt(spec, content)
- else:
- (fmt, var) = spec
- self.fmt(fmt, content, var)
- elif tag == 'table':
- if self.get('in_table'):
- self.warning("cannot handle nested tables")
- return
- self.push(in_table = True)
- self.pp_table(content)
- self.pop()
- elif tag == 'li':
- self.pp_li(content)
- elif tag == 'title':
- self.pp_title()
- else:
- self.pp(content)
+ if self.get('in_tr') and tag not in styles:
+ self.pp(content)
+ elif tag in formats:
+ spec = formats[tag]
+ if is_string(spec):
+ self.fmt(spec, content)
+ else:
+ (fmt, var) = spec
+ self.fmt(fmt, content, var)
+ elif tag == 'table':
+ if self.get('in_table'):
+ self.warning("cannot handle nested tables")
+ return
+ self.push(in_table=True)
+ self.pp_table(content)
+ self.pop()
+ elif tag == 'li':
+ self.pp_li(content)
+ elif tag == 'title':
+ self.pp_title()
+ else:
+ self.pp(content)
def pp_string(self, content):
- if content == "":
- return
- s = content
- if self.get('no_nl'):
- s = s.replace("\n"," ")
- s = s.replace("\\", "\\(rs")
- s = s.replace("'", "\\(cq")
- s = s.replace("\"", "\\(dq")
- s = s.replace("`", "\\(ga")
- s = s.replace("-", "\\-")
- if self.at_bol and s[0] in [".","'"]:
- s = "\\&" + s
- self.show(s)
+ if content == "":
+ return
+ s = content
+ if self.get('no_nl'):
+ s = s.replace("\n", " ")
+ s = s.replace("\\", "\\(rs")
+ s = s.replace("'", "\\(cq")
+ s = s.replace("\"", "\\(dq")
+ s = s.replace("`", "\\(ga")
+ s = s.replace("-", "\\-")
+ if self.at_bol and s[0] in [".", "'"]:
+ s = "\\&" + s
+ self.show(s)
def pp_text(self, content):
- if content == "":
- return
- lines = content.splitlines(True)
- if len(lines) != 1:
- for line in lines:
- self.pp_text(line)
- return
- else:
- content = lines[0]
- if self.at_bol and not self.get('preformat'):
- content = self.strip_re.sub('', content)
- self.pp_string(content)
+ if content == "":
+ return
+ lines = content.splitlines(True)
+ if len(lines) != 1:
+ for line in lines:
+ self.pp_text(line)
+ return
+ else:
+ content = lines[0]
+ if self.at_bol and not self.get('preformat'):
+ content = self.strip_re.sub('', content)
+ self.pp_string(content)
def pp_list(self, content):
- for item in content:
- self.pp(item)
+ for item in content:
+ self.pp(item)
def pp(self, content):
- if is_list(content):
- self.pp_list(content)
- elif is_tuple(content):
- (tag, attrs, body) = content
- self.pp_tag(tag, body)
- elif is_string(content):
- self.pp_text(content)
-
+ if is_list(content):
+ self.pp_list(content)
+ elif is_tuple(content):
+ (tag, attrs, body) = content
+ self.pp_tag(tag, body)
+ elif is_string(content):
+ self.pp_text(content)
Modified: grass/trunk/tools/g.html2man/html.py
===================================================================
--- grass/trunk/tools/g.html2man/html.py 2015-07-06 20:30:45 UTC (rev 65543)
+++ grass/trunk/tools/g.html2man/html.py 2015-07-08 04:13:49 UTC (rev 65544)
@@ -1,3 +1,5 @@
+from __future__ import (absolute_import, division, generators, nested_scopes,
+ print_function, unicode_literals, with_statement)
import sys
import HTMLParser as base
import htmlentitydefs
@@ -9,38 +11,40 @@
omit_start = ["body", "tbody", "head", "html"]
single = ["area", "base", "basefont", "br", "col", "frame",
- "hr", "img", "input", "isindex", "link", "meta", "param"]
+ "hr", "img", "input", "isindex", "link", "meta", "param"]
single = frozenset(single)
heading = ["h1", "h2", "h3", "h4", "h5", "h6"]
fontstyle = ["tt", "i", "b", "u", "s", "strike", "big", "small"]
-phrase = [ "em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr",
- "acronym"]
-special = [ "a", "img", "applet", "object", "font", "basefont", "br", "script",
- "map", "q", "sub", "sup", "span", "bdo", "iframe"]
-formctrl = [ "input", "select", "textarea", "label", "button"]
-lists = [ "ul", "ol", " dir", "menu"]
-head_misc = [ "script", "style", "meta", "link", "object"]
-pre_exclusion = [ "img", "object", "applet", "big", "small", "sub", "sup",
- "font", "basefont"]
-block = [ "p", "pre", "dl", "div", "center", "noscript", "noframes",
- "blockquote", "form", "isindex", "hr", "table", "fieldset",
- "address"] + heading + lists
+phrase = ["em", "strong", "dfn", "code", "samp", "kbd", "var", "cite", "abbr",
+ "acronym"]
+special = ["a", "img", "applet", "object", "font", "basefont", "br", "script",
+ "map", "q", "sub", "sup", "span", "bdo", "iframe"]
+formctrl = ["input", "select", "textarea", "label", "button"]
+lists = ["ul", "ol", " dir", "menu"]
+head_misc = ["script", "style", "meta", "link", "object"]
+pre_exclusion = ["img", "object", "applet", "big", "small", "sub", "sup",
+ "font", "basefont"]
+block = ["p", "pre", "dl", "div", "center", "noscript", "noframes",
+ "blockquote", "form", "isindex", "hr", "table", "fieldset",
+ "address"] + heading + lists
inline = fontstyle + phrase + special + formctrl
flow = block + inline
html_content = ["head", "body"]
head_content = ["title", "isindex", "base"]
+
def setify(d):
return dict([(key, frozenset(val)) for key, val in d.iteritems()])
+
def omit(allowed, tags):
result = {}
for k, v in allowed.iteritems():
- for t in tags:
- if t in v:
- v = v.union(allowed[t])
- result[k] = v
+ for t in tags:
+ if t in v:
+ v = v.union(allowed[t])
+ result[k] = v
return result
allowed = {
@@ -122,7 +126,7 @@
"u": inline,
"ul": ["li"],
"var": inline
- }
+}
allowed = setify(allowed)
allowed = omit(allowed, omit_start)
@@ -135,18 +139,20 @@
"label": ["label"],
"menu": block,
"pre": pre_exclusion
- }
+}
excluded = setify(excluded)
+
class HTMLParser(base.HTMLParser):
- def __init__(self, entities = None):
- base.HTMLParser.__init__(self)
- self.tag_stack = []
- self.excluded = frozenset()
- self.excluded_stack = []
- self.data = []
- self.data_stack = []
+
+ def __init__(self, entities=None):
+ base.HTMLParser.__init__(self)
+ self.tag_stack = []
+ self.excluded = frozenset()
+ self.excluded_stack = []
+ self.data = []
+ self.data_stack = []
self.decls = []
if entities:
self.entities = entities
@@ -154,61 +160,60 @@
self.entities = {}
def top(self):
- if self.tag_stack == []:
- return None
- else:
- return self.tag_stack[-1][0]
+ if self.tag_stack == []:
+ return None
+ else:
+ return self.tag_stack[-1][0]
def pop(self):
- self.excluded = self.excluded_stack.pop()
- data = self.data
- self.data = self.data_stack.pop()
- (tag, attrs) = self.tag_stack.pop()
- self.append((tag, attrs, data))
- return tag
+ self.excluded = self.excluded_stack.pop()
+ data = self.data
+ self.data = self.data_stack.pop()
+ (tag, attrs) = self.tag_stack.pop()
+ self.append((tag, attrs, data))
+ return tag
def push(self, tag, attrs):
- self.tag_stack.append((tag, attrs))
- self.excluded_stack.append(self.excluded)
- if tag in excluded:
- self.excluded = self.excluded.union(excluded[tag])
- self.data_stack.append(self.data)
- self.data = []
+ self.tag_stack.append((tag, attrs))
+ self.excluded_stack.append(self.excluded)
+ if tag in excluded:
+ self.excluded = self.excluded.union(excluded[tag])
+ self.data_stack.append(self.data)
+ self.data = []
def append(self, item):
- self.data.append(item)
+ self.data.append(item)
def is_allowed(self, tag):
- return tag not in self.excluded and tag in allowed[self.top()]
+ return tag not in self.excluded and tag in allowed[self.top()]
def handle_starttag(self, tag, attrs):
- if self.tag_stack != []:
- while not self.is_allowed(tag):
- self.pop()
- if tag not in single:
- self.push(tag, attrs)
- else:
- self.append((tag, attrs, None))
+ if self.tag_stack != []:
+ while not self.is_allowed(tag):
+ self.pop()
+ if tag not in single:
+ self.push(tag, attrs)
+ else:
+ self.append((tag, attrs, None))
def handle_entityref(self, name):
- if name in self.entities:
- self.handle_data(self.entities[name])
- elif name in htmlentitydefs.entitydefs:
- self.handle_data(htmlentitydefs.entitydefs[name])
- else:
- sys.stderr.write("unrecognized entity: %s\n" % name)
+ if name in self.entities:
+ self.handle_data(self.entities[name])
+ elif name in htmlentitydefs.entitydefs:
+ self.handle_data(htmlentitydefs.entitydefs[name])
+ else:
+ sys.stderr.write("unrecognized entity: %s\n" % name)
def handle_charref(self, name):
- sys.stderr.write('unsupported character reference <%s>' % name);
+ sys.stderr.write('unsupported character reference <%s>' % name)
def handle_data(self, data):
- self.append(data)
+ self.append(data)
def handle_endtag(self, tag):
- while True:
- if self.pop() == tag:
- break
+ while True:
+ if self.pop() == tag:
+ break
def handle_decl(self, decl):
self.decls.append(decl)
-
Modified: grass/trunk/tools/g.html2man/rest.py
===================================================================
--- grass/trunk/tools/g.html2man/rest.py 2015-07-06 20:30:45 UTC (rev 65543)
+++ grass/trunk/tools/g.html2man/rest.py 2015-07-08 04:13:49 UTC (rev 65544)
@@ -1,5 +1,6 @@
import sys
+
def match(node, tag, attr=None, val=None):
if not isinstance(node, tuple):
return False
@@ -13,6 +14,7 @@
return False
return True
+
def find(node, tag, attr=None, val=None):
if isinstance(node, tuple):
node = node[2]
@@ -23,22 +25,26 @@
return child
raise ValueError('child not found')
+
def children(node):
return node[2]
+
def text(node):
return children(node)[0]
+
def _(s):
return s # TODO
-def rest(root, f = sys.stdout):
+
+def rest(root, f=sys.stdout):
def write(text):
f.write(text)
def show(item, italic=False, bold=False):
if isinstance(item, str):
- spc = '' # if item[-1] == '\n' else ' '
+ spc = '' # if item[-1] == '\n' else ' '
fmt = '**' if bold else ('*' if italic else '')
write('%s%s%s%s' % (fmt, item, fmt, spc))
elif match(item, 'b'):
More information about the grass-commit
mailing list