[GRASS-SVN] r72756 - sandbox/wenzeslaus/g.citation

svn_grass at osgeo.org svn_grass at osgeo.org
Thu May 31 20:40:46 PDT 2018


Author: wenzeslaus
Date: 2018-05-31 20:40:46 -0700 (Thu, 31 May 2018)
New Revision: 72756

Modified:
   sandbox/wenzeslaus/g.citation/g.citation.py
Log:
g.citation: more robust author section capture

Add flag to display warnings instead of ending with error.
Accommodate more ways in which the Author section and Subversion Date are written.
Raise runtime errors and turn them into GRASS errors at the top level.
Check if the section was captured.


Modified: sandbox/wenzeslaus/g.citation/g.citation.py
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-01 02:43:23 UTC (rev 72755)
+++ sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-01 03:40:46 UTC (rev 72756)
@@ -54,6 +54,11 @@
 #% description: Provide citation for all modules
 #%end
 
+#%flag
+#% key: s
+#% description: Skip errors, provide warning only
+#%end
+
 #%rules
 #% required: module,-a
 #% exclusive: module,-a
@@ -95,7 +100,7 @@
             entry + '.html')
 
     if not os.path.exists(path):
-        gs.fatal(_("No HTML manual page entry for '%s'") % entry)
+        raise RuntimeError(_("No HTML manual page entry for '%s'") % entry)
 
     return path
 
@@ -143,11 +148,13 @@
     >>> get_year_from_documentation(text)
     2011
     """
-    year_capture = r"<p>\s*<i>Last changed: \$Date: ([\d]+)-\d\d-\d\d .*\$</i>"
+    # we try to capture even when not properly worded (same below)
+    # offending modules: grep -IrnE '\$Date: ' | grep -v "Last changed:"
+    year_capture = r"<p>\s*<(i|em)>(Last changed: )?\$Date: ([\d]+)-\d\d-\d\d .*\$</(i|em)>"
     match = re.search(year_capture, text,
-                      re.MULTILINE | re.DOTALL)
+                      re.MULTILINE | re.DOTALL | re.IGNORECASE)
     if match:
-        return int(match.group(1))
+        return int(match.group(3))
     else:
         # TODO: raise or fatal? should be in library or module?
         raise RuntimeError("The text does not contain date entry")
@@ -159,13 +166,24 @@
     >>> pprint(get_authors_from_documentation(text))
     [{'feature': None, 'institute': None, 'name': 'Paul Kelly', 'orcid': None}]
     """
-    raw_author_capture = "<h2>.*AUTHOR.*</h2>(.*)<p>\s*<i>Last changed:"
+    # Some section names are singular, some plural.
+    # Additional tags can appear in the heading compiled documentation.
+    # TODO: ...or attributes
+    # HTML tags or section name can theoretically be different case.
+    # The "last changed" part might be missing.
+    # The i and em could be exchanged.
+    author_section_capture = r"<h2>.*AUTHOR.*</h2>(.*)<p>\s*<(i|em)>(Last changed:|\$Date:)"
 
+    match = re.search(author_section_capture, text,
+                      re.MULTILINE | re.DOTALL | re.IGNORECASE)
+    if match:
+        author_section = match.group(1)
+    else:
+        raise RuntimeError(_("Unable to find Authors section"))
+
     raw_author_lines = [
         line.strip()
-        for line in re.search(raw_author_capture, text,
-                              re.MULTILINE | re.DOTALL)
-            .group(1).strip().splitlines()
+        for line in author_section.strip().splitlines()
         if line.strip()
     ]
 
@@ -295,7 +313,15 @@
     output_format = options['format']
 
     for name in names:
-        citation = citation_for_module(name)
+        try:
+            citation = citation_for_module(name)
+        except RuntimeError as error:
+            message = _("Module {name}: {error}".format(**locals()))
+            if flags['s']:
+                gs.warning(message)
+                continue
+            else:
+                gs.fatal(message)
         print_citation(citation, output_format)
 
 # TODO: consider "Extended by" versus original authors



More information about the grass-commit mailing list