[GRASS-SVN] r72748 - sandbox/wenzeslaus/g.citation

svn_grass at osgeo.org svn_grass at osgeo.org
Mon May 28 13:54:40 PDT 2018


Author: wenzeslaus
Date: 2018-05-28 13:54:40 -0700 (Mon, 28 May 2018)
New Revision: 72748

Modified:
   sandbox/wenzeslaus/g.citation/g.citation.py
Log:
g.citation: parse ORCID, year, desc for options, code structure

Modified: sandbox/wenzeslaus/g.citation/g.citation.py
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py	2018-05-28 19:52:30 UTC (rev 72747)
+++ sandbox/wenzeslaus/g.citation/g.citation.py	2018-05-28 20:54:40 UTC (rev 72748)
@@ -6,7 +6,7 @@
 #
 # AUTHOR(S):    Vaclav Petras <wenzeslaus AT gmail DOT com> (ORCID: 0000-0001-5566-9236)
 #               Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517)
-#               Markus Neteler <mneteler AT osgeo DOT org> (ORCID:0000-0003-1916-1966)
+#               Markus Neteler <mneteler AT osgeo DOT org> (ORCID: 0000-0003-1916-1966)
 #
 # PURPOSE:      Provide scientific citation for GRASS modules and add-ons.
 #
@@ -34,10 +34,11 @@
 #%end
 
 #%option
-#% key: style
+#% key: format
 #% type: string
-#% description: Citation style
-#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml
+#% description: Citation format or style
+#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml,plain,dict
+#% descriptions: bibtex;BibTeX;csl;cls;datacite;datacite;dublincore;dublincore;json;json;json-ld;json-ld;narcxml;narcxml;plain;Plain text;dict;Pretty printed Python dictionary
 #% answer: bibtex
 #% required: yes
 #%end
@@ -103,11 +104,25 @@
     out = []
     for line in lines:
         line = re.sub("<br.?>", "", line)
-        line = re.sub("</?[a-z]+ ?.+>", "", line)
+        line = re.sub("</?[a-z]+ ?[^>]*>", "", line)
         out.append(line)
     return out
 
 
+def clean_line_item(text):
+    text = text.strip()
+    text = re.sub(r"^, *", "", text)
+    text = re.sub(r",$", "", text)
+    return text
+
+
+def get_year_from_documentation(text):
+    year_capture = r"<p>\s*<i>Last changed: \$Date: ([\d]+)-\d\d-\d\d .*\$</i>"
+    match = re.search(year_capture, text,
+                      re.MULTILINE | re.DOTALL)
+    if match:
+        return int(match.group(1))
+
 def get_authors_from_documentation(text):
     raw_author_capture = "<h2>.*AUTHOR.*</h2>(.*)<p>\s*<i>Last changed:"
 
@@ -114,7 +129,7 @@
     raw_author_lines = [
         line.strip()
         for line in re.search(raw_author_capture, text,
-                              flags=re.MULTILINE | re.DOTALL)
+                              re.MULTILINE | re.DOTALL)
             .group(1).strip().splitlines()
         if line.strip()
     ]
@@ -126,20 +141,26 @@
     for line in raw_author_lines:
         if not line:
             continue
-        ai = line.split(",", 1)
-        name = ai[0].strip()
         institute = None
         feature = None
+        orcid = None
+        orcid_re = re.compile(r"\(?ORCID:? ?([0-9-]+)\)?", re.IGNORECASE)
+        match = re.search(orcid_re, line)
+        if match:
+            orcid = match.group(1)
+        line = re.sub(orcid_re, "", line)
+        ai = line.split(",", 1)
+        name = clean_line_item(ai[0])
         if len(ai) == 2:
-            institute = ai[1].strip()
+            institute = clean_line_item(ai[1])
         if " by " in name:
             feature, name = name.split(" by ", 1)
         authors.append({'name': name, 'institute': institute,
-                        'feature': feature})
+                        'feature': feature, 'orcid': orcid})
     return authors
 
 
-def write_bibtex(citation):
+def print_bibtex(citation):
     print("@software{", citation['module'], ",", sep="")
 
     print("title={", "GRASS GIS: ", citation['module'], " module},", sep="")
@@ -146,10 +167,38 @@
 
     author_names = [author['name'] for author in citation['authors']]
     print("author={", " and ".join(author_names), "},", sep="")
+    print("year={", citation['year'], "},", sep="")
 
     print("}")
 
 
+def print_plain(citation):
+    print("GRASSS GIS module", citation['module'])
+    num_authors = len(citation['authors'])
+    authors_text = ""
+    for i, author in enumerate(citation['authors']):
+        author_name = [ ]
+        authors_text += author['name']
+        if author['institute']:
+            authors_text += ", {institute}".format(**author)
+        if author['feature']:
+            authors_text += " ({feature})".format(**author)
+        if i < num_authors - 1:
+            authors_text += "\n"
+    print(authors_text)
+
+
+def print_citation(citation, output_format):
+    if output_format == 'bibtex':
+        print_bibtex(citation)
+    elif output_format == 'plain':
+        print_plain(citation)
+    elif output_format == 'dict':
+        pprint(citation)
+    else:
+        raise RuntimeError(_("Unsupported format or style"))
+
+
 def citation_for_module(name):
     path = documentation_filename(name)
 
@@ -158,11 +207,11 @@
 
     text = open(path).read()
 
-    authors = get_authors_from_documentation(text)
+    citation = {}
+    citation['module'] = name
+    citation['authors'] = get_authors_from_documentation(text)
+    citation['year'] = get_year_from_documentation(text)
 
-    citation = {}
-    citation['module'] =  name
-    citation['authors'] = authors
     return citation
 
 
@@ -169,15 +218,13 @@
 def main(options, flags):
 
     name = options['module']
+    output_format = options['format']
 
     citation = citation_for_module(name)
 
-    if options['style'] == 'bibtex':
-        write_bibtex(citation)
+    print_citation(citation, output_format)
 
-    pprint(citation)
 
-
 # TODO: consider "Extended by" versus original authors
 
 # LASTCHANGED, COPYRIGHT-YEARS, COPYRIGHT-ENTITY



More information about the grass-commit mailing list