[GRASS-SVN] r72925 - sandbox/wenzeslaus/g.citation

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Jun 28 18:32:05 PDT 2018


Author: wenzeslaus
Date: 2018-06-28 18:32:05 -0700 (Thu, 28 Jun 2018)
New Revision: 72925

Modified:
   sandbox/wenzeslaus/g.citation/g.citation.py
Log:
g.citation: split more complex names into the CFF style

Modified: sandbox/wenzeslaus/g.citation/g.citation.py
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-29 01:17:23 UTC (rev 72924)
+++ sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-29 01:32:05 UTC (rev 72925)
@@ -291,18 +291,100 @@
         raise RuntimeError("The text does not contain source code URLs")
 
 
+# TODO: Jr. separated by comma
+def author_name_to_cff(text):
+    """Split a personal name into the name parts used by CFF
+
+    Currently, we mostly intend this code to help getting legacy records
+    from GRASS manual pages to a parseable format, so we really need to
+    address only the national naming styles common for GRASS in 80s-10s.
+    This practically means American (US) names and couple other styles.
+
+    :param text: full name with parts separated by whitespace
+    :returns: dictionary with keys given, particle, family, and suffix;
+        particle and suffix are None when the name does not have them
+    :raises NotImplementedError: if the split would be ambiguous
+    :raises RuntimeError: if the number of name parts is not supported
+
+    >>> d = author_name_to_cff("Richard G. Lathrop Jr.")
+    >>> print(d['given'])
+    Richard G.
+    >>> print(d['family'])
+    Lathrop
+    >>> print(d['suffix'])
+    Jr.
+    >>> d = author_name_to_cff("Margherita Di Leo")
+    >>> print(d['given'])
+    Margherita
+    >>> print(d['family'])
+    Di Leo
+    >>> print(author_name_to_cff("John B. Doe III")['suffix'])
+    III
+    """
+    particles = ["von", "van", "der", "di", "de"]
+    suffixes = ["jr", "jnr", "sr", "snr", "junior", "senior"]
+    roman = "IVX"  # if you are 40th, we will fix it for you
+    def is_suffix(word):
+        lower = word.lower()
+        for known in suffixes:
+            # only the abbreviations (up to 3 letters) may end with a period
+            if lower == known or (len(known) <= 3 and lower == known + "."):
+                return True
+        # generational numerals such as II, III, or IV
+        return bool(word) and all(char in roman for char in word)
+    def is_middle_initial(word):
+        # one capital letter, optionally followed by a period
+        return word.isupper() and (
+            len(word) == 1 or (len(word) == 2 and word.endswith(".")))
+    # any run of whitespace separates the parts, so no part is empty
+    names = text.split()
+    # given and family required by CFF 1.0.3
+    particle = suffix = None
+    if len(names) == 2:
+        given, family = names
+    elif len(names) == 3:
+        if is_middle_initial(names[1]):
+            given = " ".join(names[:2])
+            family = names[2]
+        elif names[1] in particles:
+            given, particle, family = names
+        elif names[1][0].isupper() and names[1].lower() in particles:
+            # a capitalized particle is often considered part of family name
+            given = names[0]
+            family = " ".join(names[1:])
+        else:
+            raise NotImplementedError("Not sure if <{n}> is family or middle name in <{t}>".format(n=names[1], t=text))
+    elif len(names) == 4:
+        # assuming that if you have suffix, you have a middle name
+        if is_suffix(names[3]):
+            given = " ".join(names[:2])
+            family = names[2]
+            suffix = names[3]
+        else:
+            raise NotImplementedError("Not sure how to split <{}>".format(text))
+    else:
+        # translate the message template first, then fill in the name
+        raise RuntimeError(_("Cannot split name <{}> correctly").format(text))
+    return {'given': given, 'particle': particle, 'family': family,
+            'suffix': suffix}
+
 def print_cff(citation):
     """Create Citation File Format file from citation dictionary
 
-    >>> print_cff({'module': 'g.tst', 'authors': [{'name': 'Joe Doe'}], 'year': 2011})
+    >>> authors = [{'name': 'Joe Doe', 'orcid': '0000-0000-0000-0000'}]
+    >>> cit = {'module': 'g.tst', 'authors': authors, 'year': 2011}
+    >>> cit.update({'grass-version': '7.4.1'})
+    >>> cit.update({'grass-build-date': '2018-06-07'})
+    >>> print_cff(cit)
     cff-version: 1.0.3
     message: "If you use this software, please cite it as below."
     authors:
       - family-names: Doe
         given-names: Joe
+        orcid: 0000-0000-0000-0000
     title: "GRASS GIS: g.tst module"
-    version: TODO
-    date-released: 2011-01-01
+    version: 7.4.1
+    date-released: 2018-06-07
     license: GPL-2.0-or-later
     """
     print("cff-version: 1.0.3")
@@ -314,22 +396,22 @@
         # best shot should be taken for names which don't include family
         # or given or which have different order
         # here we just split based on first space into given and family
-        given, family = author['name'].split(" ", 1)
-        print("  - family-names:", family)
-        print("    given-names:", given)
+        name = author_name_to_cff(author['name'])
+        print("  - family-names:", name['family'])
+        print("    given-names:", name['given'])
         if author['orcid']:
             print("    orcid:", author['orcid'])
     print("title: \"GRASS GIS: ", citation['module'], " module\"", sep="")
-    print("version:", "TODO")
+    print("version:", citation['grass-version'])
     # CFF 1.0.3 does not say expplicitely except for Date (so not any
     # string), so assuming YAML timestamp
     # (http://yaml.org/type/timestamp.html)
     # now we have only the year, so using Jan 1
-    print("date-released: ", citation['year'], "-01-01", sep="")
+    print("date-released:", citation['grass-build-date'])
     # license string according to https://spdx.org/licenses/
     # we know license of GRASS modules should be GPL>=2
     print("license: GPL-2.0-or-later")
-    if citation['keywords']:
+    if citation.get('keywords', None):
         print("keywords:")
         for keyword in citation['keywords']:
             print("  -", keyword)



More information about the grass-commit mailing list