[GRASS-SVN] r72925 - sandbox/wenzeslaus/g.citation
svn_grass at osgeo.org
svn_grass at osgeo.org
Thu Jun 28 18:32:05 PDT 2018
Author: wenzeslaus
Date: 2018-06-28 18:32:05 -0700 (Thu, 28 Jun 2018)
New Revision: 72925
Modified:
sandbox/wenzeslaus/g.citation/g.citation.py
Log:
g.citation: split more complex names into the CFF style
Modified: sandbox/wenzeslaus/g.citation/g.citation.py
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py 2018-06-29 01:17:23 UTC (rev 72924)
+++ sandbox/wenzeslaus/g.citation/g.citation.py 2018-06-29 01:32:05 UTC (rev 72925)
@@ -291,18 +291,100 @@
raise RuntimeError("The text does not contain source code URLs")
# TODO: Jr. separated by comma
def author_name_to_cff(text):
    """Split a full author name into the name parts used by CFF

    Currently, we mostly intend this code to help getting legacy records
    from GRASS manual pages to a parseable format, so we really need to
    address only the national naming styles common for GRASS in 80s-10s.
    This practically means American (US) names and couple other styles.

    :param text: full name with parts separated by whitespace
    :returns: dictionary with keys ``given``, ``particle``, ``family``
        and ``suffix``; ``particle`` and ``suffix`` are None when the
        name does not have them
    :raises NotImplementedError: if the name has three or four parts
        but it is not clear how to assign them
    :raises RuntimeError: if the name has fewer than two or more than
        four parts

    >>> d = author_name_to_cff("Richard G. Lathrop Jr.")
    >>> print(d['given'])
    Richard G.
    >>> print(d['family'])
    Lathrop
    >>> print(d['suffix'])
    Jr.
    >>> d = author_name_to_cff("Margherita Di Leo")
    >>> print(d['given'])
    Margherita
    >>> print(d['family'])
    Di Leo
    >>> d = author_name_to_cff("Henry A. Ford II")
    >>> print(d['suffix'])
    II
    """
    particles = ["von", "van", "der", "di", "de"]
    suffixes = ["jr", "jnr", "sr", "snr", "junior", "senior"]
    roman = "IVX"  # if you are 40th, we will fix it for you

    def is_suffix(token):
        """Return True if token is a generational suffix (Jr., III, ...)"""
        lowered = token.lower()
        for suffix in suffixes:
            if lowered == suffix:
                return True
            # only the abbreviated forms are written with a period
            if len(suffix) <= 3 and lowered == suffix + ".":
                return True
        # Roman numerals must be tested on the original token: they are
        # recognized by being composed solely of upper case numeral
        # letters, which is lost once the token is lowercased.
        return bool(token) and all(char in roman for char in token)

    def is_middle_initial(token):
        """Return True if token is one capital letter, optionally with period"""
        if token.isupper():
            if len(token) == 2 and token.endswith('.'):
                return True
            elif len(token) == 1:
                return True
        return False

    # split on any run of whitespace, so that repeated spaces do not
    # produce empty name parts
    names = text.split()
    # given and family required by CFF 1.0.3
    particle = None
    suffix = None
    if len(names) == 2:
        given, family = names
    elif len(names) == 3:
        first, middle, last = names
        if is_middle_initial(middle):
            given = " ".join([first, middle])
            family = last
        elif middle in particles:
            given = first
            particle = middle
            family = last
        elif middle[0].isupper() and middle.lower() in particles:
            # If particle starts with capital, it is often considered
            # to be part of family name.
            given = first
            family = " ".join([middle, last])
        else:
            raise NotImplementedError(
                "Not sure if <{n}> is family or middle name in <{t}>".format(
                    n=middle, t=text))
    elif len(names) == 4:
        # assuming that if you have suffix, you have a middle name
        if is_suffix(names[3]):
            given = " ".join([names[0], names[1]])
            family = names[2]
            suffix = names[3]
        else:
            raise NotImplementedError(
                "Not sure how to split <{}>".format(text))
    else:
        # translate the template first, then fill in the name, so that
        # the message can be found in the translation catalogue
        raise RuntimeError(
            _("Cannot split name <{}> correctly").format(text))
    return {'given': given, 'particle': particle, 'family': family,
            'suffix': suffix}
+
def print_cff(citation):
"""Create Citation File Format file from citation dictionary
- >>> print_cff({'module': 'g.tst', 'authors': [{'name': 'Joe Doe'}], 'year': 2011})
+ >>> authors = [{'name': 'Joe Doe', 'orcid': '0000-0000-0000-0000'}]
+ >>> cit = {'module': 'g.tst', 'authors': authors, 'year': 2011}
+ >>> cit.update({'grass-version': '7.4.1'})
+ >>> cit.update({'grass-build-date': '2018-06-07'})
+ >>> print_cff(cit)
cff-version: 1.0.3
message: "If you use this software, please cite it as below."
authors:
- family-names: Doe
given-names: Joe
+ orcid: 0000-0000-0000-0000
title: "GRASS GIS: g.tst module"
- version: TODO
- date-released: 2011-01-01
+ version: 7.4.1
+ date-released: 2018-06-07
license: GPL-2.0-or-later
"""
print("cff-version: 1.0.3")
@@ -314,22 +396,22 @@
# best shot should be taken for names which don't include family
# or given or which have different order
# here we just split based on first space into given and family
- given, family = author['name'].split(" ", 1)
- print(" - family-names:", family)
- print(" given-names:", given)
+ name = author_name_to_cff(author['name'])
+ print(" - family-names:", name['family'])
+ print(" given-names:", name['given'])
if author['orcid']:
print(" orcid:", author['orcid'])
print("title: \"GRASS GIS: ", citation['module'], " module\"", sep="")
- print("version:", "TODO")
+ print("version:", citation['grass-version'])
# CFF 1.0.3 does not say explicitly except for Date (so not any
# string), so assuming YAML timestamp
# (http://yaml.org/type/timestamp.html)
# now we have only the year, so using Jan 1
- print("date-released: ", citation['year'], "-01-01", sep="")
+ print("date-released:", citation['grass-build-date'])
# license string according to https://spdx.org/licenses/
# we know license of GRASS modules should be GPL>=2
print("license: GPL-2.0-or-later")
- if citation['keywords']:
+ if citation.get('keywords', None):
print("keywords:")
for keyword in citation['keywords']:
print(" -", keyword)
More information about the grass-commit
mailing list