[GRASS-SVN] r72922 - sandbox/wenzeslaus/g.citation

svn_grass at osgeo.org svn_grass at osgeo.org
Thu Jun 28 18:01:20 PDT 2018


Author: wenzeslaus
Date: 2018-06-28 18:01:19 -0700 (Thu, 28 Jun 2018)
New Revision: 72922

Modified:
   sandbox/wenzeslaus/g.citation/g.citation.py
Log:
g.citation: parse ORCIDs from URLs

Modified: sandbox/wenzeslaus/g.citation/g.citation.py
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-29 00:57:51 UTC (rev 72921)
+++ sandbox/wenzeslaus/g.citation/g.citation.py	2018-06-29 01:01:19 UTC (rev 72922)
@@ -181,6 +181,40 @@
         # TODO: raise or fatal? should be in library or module?
         raise RuntimeError("The text does not contain date entry")
 
+def get_orcid(text):
+    """Get ORCID from text
+
+    Returns tuple (orcid, text_without_orcid)
+    Returns (None, text) if not found.
+
+    >>> # URL style
+    >>> print(get_orcid("http://orcid.org/0000-0000-0000-0000")[0])
+    0000-0000-0000-0000
+    >>> # ISBN style
+    >>> print(get_orcid("ORCID 0000-0000-0000-0000")[0])
+    0000-0000-0000-0000
+    >>> # URI style
+    >>> print(get_orcid("orcid:0000-0000-0000-0000")[0])
+    0000-0000-0000-0000
+    >>> # no ORCID
+    >>> print(get_orcid("orcid: No ORCID here, no here: orcid.org.")[0])
+    None
+    """
+    orcid = None
+    # ORCID as text
+    orcid_re = re.compile(r"\(?ORCID:? ?([0-9-]+)\)?", re.IGNORECASE)
+    match = re.search(orcid_re, text)
+    if match:
+        orcid = match.group(1)
+    else:
+        # ORCID as URL
+        orcid_re = re.compile(r"https?://orcid.org/([0-9-]+)", re.IGNORECASE)
+        match = re.search(orcid_re, text)
+        if match:
+            orcid = match.group(1)
+    text = re.sub(orcid_re, "", text)
+    return (orcid, text)
+
 def get_authors_from_documentation(text):
     r"""Extract authors and associated info from documentation
 
@@ -218,12 +252,8 @@
             continue
         institute = None
         feature = None
-        orcid = None
-        orcid_re = re.compile(r"\(?ORCID:? ?([0-9-]+)\)?", re.IGNORECASE)
-        match = re.search(orcid_re, line)
-        if match:
-            orcid = match.group(1)
-        line = re.sub(orcid_re, "", line)
+        
+        orcid, text = get_orcid(text)
         ai = line.split(",", 1)
         name = clean_line_item(ai[0])
         if len(ai) == 2:



More information about the grass-commit mailing list