[GRASS-SVN] r72744 - sandbox/wenzeslaus/g.citation

svn_grass at osgeo.org svn_grass at osgeo.org
Mon May 28 10:15:19 PDT 2018


Author: wenzeslaus
Date: 2018-05-28 10:15:19 -0700 (Mon, 28 May 2018)
New Revision: 72744

Added:
   sandbox/wenzeslaus/g.citation/g.citation.py
Removed:
   sandbox/wenzeslaus/g.citation/g.citation.sh
Log:
g.citation: complete rewrite to Python with parsing and output

Suggestion of code structure, basic clean up/parsing of names and institute,
removal of HTML tags, draft of BibTeX output.


Copied: sandbox/wenzeslaus/g.citation/g.citation.py (from rev 72743, sandbox/wenzeslaus/g.citation/g.citation.sh)
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py	                        (rev 0)
+++ sandbox/wenzeslaus/g.citation/g.citation.py	2018-05-28 17:15:19 UTC (rev 72744)
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+
+############################################################################
+#
+# MODULE:       g.citation
+#
+# AUTHOR(S):    Vaclav Petras <wenzeslaus AT gmail DOT com> (ORCID: 0000-0001-5566-9236)
+#               Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517)
+#               Markus Neteler <mneteler AT osgeo DOT org> (ORCID:0000-0003-1916-1966)
+#
+# PURPOSE:      Provide scientific citation for GRASS modules and add-ons.
+#
+# COPYRIGHT:    (C) 2018 by Vaclav Petras and the GRASS Development team
+#
+#               This program is free software under the GNU General Public
+#               License (>=v2). Read the file COPYING that comes with GRASS
+#               for details.
+#
+#############################################################################
+
+#%module
+#% description: Provide scientific citation for GRASS modules and add-ons.
+#% keyword: general
+#% keyword: metadata
+#% keyword: citation
+#%end
+
+#%option
+#% key: module
+#% type: string
+#% description: GRASS module to be cited
+#% required: yes
+#% multiple: no
+#%end
+
+#%option
+#% key: style
+#% type: string
+#% description: Citation style
+#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml
+#% answer: bibtex
+#% required: yes
+#%end
+
+#%option G_OPT_F_INPUT
+#% key: output
+#% type: string
+#% description: Path of the output file
+#% required: no
+#%end
+
+# TODO: if output is provided, write to ascii file
+# (otherwise print to command line)
+# TODO: Find lhmpom-equivalent in GRASS repository
+
+# x=$(wget -O - 'http://foo/g.region.html')
+
+# Which GRASS version is currently used ?
+# What Libraries, etc ?
+# g.version -erg
+
+from __future__ import print_function
+
+import os
+import re
+
+from pprint import pprint
+
+import grass.script as gs
+
+
+# TODO: copied from g.manual, possibly move to library
+# (lib has also online ones)
+def documentation_filename(entry):
+    """Get the local path of HTML documentation
+
+    Calls fatal when page is not found.
+    """
+    gisbase = os.environ['GISBASE']
+    path = os.path.join(gisbase, 'docs', 'html', entry + '.html')
+    if not os.path.exists(path) and os.getenv('GRASS_ADDON_BASE'):
+        path = os.path.join(
+            os.getenv('GRASS_ADDON_BASE'), 'docs', 'html',
+            entry + '.html')
+
+    if not os.path.exists(path):
+        gs.fatal(_("No HTML manual page entry for '%s'") % entry)
+
+    return path
+
+
+def remove_non_author_lines(lines):
+    out = []
+    for line in lines:
+        if "©" in line:
+            pass
+        else:
+            out.append(line)
+    return out
+
+
+def remove_html_tags(lines):
+    out = []
+    for line in lines:
+        line = re.sub("<br.?>", "", line)
+        line = re.sub("</?[a-z]+ ?.+>", "", line)
+        out.append(line)
+    return out
+
+
+def get_authors_from_documentation(text):
+    raw_author_capture = "<h2>.*AUTHOR.*</h2>(.*)<p>\s*<i>Last changed:"
+
+    raw_author_lines = [
+        line.strip()
+        for line in re.search(raw_author_capture, text,
+                              flags=re.MULTILINE | re.DOTALL)
+            .group(1).strip().splitlines()
+        if line.strip()
+    ]
+
+    raw_author_lines = remove_non_author_lines(raw_author_lines)
+    raw_author_lines = remove_html_tags(raw_author_lines)
+
+    authors = []
+    for line in raw_author_lines:
+        if not line:
+            continue
+        ai = line.split(",", 1)
+        name = ai[0].strip()
+        institute = None
+        feature = None
+        if len(ai) == 2:
+            institute = ai[1].strip()
+        if " by " in name:
+            feature, name = name.split(" by ", 1)
+        authors.append({'name': name, 'institute': institute,
+                        'feature': feature})
+    return authors
+
+
+def write_bibtex(citation):
+    print("@software{", citation['module'], ",", sep="")
+
+    print("title={", "GRASS GIS: ", citation['module'], " module},", sep="")
+
+    author_names = [author['name'] for author in citation['authors']]
+    print("author={", " and ".join(author_names), "},", sep="")
+
+    print("}")
+
+
+def citation_for_module(name):
+    path = documentation_filename(name)
+
+    # derive core strings from lhmpom:
+    # NAME / AUTHOR / LAST CHANGED / COPYRIGHT: Years + Entity
+
+    text = open(path).read()
+
+    authors = get_authors_from_documentation(text)
+
+    citation = {}
+    citation['module'] =  name
+    citation['authors'] = authors
+    return citation
+
+
+def main(options, flags):
+
+    name = options['module']
+
+    citation = citation_for_module(name)
+
+    if options['style'] == 'bibtex':
+        write_bibtex(citation)
+
+    pprint(citation)
+
+
+# TODO: consider "Extended by" versus original authors
+
+# LASTCHANGED, COPYRIGHT-YEARS, COPYRIGHT-ENTITY
+
+# LEFTOVERS:
+
+# A BibTeX entry for LaTeX users is:
+#
+# @Manual{GRASS_GIS_software,
+#  title = {Geographic Resources Analysis Support System (GRASS) Software},
+#  author = {{GRASS Development Team}},
+#  organization = {Open Source Geospatial Foundation},
+#  address = {USA},
+#  year = {YEAR},
+#  url = {http://grass.osgeo.org},
+# }
+
+if __name__ == '__main__':
+    main(*gs.parser())

Deleted: sandbox/wenzeslaus/g.citation/g.citation.sh
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.sh	2018-05-28 09:24:28 UTC (rev 72743)
+++ sandbox/wenzeslaus/g.citation/g.citation.sh	2018-05-28 17:15:19 UTC (rev 72744)
@@ -1,283 +0,0 @@
-#!/bin/sh
-
-############################################################################
-#
-# MODULE:       g.citation
-# AUTHOR(S):    Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517), Markus Neteler <mneteler AT osgeo DOT org> (ORCID:0000-0003-1916-1966)
-# PURPOSE:      Provide scientific citation for GRASS modules and add-ons. 
-#             
-# COPYRIGHT:    (C) 2018 by Peter Loewe (pl), Markus Neteler, and the GRASS Development team
-#
-#               This program is free software under the GNU General Public
-#               License (>=v2). Read the file COPYING that comes with GRASS
-#               for details.
-#
-#############################################################################
-#last edit: 20180104 pl
-
-
-#%Module
-#%  description: Provide scientific citation for GRASS modules and add-ons. THIS MODULE IS STILL UNDER DEVELOPMENT!   
-#%End
-
-
-#%option
-#% key: module
-#% type: string
-#% description: GRASS module to be cited
-#% required : yes
-#% multiple: no
-#%end
-
-#%option
-#% key: style
-#% type: string
-#% description: citation style
-#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml
-#% answer: bibtex
-#% required : yes
-#%end
-###^^^ Default to BibTex !
-
-#%option
-#% key: output
-#% type: string
-#% gisprompt: new_file,file,file
-#% description: name/path of output image file
-#% required : no
-#%end
-###^^^ if output is provided, write to ascii file, otherwise print to command line
-
-
-# TMPDIR=`g.gisenv get="GISDBASE"`
-# #TMPDIR is used to create the inner temporary latlon location
-
-export GIS_LOCK=$$
-
-###
-### Initialize
-###
-# Error routine: print the message given as $1 to stderr and exit with 1
-
-error_routine () {
-echo "ERROR: $1" >&2
-exit 1
-}
-
-#################################
-# is GRASS running ? if not: abort
-#################################
-
-if [ -z "$GISBASE" ] ; then
-  error_routine "You must be in GRASS to run this program."
-fi
-#################################
-
-#################################
-if [ "$1" != "@ARGS_PARSED@" ] ; then
-  exec $GISBASE/bin/g.parser "$0" "$@"
-fi
-#################################
-
-
-#################################
-
-###
-### 
-###
-
-# Find out, if / how many modules are provided by the user.
-
-
-if [ -z "$GIS_OPT_MODULE" ] ; then
-     error_routine "No module provided"    
-fi
-
-## Code snippet to deal with multiple modules (later): 
-# MAPS_LIST=`echo $GIS_OPT_INPUT | sed 's/,/ /g'`
-# 
-# MAPS_ARRAY=($(echo $MAPS_LIST | tr '\n' ' '))
-# MAPS_COUNT=${#MAPS_ARRAY[*]}
-# 
-# # echo "Zahl: $MAPS_COUNT"
-##
-
-###
-
-## TBD: Find lhmpom-equivalent in GRASS repository
-
-# x=$(wget -O - 'http://foo/g.region.html')
-
-###
-
-## Which GRASS version is currently used ?
-## What Libraries, etc ?
-
-#g.version -erg
-
-#version=7.2.0
-#date=2016
-#revision=exported
-#build_date=2016-12-28
-#build_platform=i686-pc-linux-gnu
-#build_off_t_size=8
-#libgis_revision=68908 
-#libgis_date="2016-07-09 20:12:57 +0200 (Sat, 09 Jul 2016) "
-#proj4=4.9.3
-#gdal=2.1.2
-#geos=3.5.1
-#sqlite=3.15.2
-
-
-#g.gisenv
-#MAPSET=user1
-#GISDBASE=/home/loewe/Projekte/locations
-#LOCATION_NAME=spearfish60_grass7
-#GUI=text
-#PID=1814
-
-
-###
-# Define the local directory of HTML man pages
-
-HTMLDIR="$GISBASE/docs/html"
-MODULEMANLOCAL="$HTMLDIR/$GIS_OPT_MODULE.html"
-
-### 
-# Verify that a man page exists for the module. Abort otherwise.
-if [ -e $MODULEMANLOCAL ]
-then
- echo 
- echo "OK: HTML for $GIS_OPT_MODULE available"
-else
- error_routine "No documenation for module $GIS_OPT_MODULE available"
-fi
-
-###
-
-## derive core strings from lhmpom: NAME / AUTHOR / LAST CHANGED / COPYRIGHT: Years + Entity
-
-#######################################
-#######################################
-#######################################
-### FUNCTIONS
-#######################################
-#######################################
-#######################################
-
-echo "function definition begins"
-######################################
-## Function to extract content from an existing HTML file
-## Example: function_extract_textfragment  
-### AUTHOR_STARTSTRING="<h2><a name=\"author\">AUTHOR</a></h2>"
-### AUTHOR_ENDSTRING="<p><i>Last changed:"
-### $MODULEMANLOCAL defines path to existing HTML document
-### function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $$MODULEMANLOCAL
-function_extract_textfragment()
-{
-STARTSTRING=$1
-ENDSTRING=$2
-SOURCE=$3
-echo
-echo "___________________________________"
-echo "START:function_extract_textfragment"
-
-STARTLINE=$(grep $STARTSTRING $SOURCE)
-ENDLINE=$(grep $ENDSTRING $SOURCE)
-
-echo " START: $STARTLINE"
-echo " STOP: $ENDLINE"
-#echo
-TEXTFRAGMENT_RAW=$(grep -A50 $STARTSTRING $SOURCE | grep -B50 $ENDSTRING | egrep -v "$STARTSTRING|$ENDSTRING")
-echo "RESULT: $TEXTFRAGMENT_RAW"
-#echo
-#remove all newlines, etc from string:
-echo "..."
-TEXTFRAGMENT=${TEXTFRAGMENT_RAW//[$'\t\n\r']}
-echo $TEXTFRAGMENT
-echo "---"
-#echo
-echo "STOP:function_extract_textfragment"
-echo "------------------------"
-
-echo
-}
-
-echo "function definition ends"
-echo
-
-
-############################
-
-#NAME
-
-#AUTHORS
-
-#AUTHORSTRING_RAW=`grep -A20 "$AUTHOR_STARTSTRING"  $MODULEMANLOCAL | grep -B20 "$AUTHOR_ENDSTRING" | egrep -v "$AUTHOR_STARTSTRING|$AUTHOR_ENDSTRING" `
-#echo $AUTHORSTRING_RAW
-#echo
-
-#AUTHOR_STARTSTRING="<h2><a name=\"author\">AUTHOR</a></h2>"
-AUTHOR_STARTSTRING=">AUTHOR</a></h2>"
-#AUTHOR_ENDSTRING="<p><i>Last changed:"
-AUTHOR_ENDSTRING="<p><i>Last"
-# !!! Problem here: quoting is lost when passing strings with spaces and special characters !!!
-
-#echo "function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL"
-#function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL
-
-AUTHORSTRING=`function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL`
-
-echo "MAIN TEXTFRAGMENT: $TEXTFRAGMENT"
-echo "***"
-
-echo "MAIN AUTHORSTRING $AUTHORSTRING"
-AUTHOR_FULLNAME=`echo $AUTHORSTRING_RAW| cut -d, -f1`
-AUTHOR_AFFILIATION=`echo $AUTHORSTRING_RAW| cut -d, -f2` 
-echo
-echo "Author NAME: $AUTHOR_FULLNAME"
-echo "Author AFFILIATION $AUTHOR_AFFILIATION"
-
-#very naive !
-# consider multiple authors: LINEBREAK as separator
-# consider additional authors: "Extended by"
-
-#LASTCHANGED
-
-#COPYRIGHT-YEARS
-
-#COPYRIGHT-ENTITY
-
-
-###
-
-echo
-echo
-echo "-COMPLETED-"
-###
-
-##IF output file is stated: print to file ELSE print to command line
-
-########################################
-#cleanup: remove location & files
-
-#rm $TMPDIR/.grassrc6_$TEMP_LOCATION
-#^^^activate if we have earlier started a write to the filesystem for whatever reason
-
-
-########################################
-#That's all, folks.
-
-
-###########LEFTOVERS_
-
-#A BibTeX entry for LaTeX users is:
-#
-#@Manual{GRASS_GIS_software,
-#  title = {Geographic Resources Analysis Support System (GRASS) Software},
-#  author = {{GRASS Development Team}},
-#  organization = {Open Source Geospatial Foundation},
-#  address = {USA},
-#  year = {YEAR},
-#  url = {http://grass.osgeo.org},
-#}



More information about the grass-commit mailing list