[GRASS-SVN] r72744 - sandbox/wenzeslaus/g.citation
svn_grass at osgeo.org
svn_grass at osgeo.org
Mon May 28 10:15:19 PDT 2018
Author: wenzeslaus
Date: 2018-05-28 10:15:19 -0700 (Mon, 28 May 2018)
New Revision: 72744
Added:
sandbox/wenzeslaus/g.citation/g.citation.py
Removed:
sandbox/wenzeslaus/g.citation/g.citation.sh
Log:
g.citation: complete rewrite to Python with parsing and output
Suggestion of code structure, basic clean up/parsing of names and institute,
removal of HTML tags, draft of BibTeX output.
Copied: sandbox/wenzeslaus/g.citation/g.citation.py (from rev 72743, sandbox/wenzeslaus/g.citation/g.citation.sh)
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.py (rev 0)
+++ sandbox/wenzeslaus/g.citation/g.citation.py 2018-05-28 17:15:19 UTC (rev 72744)
@@ -0,0 +1,199 @@
+#!/usr/bin/env python
+
+############################################################################
+#
+# MODULE: g.citation
+#
+# AUTHOR(S): Vaclav Petras <wenzeslaus AT gmail DOT com> (ORCID: 0000-0001-5566-9236)
+# Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517)
+# Markus Neteler <mneteler AT osgeo DOT org> (ORCID:0000-0003-1916-1966)
+#
+# PURPOSE: Provide scientific citation for GRASS modules and add-ons.
+#
+# COPYRIGHT: (C) 2018 by Vaclav Petras and the GRASS Development team
+#
+# This program is free software under the GNU General Public
+# License (>=v2). Read the file COPYING that comes with GRASS
+# for details.
+#
+#############################################################################
+
+#%module
+#% description: Provide scientific citation for GRASS modules and add-ons.
+#% keyword: general
+#% keyword: metadata
+#% keyword: citation
+#%end
+
+#%option
+#% key: module
+#% type: string
+#% description: GRASS module to be cited
+#% required: yes
+#% multiple: no
+#%end
+
+#%option
+#% key: style
+#% type: string
+#% description: Citation style
+#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml
+#% answer: bibtex
+#% required: yes
+#%end
+
+# The file named here is written by the module, so the standard option
+# for a new (output) file is used rather than the one for an existing
+# (input) file.
+#%option G_OPT_F_OUTPUT
+#% key: output
+#% type: string
+#% description: Path of the output file
+#% required: no
+#%end
+
+# TODO: if output is provided, write to ascii file
+# (otherwise print to command line)
+# TODO: Find lhmpom-equivalent in GRASS repository
+
+# x=$(wget -O - 'http://foo/g.region.html')
+
+# Which GRASS version is currently used ?
+# What Libraries, etc ?
+# g.version -erg
+
+from __future__ import print_function
+
+import os
+import re
+
+from pprint import pprint
+
+import grass.script as gs
+
+
+# TODO: copied from g.manual, possibly move to library
+# (lib has also online ones)
+def documentation_filename(entry):
+    """Return the path of the local HTML manual page for *entry*.
+
+    The page ``<entry>.html`` is looked up under ``docs/html`` in the
+    directory given by the ``GISBASE`` environment variable. When it is
+    not there and ``GRASS_ADDON_BASE`` is set, ``docs/html`` under that
+    directory is used instead.
+
+    Calls fatal when page is not found.
+    """
+    page = entry + '.html'
+    path = os.path.join(os.environ['GISBASE'], 'docs', 'html', page)
+
+    # fall back to the add-on documentation only when the core page is missing
+    addon_base = os.getenv('GRASS_ADDON_BASE')
+    if addon_base and not os.path.exists(path):
+        path = os.path.join(addon_base, 'docs', 'html', page)
+
+    if os.path.exists(path):
+        return path
+
+    gs.fatal(_("No HTML manual page entry for '%s'") % entry)
+    return path
+
+
+def remove_non_author_lines(lines):
+    """Drop the lines which hold a copyright notice instead of an author.
+
+    A line is treated as a copyright notice when it contains the
+    copyright sign, written either as the literal character or as one of
+    the HTML entities ``&copy;`` and ``&#169;``. The entities are checked
+    because this function is applied to lines of the HTML page before
+    the markup is removed.
+
+    Returns a new list with the remaining lines in their original order.
+    """
+    copyright_marks = ("©", "&copy;", "&#169;")
+    return [
+        line for line in lines
+        if not any(mark in line for mark in copyright_marks)
+    ]
+
+
+def remove_html_tags(lines):
+    """Remove HTML tags from every line and keep the text around them.
+
+    Anything from ``<`` up to the nearest ``>`` is removed, which covers
+    opening, closing and self-closing tags in any letter case (``<br>``,
+    ``<BR />``, ``<a href="...">``, ``</a>``). The text enclosed by a pair
+    of tags is preserved, so ``<a href="x">Jane Doe</a>, NCSU`` becomes
+    ``Jane Doe, NCSU``.
+
+    Returns a new list with one cleaned string per input line; lines are
+    not stripped of whitespace.
+    """
+    # [^<>] keeps each match inside a single tag; a greedy ".+" here would
+    # run to the last ">" of the line and swallow the text between tags.
+    tag = re.compile(r"<[^<>]+>")
+    return [tag.sub("", line) for line in lines]
+
+
+def get_authors_from_documentation(text):
+    """Extract the authors from the HTML text of a manual page.
+
+    The author section starts after the ``<h2>`` heading which contains
+    ``AUTHOR`` and ends at the next ``<h2`` heading or, when there is
+    none, at the ``<p><i>Last changed:`` line; that line has to be
+    present somewhere after the heading. Copyright lines and HTML tags
+    are removed and every remaining non-empty line yields one author.
+
+    Returns a list of dictionaries with the keys ``name``, ``institute``
+    (text after the first comma, or None) and ``feature`` (text before
+    " by " in the name part, or None). The list is empty when the page
+    has no author section.
+    """
+    # Raw strings so that \s reaches the regular expression engine as is.
+    raw_author_capture = (
+        # heading: may not run over its own closing </h2>
+        r"<h2>(?:(?!</h2>).)*AUTHOR(?:(?!</h2>).)*</h2>"
+        # section body, as short as possible
+        r"(.*?)"
+        # sections which follow the authors are skipped, not captured
+        r"(?:<h2.*?)?"
+        r"<p>\s*<i>Last changed:"
+    )
+    match = re.search(raw_author_capture, text, flags=re.DOTALL)
+    if not match:
+        return []
+
+    raw_author_lines = [
+        line.strip()
+        for line in match.group(1).splitlines()
+        if line.strip()
+    ]
+
+    raw_author_lines = remove_non_author_lines(raw_author_lines)
+    raw_author_lines = remove_html_tags(raw_author_lines)
+
+    authors = []
+    for line in raw_author_lines:
+        # a line which consisted of markup only is blank by now
+        line = line.strip()
+        if not line:
+            continue
+        name, comma, institute = line.partition(",")
+        name = name.strip()
+        institute = institute.strip() if comma else None
+        feature = None
+        if " by " in name:
+            feature, name = name.split(" by ", 1)
+        authors.append({'name': name, 'institute': institute,
+                        'feature': feature})
+    return authors
+
+
+def write_bibtex(citation):
+    """Print *citation* as a BibTeX ``@software`` entry.
+
+    ``citation['module']`` is used as the entry key and in the title;
+    the ``name`` values of the items in ``citation['authors']`` are
+    joined with " and " to form the author field.
+    """
+    def emit(*parts):
+        # one output line, parts joined without a separator
+        print(*parts, sep="")
+
+    module = citation['module']
+    emit("@software{", module, ",")
+    emit("title={", "GRASS GIS: ", module, " module},")
+
+    names = " and ".join(entry['name'] for entry in citation['authors'])
+    emit("author={", names, "},")
+
+    emit("}")
+
+
+def citation_for_module(name):
+    """Collect the citation data of the module *name* from its manual page.
+
+    The page is located with documentation_filename(), read, and passed
+    to get_authors_from_documentation().
+
+    Returns a dictionary with the keys ``module`` (the given name) and
+    ``authors`` (the list of extracted authors).
+    """
+    path = documentation_filename(name)
+
+    # derive core strings from lhmpom:
+    # NAME / AUTHOR / LAST CHANGED / COPYRIGHT: Years + Entity
+
+    # the context manager closes the file even when reading fails
+    with open(path) as page:
+        text = page.read()
+
+    return {
+        'module': name,
+        'authors': get_authors_from_documentation(text),
+    }
+
+
+def main(options, flags):
+    """Print the citation of the module named in ``options['module']``.
+
+    A BibTeX entry is written when ``options['style']`` is ``bibtex``.
+    The citation dictionary itself is pretty-printed in every case,
+    whatever the style. *flags* is not used.
+    """
+    citation = citation_for_module(options['module'])
+
+    style = options['style']
+    if style == 'bibtex':
+        write_bibtex(citation)
+
+    pprint(citation)
+
+
+# TODO: consider "Extended by" versus original authors
+
+# LASTCHANGED, COPYRIGHT-YEARS, COPYRIGHT-ENTITY
+
+# LEFTOVERS:
+
+# A BibTeX entry for LaTeX users is:
+#
+# @Manual{GRASS_GIS_software,
+# title = {Geographic Resources Analysis Support System (GRASS) Software},
+# author = {{GRASS Development Team}},
+# organization = {Open Source Geospatial Foundation},
+# address = {USA},
+# year = {YEAR},
+# url = {http://grass.osgeo.org},
+# }
+
+# Run only when the file is executed as a script: the result of
+# gs.parser() is unpacked into the (options, flags) arguments of main().
+if __name__ == '__main__':
+ main(*gs.parser())
Deleted: sandbox/wenzeslaus/g.citation/g.citation.sh
===================================================================
--- sandbox/wenzeslaus/g.citation/g.citation.sh 2018-05-28 09:24:28 UTC (rev 72743)
+++ sandbox/wenzeslaus/g.citation/g.citation.sh 2018-05-28 17:15:19 UTC (rev 72744)
@@ -1,283 +0,0 @@
-#!/bin/sh
-
-############################################################################
-#
-# MODULE: g.citation
-# AUTHOR(S): Peter Loewe <ploewe AT osgeo DOT org> (ORCID: 0000-0003-2257-0517), Markus Neteler <mneteler AT osgeo DOT org> (ORCID:0000-0003-1916-1966)
-# PURPOSE: Provide scientific citation for GRASS modules and add-ons.
-#
-# COPYRIGHT: (C) 2018 by Peter Loewe (pl), Markus Neteler, and the GRASS Development team
-#
-# This program is free software under the GNU General Public
-# License (>=v2). Read the file COPYING that comes with GRASS
-# for details.
-#
-#############################################################################
-#last edit: 20180104 pl
-
-
-#%Module
-#% description: Provide scientific citation for GRASS modules and add-ons. THIS MODULE IS STILL UNDER DEVELOPMENT!
-#%End
-
-
-#%option
-#% key: module
-#% type: string
-#% description: GRASS module to be cited
-#% required : yes
-#% multiple: no
-#%end
-
-#%option
-#% key: style
-#% type: string
-#% description: citation style
-#% options: bibtex,csl,datacite,dublincore,json,json-ld,narcxml
-#% answer: bibtex
-#% required : yes
-#%end
-###^^^ Default to BibTex !
-
-#%option
-#% key: output
-#% type: string
-#% gisprompt: new_file,file,file
-#% description: name/path of output image file
-#% required : no
-#%end
-###^^^ if output is provided, write to ascii file, otherwise print to command line
-
-
-# TMPDIR=`g.gisenv get="GISDBASE"`
-# #TMPDIR is used to create the inner temporary latlon location
-
-export GIS_LOCK=$$
-
-###
-### Initialize
-###
-#Error Routine
-
-error_routine () {
-echo "ERROR: $1"
-exit 1
-}
-
-#################################
-# is GRASS running ? if not: abort
-#################################
-
-if [ -z "$GISBASE" ] ; then
- error_routine "You must be in GRASS to run this program."
-fi
-#################################
-
-#################################
-if [ "$1" != "@ARGS_PARSED@" ] ; then
- exec $GISBASE/bin/g.parser "$0" "$@"
-fi
-#################################
-
-
-#################################
-
-###
-###
-###
-
-# Find out, if / how many modules are provided by the user.
-
-
-if [ -z "$GIS_OPT_MODULE" ] ; then
- error_routine "No module provided"
-fi
-
-## Code snippet to deal with multiple modules (later):
-# MAPS_LIST=`echo $GIS_OPT_INPUT | sed 's/,/ /g'`
-#
-# MAPS_ARRAY=($(echo $MAPS_LIST | tr '\n' ' '))
-# MAPS_COUNT=${#MAPS_ARRAY[*]}
-#
-# # echo "Zahl: $MAPS_COUNT"
-##
-
-###
-
-## TBD: Find lhmpom-equivalent in GRASS repository
-
-# x=$(wget -0 - 'http:/foo/g.region.html')
-
-###
-
-## Which GRASS version is currently used ?
-## What Libraries, etc ?
-
-#g.version -erg
-
-#version=7.2.0
-#date=2016
-#revision=exported
-#build_date=2016-12-28
-#build_platform=i686-pc-linux-gnu
-#build_off_t_size=8
-#libgis_revision=68908
-#libgis_date="2016-07-09 20:12:57 +0200 (Sat, 09 Jul 2016) "
-#proj4=4.9.3
-#gdal=2.1.2
-#geos=3.5.1
-#sqlite=3.15.2
-
-
-#g.gisenv
-#MAPSET=user1
-#GISDBASE=/home/loewe/Projekte/locations
-#LOCATION_NAME=spearfish60_grass7
-#GUI=text
-#PID=1814
-
-
-###
-# Define the local directory of HTML man pages
-
-HTMLDIR="$GISBASE/docs/html"
-MODULEMANLOCAL="$HTMLDIR/$GIS_OPT_MODULE.html"
-
-###
-# Verfiy that a man page exists for the module. Abort otherwise.
-if [ -e $MODULEMANLOCAL ]
-then
- echo
- echo "OK: HTML for $GIS_OPT_MODULE available"
-else
- error_routine "No documenation for module $GIS_OPT_MODULE available"
-fi
-
-###
-
-## derive core strings from lhmpom: NAME / AUTHOR / LAST CHANGED / COPYRIGHT: Years + Entity
-
-#######################################
-#######################################
-#######################################
-### FUNCTIONS
-#######################################
-#######################################
-#######################################
-
-echo "function definition begins"
-######################################
-## Function to extract content from an existing HTML file
-## Example: function_extract_textfragment
-### AUTHOR_STARTSTRING="<h2><a name=\"author\">AUTHOR</a></h2>"
-### AUTHOR_ENDSTRING="<p><i>Last changed:"
-### $MODULEMANLOCAL defines path to existing HTML document
-### function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $$MODULEMANLOCAL
-function_extract_textfragment()
-{
-STARTSTRING=$1
-ENDSTRING=$2
-SOURCE=$3
-echo
-echo "___________________________________"
-echo "START:function_extract_textfragment"
-
-STARTLINE=$(grep $STARTSTRING $SOURCE)
-ENDLINE=$(grep $ENDSTRING $SOURCE)
-
-echo " START: $STARTLINE"
-echo " STOP: $ENDLINE"
-#echo
-TEXTFRAGMENT_RAW=$(grep -A50 $STARTSTRING $SOURCE | grep -B50 $ENDSTRING | egrep -v "$STARTSTRING|$ENDSTRING")
-echo "RESULT: $TEXTFRAGMENT_RAW"
-#echo
-#remove all newlines, etc from string:
-echo "..."
-TEXTFRAGMENT=${TEXTFRAGMENT_RAW//[$'\t\n\r']}
-echo $TEXTFRAGMENT
-echo "---"
-#echo
-echo "STOP:function_extract_textfragment"
-echo "------------------------"
-
-echo
-}
-
-echo "function definition ends"
-echo
-
-
-############################
-
-#NAME
-
-#AUTHORS
-
-#AUTHORSTRING_RAW=`grep -A20 "$AUTHOR_STARTSTRING" $MODULEMANLOCAL | grep -B20 "$AUTHOR_ENDSTRING" | egrep -v "$AUTHOR_STARTSTRING|$AUTHOR_ENDSTRING" `
-#echo $AUTHORSTRING_RAW
-#echo
-
-#AUTHOR_STARTSTRING="<h2><a name=\"author\">AUTHOR</a></h2>"
-AUTHOR_STARTSTRING=">AUTHOR</a></h2>"
-#AUTHOR_ENDSTRING="<p><i>Last changed:"
-AUTHOR_ENDSTRING="<p><i>Last"
-# !!! Hier Problem mit der Übergabe der Quotierung bei Leer- umnd Sonderzeichen !!!
-
-#echo "function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL"
-#function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL
-
-AUTHORSTRING=`function_extract_textfragment $AUTHOR_STARTSTRING $AUTHOR_ENDSTRING $MODULEMANLOCAL`
-
-echo "MAIN TEXTFRAGMENT: $TEXTFRAGMENT"
-echo "***"
-
-echo "MAIN AUTHORSTRING $AUTHORSTRING"
-AUTHOR_FULLNAME=`echo $AUTHORSTRING_RAW| cut -d, -f1`
-AUTHOR_AFFILIATION=`echo $AUTHORSTRING_RAW| cut -d, -f2`
-echo
-echo "Author NAME: $AUTHOR_FULLNAME"
-echo "Author AFFILIATION $AUTHOR_AFFILIATION"
-
-#very naive !
-# consider multiple authors: LINEBREAK as seperator
-# consider additional authors: "Extended by"
-
-#LASTCHANGED
-
-#COPYRIGHT-YEARS
-
-#COPRIGHT-ENTITY
-
-
-###
-
-echo
-echo
-echo "-COMPLETED-"
-###
-
-##IF output file is stated: print to file ELSE print to command line
-
-########################################
-#cleanup: remove location & files
-
-#rm $TMPDIR/.grassrc6_$TEMP_LOCATION
-#^^^activate if we have started ealrier a write to filesystem for whatever reasons
-
-
-########################################
-#That's all, folks.
-
-
-###########LEFTOVERS_
-
-#A BibTeX entry for LaTeX users is:
-#
-#@Manual{GRASS_GIS_software,
-# title = {Geographic Resources Analysis Support System (GRASS) Software},
-# author = {{GRASS Development Team}},
-# organization = {Open Source Geospatial Foundation},
-# address = {USA},
-# year = {YEAR},
-# url = {http://grass.osgeo.org},
-#}
More information about the grass-commit
mailing list