diff options
author | Justin Bedo <cu@cua0.org> | 2021-08-02 21:41:57 +1000 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2021-08-02 21:49:50 +1000 |
commit | efd9fcdbbd3a961e97bb3d02ae086c258c1ecb40 (patch) | |
tree | c478fc2bfa79e6383b6395c3a9eda1b53e1408b6 /clinvar |
initial implementation
Diffstat (limited to 'clinvar')
-rwxr-xr-x | clinvar | 149 |
1 files changed, 149 insertions, 0 deletions
#!/bin/sh
#
# clinvar - interactive terminal browser for NCBI ClinVar search results.
#
# Usage: clinvar search-query
#
# The script re-invokes itself with internal flags to feed fzf previews:
#   -U  <line>        dump the description of the variant on <line>
#   -V  <id>          browse the evidence table of variant <id>
#   -VV <file> <line> print the evidence description stored in <file>
#                     for the submission shown on <line>
#
# External tools used below: curl, xmllint, w3m, fzf, column, fmt and an awk
# that provides gensub() (a GNU awk extension).

c_red="\033[1;31m"
c_green="\033[1;32m"
c_yellow="\033[1;33m"
c_blue="\033[1;34m"
c_magenta="\033[1;35m"
c_cyan="\033[1;36m"
c_reset="\033[0m"

# A literal TAB. The $'\t' quoting form is not understood by every /bin/sh,
# so the character is produced with printf instead.
TAB=$(printf '\t')

# The base URL ends with a slash; paths are appended without another one.
CLINVAR_URL="https://www.ncbi.nlm.nih.gov/clinvar/"
CLINVAR_RESULTS_XPATH="/html/body/div/div/form/div[1]/div[5]/div/div[5]/div/table/tbody"
CLINVAR_DESC_XPATH="/html/body/div[3]/main/div[4]/div"
CLINVAR_EVID_XPATH="/html/body/div[2]/div[2]/div[3]/table/tbody"
# Expanded unquoted (word-split) at the call sites. The delimiter is passed
# as the two characters backslash-t; the previous nested double quotes made
# the shell collapse it to a plain "t".
FZF_CMD='fzf --ansi -m --tabstop=1 --bind change:top --layout=reverse --delimiter=\t'

# Print a one-line usage message.
usage() {
    printf "Usage: %bclinvar %bsearch-query%b\n" "$c_green" "$c_yellow" "$c_reset"
}

if [ "$#" -eq 0 ] ; then
    usage
    exit 1
fi


# awk program: turns the search-result table body into tab-separated rows
# "mutation <TAB> significance <TAB> accession". Records are table rows and
# fields are table cells; only rows whose field 16 holds a VCV accession and
# whose mutation name starts with NM are printed.
awk_parse_results='
BEGIN{
    RS="</tr[^>]*>"
    FS="</?td[^>]*>"
    OFS="\t"
    c_red="\033[1;31m"
    c_green="\033[1;32m"
    c_yellow="\033[1;33m"
    c_blue="\033[1;34m"
    c_magenta="\033[1;35m"
    c_reset="\033[0m"
}
# Extract the button text of a cell and strip remaining markup.
# res is a local variable (extra parameter idiom).
function getmut(str,    res) {
    res=gensub(/^.*<span class="ui-button-text">(.*)<\/span>.*$/,"\\1","g",str)
    # Decode the HTML entity for ">"; the pattern used to be a literal ">"
    # which made this substitution a no-op.
    gsub(/&gt;/,">",res)
    return striptag(res)
}
# Remove every HTML tag from str.
function striptag(str) {
    gsub(/<[^>]*>/,"",str)
    return str
}
# Strip markup and colour the clinical significance text.
function getpath(str) {
    str=striptag(str)
    if(str~/[pP]athogenic/)
        str=c_yellow str c_reset
    if(str~/[bB]enign/)
        str=c_magenta str c_reset
    return str
}
$16~/VCV[0-9]+/{
    mut=getmut($4)
    if(mut~/^NM/)
        print mut, getpath($12), $16
}
'

# awk program: turns the evidence table body into tab-separated rows on
# stdout and writes "field 4 <TAB> stripped field 26" lines to stderr, which
# the caller redirects to a file used by the -VV preview. Each distinct value
# of field 4 is emitted once.
awk_parse_evidence='
BEGIN{
    RS="</tr[^>]*>"
    FS="</?td[^>]*>"
    OFS="\t"
    c_red="\033[1;31m"
    c_green="\033[1;32m"
    c_yellow="\033[1;33m"
    c_blue="\033[1;34m"
    c_magenta="\033[1;35m"
    c_reset="\033[0m"
}
function striptag(str) {
    gsub(/<[^>]*>/,"",str)
    return str
}
!done[$4]{
    done[$4]++
    gsub(/<br\/>/, " ")
    gsub(/\n/, " ", $16)
    if($8~/[pP]athogenic/)
        $8 = c_yellow $8 c_reset
    if($8~/[bB]enign/)
        $8 = c_magenta $8 c_reset
    print $8, $10, $12, $16, $4
    print $4, striptag($26) > "/dev/stderr"
}
'

# preview LINE
# fzf preview for the main menu: takes the last field of LINE, strips the
# VCV prefix and leading zeros, downloads the variation page and dumps its
# description block as text. Always exits the script.
preview() {
    id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}')
    curl -s -L "${CLINVAR_URL}variation/$id" \
        --compressed \
    | xmllint --html --xpath "$CLINVAR_DESC_XPATH" - 2>/dev/null \
    | w3m -dump -T text/html -O utf-8 \
    | sed 's/^Help/-------/'

    exit 0
}

# evid ID
# Show the evidence table of a variant in fzf. The per-submission
# descriptions go to the file evid-<id> in the current directory, which is
# removed when the script exits. Always exits the script.
evid() {
    id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}')
    export EVID_DESC_OUT="evid-$id"
    # Single quotes: expand the name when the trap fires; -f keeps the
    # cleanup silent if the file was never created.
    trap 'rm -f "$EVID_DESC_OUT"' EXIT

    curl -s -L "${CLINVAR_URL}variation/$id/evidence/" \
        --compressed \
    | xmllint --html --xpath "$CLINVAR_EVID_XPATH" - 2>/dev/null \
    | awk "$awk_parse_evidence" 2> "$EVID_DESC_OUT" \
    | column -t -s "$TAB" \
    | $FZF_CMD --preview "sh \"$0\" -VV \"$EVID_DESC_OUT\" {}"

    exit 0
}

# evid_desc FILE LINE...
# fzf preview for the evidence menu: looks up the last field of LINE in FILE
# and prints the second tab-separated column of the matching lines, wrapped
# by fmt.
evid_desc() {
    db=$1
    shift
    id=$(printf "%s\n" "$*" | awk '{print $NF}')
    # -F: the id is a literal string, not a regular expression.
    grep -F -- "$id" "$db" | awk -F'\t' '{print $2}' | fmt
}

# Internal sub-commands. preview and evid exit on their own; evid_desc does
# not, so exit explicitly here, otherwise a -VV preview call would continue
# into the search below and open a nested menu.
case "$1" in
    -U)  preview "$2" ;;
    -V)  evid "$2" ;;
    -VV) evid_desc "$2" "$3"; exit 0 ;;
esac

# do search
results=$(curl -s -L "$CLINVAR_URL" \
    -G --data-urlencode "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PageSize=200" \
    --data-urlencode "term=$*" \
    --compressed )

parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "$CLINVAR_RESULTS_XPATH" - 2>/dev/null | awk "$awk_parse_results")

# No rows parsed from the results table: fall back to a fixed element of the
# returned page. Test for emptiness directly; counting lines with wc -l
# reports 0 for a single row without trailing newline and would discard it.
if [ -z "$parsed_results" ] ; then
    parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "/html/body/div[3]/main/div[3]/div/div/dl/dd[5]" - 2>/dev/null | sed 's/<[^>]*>//g')
fi

# Main menu
while : ; do
    # fzf runs with -m, so several lines may come back; only the first
    # selection is opened (the id is passed on as a single argument).
    id=$(printf "%s" "$parsed_results" | column -t -s "$TAB" | $FZF_CMD --preview "sh \"$0\" -U {}" | awk 'NR == 1 {print $NF}')

    [ "$id" = "" ] && exit 0
    [ "$id" = "0" ] && exit 0
    # xdg-open "$CLINVAR_URL/variation/$id"
    sh "$0" -V "$id"
done