about summary refs log tree commit diff
path: root/clinvar
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2021-08-02 21:41:57 +1000
committer Justin Bedo <cu@cua0.org> 2021-08-02 21:49:50 +1000
commit efd9fcdbbd3a961e97bb3d02ae086c258c1ecb40 (patch)
tree c478fc2bfa79e6383b6395c3a9eda1b53e1408b6 /clinvar
initial implementation
Diffstat (limited to 'clinvar')
-rwxr-xr-x clinvar 149
1 files changed, 149 insertions, 0 deletions
diff --git a/clinvar b/clinvar
new file mode 100755
index 0000000..63c9a13
--- /dev/null
+++ b/clinvar
@@ -0,0 +1,149 @@
+#!/bin/sh
+
+c_red="\033[1;31m"
+c_green="\033[1;32m"
+c_yellow="\033[1;33m"
+c_blue="\033[1;34m"
+c_magenta="\033[1;35m"
+c_cyan="\033[1;36m"
+c_reset="\033[0m"
+
+CLINVAR_URL="https://www.ncbi.nlm.nih.gov/clinvar/"
+CLINVAR_RESULTS_XPATH="/html/body/div/div/form/div[1]/div[5]/div/div[5]/div/table/tbody"
+CLINVAR_DESC_XPATH="/html/body/div[3]/main/div[4]/div"
+CLINVAR_EVID_XPATH="/html/body/div[2]/div[2]/div[3]/table/tbody"
+FZF_CMD="fzf --ansi -m --tabstop=1 --bind change:top --layout=reverse --delimiter="\t""
+
+# Print a one-line, colourised usage message on standard output.
+# Reads the globals c_green, c_yellow and c_reset; %b expands the
+# backslash escapes stored in them.
+usage() {
+  printf "Usage: %bclinvar %bsearch-query%b\n" "$c_green" "$c_yellow" "$c_reset"
+}
+
+if [ "$#" -eq 0 ] ; then
+ usage
+ exit 1
+fi
+
+
+awk_parse_results='
+# Turn the <tbody> of a ClinVar search-result page into tab-separated lines:
+#   mutation name TAB clinical significance (coloured) TAB accession cell
+# Records are table rows (RS matches each closing </tr>) and fields are
+# delimited by opening and closing <td> tags.
+# Only portable awk functions are used; the former gensub() call exists
+# in GNU awk only.
+BEGIN {
+  RS = "</tr[^>]*>"
+  FS = "</?td[^>]*>"
+  OFS = "\t"
+  c_yellow = "\033[1;33m"
+  c_magenta = "\033[1;35m"
+  c_reset = "\033[0m"
+}
+# Position of the last occurrence of needle in hay, or 0 when absent.
+function lastindex(hay, needle,   pos, off, i) {
+  pos = 0
+  off = 0
+  while ((i = index(substr(hay, off + 1), needle)) > 0) {
+    pos = off + i
+    off = pos
+  }
+  return pos
+}
+# Extract the mutation name from a result cell: the text between the last
+# <span class="ui-button-text"> that is followed by a </span> and the last
+# </span> of the cell. When no such span exists the whole cell is used.
+# HTML-escaped ">" is restored and remaining tags are stripped.
+function getmut(str,   open_tag, close_tag, e, head, p, res) {
+  open_tag = "<span class=\"ui-button-text\">"
+  close_tag = "</span>"
+  res = str
+  e = lastindex(str, close_tag)
+  if (e > 0) {
+    head = substr(str, 1, e - 1)
+    p = lastindex(head, open_tag)
+    if (p > 0)
+      res = substr(head, p + length(open_tag))
+  }
+  gsub(/&gt;/, ">", res)
+  return striptag(res)
+}
+# Remove every <...> tag from str.
+function striptag(str) {
+  gsub(/<[^>]*>/, "", str)
+  return str
+}
+# Strip tags from a significance cell and colour it: yellow when it
+# mentions "pathogenic", magenta when it mentions "benign".
+function getpath(str) {
+  str = striptag(str)
+  if (str ~ /[pP]athogenic/)
+    str = c_yellow str c_reset
+  if (str ~ /[bB]enign/)
+    str = c_magenta str c_reset
+  return str
+}
+# Keep rows whose field 16 carries a VCV accession and whose mutation name
+# starts with "NM".
+$16 ~ /VCV[0-9]+/ {
+  mut = getmut($4)
+  if (mut ~ /^NM/)
+    print mut, getpath($12), $16
+}
+'
+
+awk_parse_evidence='
+# Parse the <tbody> of a ClinVar evidence page.
+# Records are table rows (RS matches each closing </tr>), fields are
+# delimited by <td> tags. For the first row seen with a given field 4:
+#   stdout: field 8 (coloured), 10, 12, 16 and 4, tab separated
+#   stderr: field 4 TAB field 26 with tags stripped
+BEGIN {
+  RS = "</tr[^>]*>"
+  FS = "</?td[^>]*>"
+  OFS = "\t"
+  c_yellow = "\033[1;33m"
+  c_magenta = "\033[1;35m"
+  c_reset = "\033[0m"
+}
+# Remove every <...> tag from str.
+function striptag(str) { gsub(/<[^>]*>/, "", str); return str }
+# Act only on the first record carrying a given field 4.
+!seen[$4]++ {
+  gsub(/<br\/>/, " ")
+  gsub(/\n/, " ", $16)
+  label = $8
+  if (label ~ /[pP]athogenic/)
+    label = c_yellow label c_reset
+  if (label ~ /[bB]enign/)
+    label = c_magenta label c_reset
+  print label, $10, $12, $16, $4
+  print $4, striptag($26) > "/dev/stderr"
+}
+'
+
+preview() {
+ id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}')
+ curl -s -L "$CLINVAR_URL/variation/$id" \
+ --compressed \
+ | xmllint --html --xpath "$CLINVAR_DESC_XPATH" - 2>/dev/null \
+ | w3m -dump -T text/html -O utf-8 \
+ | sed 's/^Help/-------/'
+
+ exit 0
+}
+
+evid() {
+ id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}')
+ export EVID_DESC_OUT="evid-$id"
+ trap "rm $EVID_DESC_OUT" EXIT
+
+ curl -s -L "$CLINVAR_URL/variation/$id/evidence/" \
+ --compressed \
+ | xmllint --html --xpath "$CLINVAR_EVID_XPATH" - 2>/dev/null \
+ | awk "$awk_parse_evidence" 2> $EVID_DESC_OUT \
+ | column -t -s $'\t' \
+ | $FZF_CMD --preview "sh $0 -VV $EVID_DESC_OUT {}"
+
+ exit 0
+}
+
+evid_desc() {
+ db=$1
+ shift
+ id=$(echo "%s" "$*" | awk '{print $NF}')
+ grep "$id" "$db" | awk -F'\t' '{print $2}' | fmt
+}
+
+[ "$1" == "-U" ] && preview "$2"
+[ "$1" == "-V" ] && evid "$2"
+[ "$1" == "-VV" ] && evid_desc "$2" "$3"
+
+# do search
+results=$(curl -s -L "$CLINVAR_URL" \
+ -G --data-urlencode "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PageSize=200" \
+ -G --data-urlencode "term=$*" \
+ --compressed )
+
+parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "$CLINVAR_RESULTS_XPATH" - 2>/dev/null | awk "$awk_parse_results")
+
+if [ "$(printf "%s" "$parsed_results" | wc -l)" -eq 0 ] ; then
+ parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "/html/body/div[3]/main/div[3]/div/div/dl/dd[5]" - 2>/dev/null | sed 's/<[^>]*>//g')
+fi
+
+# Main menu
+while : ; do
+ id=$(printf "%s" "$parsed_results" | column -t -s $'\t' | $FZF_CMD --preview "sh $0 -U {}" | awk '{print $NF}')
+
+ [ "$id" = "" ] && exit 0
+ [ "$id" = "0" ] && exit 0
+ # xdg-open "$CLINVAR_URL/variation/$id"
+ sh $0 -V $id
+done