From efd9fcdbbd3a961e97bb3d02ae086c258c1ecb40 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Mon, 2 Aug 2021 21:41:57 +1000 Subject: initial implementation --- LICENSE | 13 ++++++ README.md | 22 +++++++++ clinvar | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ clinvar.1 | 20 ++++++++ default.nix | 26 +++++++++++ flake.nix | 12 +++++ 6 files changed, 242 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100755 clinvar create mode 100644 clinvar.1 create mode 100644 default.nix create mode 100644 flake.nix diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fb8c9f6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright © 2021 by Justin Bedő + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR +IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..5beb087 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +A command line interface to clinvar. + +# Demo + +![demo](https://vk3.wtf/clinvar.gif) + +# Install + +It's just a script, there are dependencies on libxml2 and FZF. If you have +Nix with flakes enabled you can run it without "installing" by +``` +nix run git://vk3.wtf/clinvar.git search-terms ... 
+``` +or put it into your active profile with +``` +nix-env -i $(nix eval --raw git://vk3.wtf/clinvar.git) +``` + + +# Contributing + +Email me with issues and patches. diff --git a/clinvar b/clinvar new file mode 100755 index 0000000..63c9a13 --- /dev/null +++ b/clinvar @@ -0,0 +1,149 @@ +#!/bin/sh + +c_red="\033[1;31m" +c_green="\033[1;32m" +c_yellow="\033[1;33m" +c_blue="\033[1;34m" +c_magenta="\033[1;35m" +c_cyan="\033[1;36m" +c_reset="\033[0m" + +CLINVAR_URL="https://www.ncbi.nlm.nih.gov/clinvar/" +CLINVAR_RESULTS_XPATH="/html/body/div/div/form/div[1]/div[5]/div/div[5]/div/table/tbody" +CLINVAR_DESC_XPATH="/html/body/div[3]/main/div[4]/div" +CLINVAR_EVID_XPATH="/html/body/div[2]/div[2]/div[3]/table/tbody" +FZF_CMD="fzf --ansi -m --tabstop=1 --bind change:top --layout=reverse --delimiter="\t"" + +usage() { + printf "Usage: %bclinvar %bsearch-query%b\n" $c_green $c_yellow $c_reset +} + +if [ "$#" -eq 0 ] ; then + usage + exit 1 +fi + + +awk_parse_results=' +BEGIN{ + RS="]*>" + FS="]*>" + OFS="\t" + c_red="\033[1;31m" + c_green="\033[1;32m" + c_yellow="\033[1;33m" + c_blue="\033[1;34m" + c_magenta="\033[1;35m" + c_reset="\033[0m" +} +function getmut(str, res) { + res=gensub(/^.*(.*)<\/span>.*$/,"\\1","g",str) + gsub(/>/,">",res) + return striptag(res) +} +function striptag(str) { + gsub(/<[^>]*>/,"",str) + return str +} +function getpath(str) { + str=striptag(str) + if(str~/[pP]athogenic/) + str=c_yellow str c_reset + if(str~/[bB]enign/) + str=c_magenta str c_reset + return str +} +$16~/VCV[0-9]+/{ + mut=getmut($4) + if(mut~/^NM/) + print mut, getpath($12), $16 +} +' + +awk_parse_evidence=' +BEGIN{ + RS="]*>" + FS="]*>" + OFS="\t" + c_red="\033[1;31m" + c_green="\033[1;32m" + c_yellow="\033[1;33m" + c_blue="\033[1;34m" + c_magenta="\033[1;35m" + c_reset="\033[0m" +} +function striptag(str) { + gsub(/<[^>]*>/,"",str) + return str +} +!done[$4]{ + done[$4]++ + gsub(//, " ") + gsub(/\n/, " ", $16) + if($8~/[pP]athogenic/) + $8 = c_yellow $8 c_reset + 
if($8~/[bB]enign/) + $8 = c_magenta $8 c_reset + print $8, $10, $12, $16, $4 + print $4, striptag($26) > "/dev/stderr" +} +' + +preview() { + id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}') + curl -s -L "$CLINVAR_URL/variation/$id" \ + --compressed \ + | xmllint --html --xpath "$CLINVAR_DESC_XPATH" - 2>/dev/null \ + | w3m -dump -T text/html -O utf-8 \ + | sed 's/^Help/-------/' + + exit 0 +} + +evid() { + id=$(echo "$*" | awk '{id=$NF; gsub(/^VCV/, "", id); print int(id+0)}') + export EVID_DESC_OUT="evid-$id" + trap "rm $EVID_DESC_OUT" EXIT + + curl -s -L "$CLINVAR_URL/variation/$id/evidence/" \ + --compressed \ + | xmllint --html --xpath "$CLINVAR_EVID_XPATH" - 2>/dev/null \ + | awk "$awk_parse_evidence" 2> $EVID_DESC_OUT \ + | column -t -s "$(printf '\t')" \ + | $FZF_CMD --preview "sh $0 -VV $EVID_DESC_OUT {}" + + exit 0 +} + +evid_desc() { + db=$1 + shift + id=$(printf "%s\n" "$*" | awk '{print $NF}') + grep "$id" "$db" | awk -F'\t' '{print $2}' | fmt +} + +[ "$1" = "-U" ] && preview "$2" +[ "$1" = "-V" ] && evid "$2" +[ "$1" = "-VV" ] && { evid_desc "$2" "$3"; exit 0; } # helper mode: must not fall through to the search + +# do search +results=$(curl -s -L "$CLINVAR_URL" \ + -G --data-urlencode "EntrezSystem2.PEntrez.clinVar.clinVar_Entrez_ResultsPanel.Entrez_DisplayBar.PageSize=200" \ + -G --data-urlencode "term=$*" \ + --compressed ) + +parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "$CLINVAR_RESULTS_XPATH" - 2>/dev/null | awk "$awk_parse_results") + +if [ -z "$parsed_results" ] ; then # wc -l would count 0 for a single unterminated row + parsed_results=$(printf "%s" "$results" | xmllint --html --xpath "/html/body/div[3]/main/div[3]/div/div/dl/dd[5]" - 2>/dev/null | sed 's/<[^>]*>//g') +fi + +# Main menu +while : ; do + id=$(printf "%s" "$parsed_results" | column -t -s "$(printf '\t')" | $FZF_CMD --preview "sh $0 -U {}" | awk '{print $NF}') + + [ "$id" = "" ] && exit 0 + [ "$id" = "0" ] && exit 0 + # xdg-open "$CLINVAR_URL/variation/$id" + sh $0 -V $id +done diff --git a/clinvar.1 b/clinvar.1 new file mode 100644 index 
0000000..aa9d283 --- /dev/null +++ b/clinvar.1 @@ -0,0 +1,20 @@ +.TH CLINVAR 1 +.SH NAME +clinvar \- FZF based interface to clinvar for variant searching +.SH SYNOPSIS +\fBclinvar\fP \fIterm\fP ... +.SH DESCRIPTION +\fBclinvar\fP searches for the given terms in the clinvar database. +A search is conducted and if multiple results are available they are +displayed in FZF with a preview of the variant details. A variant +can be selected to view details of the evidence associated with that +variant. +.P +If a variant is selected, the evidence will be shown in FZF and the +preview used to show additional free-text description associated with +the highlighted evidence entry. Cancelling or making a selection will +return to the search results. +.SH BUGS +As the NCBI results navigation is stateful, only the first 200 results +can be shown. Out of these, only the subset with variant identifiers +are displayed. This can result in a short list for large queries. diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..a468bbe --- /dev/null +++ b/default.nix @@ -0,0 +1,26 @@ +{ lib, stdenvNoCC, fzf, libxml2, makeWrapper }: + +stdenvNoCC.mkDerivation { + pname = "clinvar"; + version = "0.1"; + + src = ./.; + + nativeBuildInputs = [ makeWrapper ]; + + installPhase = '' + runHook preInstall + install -Dm 755 clinvar $out/bin/clinvar + install -Dm 644 clinvar.1 $out/share/man/man1/clinvar.1 # man pages belong under share/man + runHook postInstall + ''; + + fixupPhase = '' + runHook preFixup + wrapProgram $out/bin/clinvar \ + --prefix PATH : "${lib.makeBinPath [ fzf libxml2 ]}" + runHook postFixup + ''; + + meta.mainProgram = "clinvar"; +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..d267b4d --- /dev/null +++ b/flake.nix @@ -0,0 +1,12 @@ +{ + inputs.flake-utils.url = "github:numtide/flake-utils"; + + outputs = { nixpkgs, flake-utils, self }: + flake-utils.lib.eachDefaultSystem (system: + let pkgs = nixpkgs.legacyPackages.${system}; + in rec { + packages = + 
flake-utils.lib.flattenTree { clinvar = pkgs.callPackage ./. { }; }; + defaultPackage = packages.clinvar; + }); +} -- cgit v1.2.3