summaryrefslogtreecommitdiff
path: root/src/MaveDB.hs
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2022-09-29 08:53:40 +1000
committerJustin Bedo <cu@cua0.org>2022-09-29 08:55:35 +1000
commit8ecdab68587c07cf88ebc0b8a8f2bd0e1a703e37 (patch)
tree258b2b64362b5891e268f2761e6a798f2072759c /src/MaveDB.hs
init
Diffstat (limited to 'src/MaveDB.hs')
-rw-r--r--src/MaveDB.hs58
1 files changed, 58 insertions, 0 deletions
diff --git a/src/MaveDB.hs b/src/MaveDB.hs
new file mode 100644
index 0000000..b662231
--- /dev/null
+++ b/src/MaveDB.hs
@@ -0,0 +1,58 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
+
+module MaveDB where
+
+import Control.Lens
+import Data.Aeson.Lens
+import qualified Data.ByteString.Lazy.Char8 as BS
+import qualified Data.Map.Strict as M
+import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
+import Network.HTTP.Client (managerResponseTimeout, responseTimeoutNone)
+import Network.HTTP.Client.TLS (tlsManagerSettings)
+import Network.Wreq
+import Uniprot (UniprotID)
+
+type URN = Text
+
+type Gene = Text
+
+type HGVSP = Text
+
+type Score = (HGVSP, Double)
+
+-- Standard options for requests
+opts = defaults & manager .~ Left (tlsManagerSettings {managerResponseTimeout = responseTimeoutNone})
+
+--opts = defaults & manager .~ Left tlsManagerSettings
+
+-- Retrieve a list of URNs which are protein coding human genes
+queryURNs :: IO [(URN, UniprotID)]
+queryURNs = do
+ rep <- getWith opts "https://mavedb.org/api/scoresets/"
+ pure $
+ rep ^. responseBody
+ & (^.. traverse . runFold ((,) <$> Fold (key "urn" . _String) <*> Fold (key "target" . key "uniprot" . key "identifier" . _String)))
+ . (^.. folded . filtered (allOf (key "target" . key "reference_maps" . _Array . traverse . key "genome" . key "short_name" . _String) (== "hg38")))
+ . (^.. folded . filtered (allOf (key "target" . key "type" . _String) (== "Protein coding")))
+ . (^.. _Array . folded . filtered (allOf (key "target" . key "score_columns" . _Array) (\((^.. traverse . _String) -> scs) -> "hgvs_pro" `elem` scs && "score" `elem` scs)))
+
+getScores :: URN -> IO [Score]
+getScores urn = do
+ let urn' = T.unpack urn
+ rep <- getWith opts $ "https://mavedb.org/scoreset/" <> urn' <> "/scores/"
+ let hdr : rows = dropComments . BS.lines . BS.filter (/= '\r') $ rep ^. responseBody
+ rows' = map (M.fromList . zip (BS.split ',' hdr) . BS.split ',') rows
+ pure $
+ rows'
+ ^.. traverse
+ . runFold
+ ( (,)
+ <$> Fold (at "hgvs_pro" . to (fromMaybe $ error ("no hgvs_pro column in " <> urn')) . to (T.pack . BS.unpack))
+ <*> Fold (at "score" . traverse . _Double)
+ )
+ where
+ dropComments (a : as) = if BS.head a == '#' then dropComments as else a : as
+ dropComments x = x