blob: f33abe9ba936a6e0d38a093af1900b4abdd9b005 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module MaveDB where
import Control.Lens
import Data.Aeson.Lens
import qualified Data.ByteString.Lazy.Char8 as BS
import qualified Data.Map.Strict as M
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Network.HTTP.Client (managerResponseTimeout, responseTimeoutNone)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.Wreq
import Uniprot (UniprotID)
type URN = Text
type Gene = Text
type HGVSP = Text
type Score = (HGVSP, Double)
-- Standard options for requests
opts = defaults & manager .~ Left (tlsManagerSettings {managerResponseTimeout = responseTimeoutNone})
--opts = defaults & manager .~ Left tlsManagerSettings
-- Retrieve a list of URNs which are protein coding human genes
queryURNs :: IO [(URN, UniprotID)]
queryURNs = do
rep <- getWith opts "https://mavedb.org/api/scoresets/"
pure $
rep ^. responseBody
& (^.. traverse . runFold ((,) <$> Fold (key "urn" . _String) <*> Fold (key "target" . key "uniprot" . key "identifier" . _String)))
. (^.. folded . filtered (allOf (key "target" . key "reference_maps" . _Array . traverse . key "genome" . key "organism_name" . _String) (== "Homo sapiens")))
. (^.. folded . filtered (allOf (key "target" . key "type" . _String) (== "Protein coding")))
. (^.. _Array . folded . filtered (allOf (key "score_columns" . _Array) (\((^.. traverse . _String) -> scs) -> "hgvs_pro" `elem` scs && "score" `elem` scs)))
getScores :: URN -> IO [Score]
getScores urn = do
let urn' = T.unpack urn
rep <- getWith opts $ "https://mavedb.org/scoreset/" <> urn' <> "/scores/"
let hdr : rows = dropComments . BS.lines . BS.filter (/= '\r') $ rep ^. responseBody
rows' = map (M.fromList . zip (BS.split ',' hdr) . BS.split ',') rows
pure $
rows'
^.. traverse
. runFold
( (,)
<$> Fold (at "hgvs_pro" . to (fromMaybe $ error ("no hgvs_pro column in " <> urn')) . to (T.pack . BS.unpack))
<*> Fold (at "score" . traverse . _Double)
)
where
dropComments (a : as) = if BS.head a == '#' then dropComments as else a : as
dropComments x = x
|