summaryrefslogtreecommitdiff
path: root/src/MaveDB.hs
blob: f33abe9ba936a6e0d38a093af1900b4abdd9b005 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}

module MaveDB where

import Control.Lens
import Data.Aeson.Lens
import qualified Data.ByteString.Lazy.Char8 as BS
import qualified Data.Map.Strict as M
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Network.HTTP.Client (managerResponseTimeout, responseTimeoutNone)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Network.Wreq
import Uniprot (UniprotID)

type URN = Text

type Gene = Text

type HGVSP = Text

type Score = (HGVSP, Double)

-- Standard options for requests
opts = defaults & manager .~ Left (tlsManagerSettings {managerResponseTimeout = responseTimeoutNone})

--opts = defaults & manager .~ Left tlsManagerSettings

-- Retrieve a list of URNs which are protein coding human genes
queryURNs :: IO [(URN, UniprotID)]
queryURNs = do
  rep <- getWith opts "https://mavedb.org/api/scoresets/"
  pure $
    rep ^. responseBody
      & (^.. traverse . runFold ((,) <$> Fold (key "urn" . _String) <*> Fold (key "target" . key "uniprot" . key "identifier" . _String)))
        . (^.. folded . filtered (allOf (key "target" . key "reference_maps" . _Array . traverse . key "genome" . key "organism_name" . _String) (== "Homo sapiens")))
        . (^.. folded . filtered (allOf (key "target" . key "type" . _String) (== "Protein coding")))
        . (^.. _Array . folded . filtered (allOf (key "score_columns" . _Array) (\((^.. traverse . _String) -> scs) -> "hgvs_pro" `elem` scs && "score" `elem` scs)))

getScores :: URN -> IO [Score]
getScores urn = do
  let urn' = T.unpack urn
  rep <- getWith opts $ "https://mavedb.org/scoreset/" <> urn' <> "/scores/"
  let hdr : rows = dropComments . BS.lines . BS.filter (/= '\r') $ rep ^. responseBody
      rows' = map (M.fromList . zip (BS.split ',' hdr) . BS.split ',') rows
  pure $
    rows'
      ^.. traverse
        . runFold
          ( (,)
              <$> Fold (at "hgvs_pro" . to (fromMaybe $ error ("no hgvs_pro column in " <> urn')) . to (T.pack . BS.unpack))
              <*> Fold (at "score" . traverse . _Double)
          )
  where
    dropComments (a : as) = if BS.head a == '#' then dropComments as else a : as
    dropComments x = x