diff options
Diffstat (limited to 'src/MaveDB.hs')
-rw-r--r-- | src/MaveDB.hs | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/src/MaveDB.hs b/src/MaveDB.hs new file mode 100644 index 0000000..b662231 --- /dev/null +++ b/src/MaveDB.hs @@ -0,0 +1,58 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} + +module MaveDB where + +import Control.Lens +import Data.Aeson.Lens +import qualified Data.ByteString.Lazy.Char8 as BS +import qualified Data.Map.Strict as M +import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T +import Network.HTTP.Client (managerResponseTimeout, responseTimeoutNone) +import Network.HTTP.Client.TLS (tlsManagerSettings) +import Network.Wreq +import Uniprot (UniprotID) + +type URN = Text + +type Gene = Text + +type HGVSP = Text + +type Score = (HGVSP, Double) + +-- Standard options for requests +opts = defaults & manager .~ Left (tlsManagerSettings {managerResponseTimeout = responseTimeoutNone}) + +--opts = defaults & manager .~ Left tlsManagerSettings + +-- Retrieve a list of URNs which are protein coding human genes +queryURNs :: IO [(URN, UniprotID)] +queryURNs = do + rep <- getWith opts "https://mavedb.org/api/scoresets/" + pure $ + rep ^. responseBody + & (^.. traverse . runFold ((,) <$> Fold (key "urn" . _String) <*> Fold (key "target" . key "uniprot" . key "identifier" . _String))) + . (^.. folded . filtered (allOf (key "target" . key "reference_maps" . _Array . traverse . key "genome" . key "short_name" . _String) (== "hg38"))) + . (^.. folded . filtered (allOf (key "target" . key "type" . _String) (== "Protein coding"))) + . (^.. _Array . folded . filtered (allOf (key "target" . key "score_columns" . _Array) (\((^.. traverse . _String) -> scs) -> "hgvs_pro" `elem` scs && "score" `elem` scs))) + +getScores :: URN -> IO [Score] +getScores urn = do + let urn' = T.unpack urn + rep <- getWith opts $ "https://mavedb.org/scoreset/" <> urn' <> "/scores/" + let hdr : rows = dropComments . BS.lines . BS.filter (/= '\r') $ rep ^. responseBody + rows' = map (M.fromList . zip (BS.split ',' hdr) . BS.split ',') rows + pure $ + rows' + ^.. traverse + . runFold + ( (,) + <$> Fold (at "hgvs_pro" . to (fromMaybe $ error ("no hgvs_pro column in " <> urn')) . to (T.pack . BS.unpack)) + <*> Fold (at "score" . traverse . _Double) + ) + where + dropComments (a : as) = if BS.head a == '#' then dropComments as else a : as + dropComments x = x |