From 5e2bda5ef2f7e95a164603829d983f61d0737b5f Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 30 Sep 2022 14:53:40 +1000 Subject: add ability to explicitly specify additional mavedb scoresets matched with uniprot ids --- src/scrape.hs | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/scrape.hs b/src/scrape.hs index 56f4631..e877a9c 100644 --- a/src/scrape.hs +++ b/src/scrape.hs @@ -6,6 +6,8 @@ module Main where import Control.Lens +import Control.Monad +import Data.Aeson.Lens import Data.List import qualified Data.Map as M import Data.Maybe @@ -32,30 +34,41 @@ schema = PRIMARY KEY (gene, hgvs_p)); |] -data Config = Config FilePath Double +data Config = Config FilePath Double (Maybe FilePath) main = configured =<< execParser opts where config = Config <$> strArgument (metavar "DB" <> help "path to SQLite DB to insert into") - <*> option - auto - ( long "min-pi" - <> short 'p' - <> metavar "π" - <> help "minimum estimated labelling proportion" - <> showDefault - <> value 0.1 - ) + <*> option + auto + ( long "min-pi" + <> short 'p' + <> metavar "π" + <> help "minimum estimated labelling proportion" + <> showDefault + <> value 0.1 + ) + <*> option + auto + ( long "extra" + <> short 'e' + <> metavar "PATH" + <> help "path to JSON map between URNs and uniprot ids to supplement MaveDB URN query" + <> value Nothing + ) opts = info (config <**> helper) (fullDesc <> progDesc "scrapes MaveDB for human proteins, normalises, and inserts HGVS notation into a SQLite DB" <> header "scrape -- MaveDB scraper") -configured (Config out minpi) = do +configured (Config out minpi uniprotMap) = do urns <- queryURNs + extraUrns <- forM uniprotMap $ \path -> do + json <- T.readFile path + return $ zip (json ^.. members . asIndex) (json ^.. members . _String) scores <- M.fromListWith (M.unionWith (++)) . catMaybes <$> mapM @@ -65,7 +78,7 @@ configured (Config out minpi) = do s <- normalise minpi <$> getScores x pure $ (g,) <$> s ) - urns + (urns ++ fromMaybe mempty extraUrns) let cScores = M.map (M.toList . M.map geomean) scores -- cgit v1.2.3