summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2022-09-30 14:53:40 +1000
committerJustin Bedo <cu@cua0.org>2022-10-05 10:40:17 +1100
commit5e2bda5ef2f7e95a164603829d983f61d0737b5f (patch)
tree26aa59bf3f53e0fde53355368765e85426dafaec
parentdc911006d7be248f22db9549b2b85884615e4abc (diff)
add ability to explicitly specify additional mavedb scoresets matched with uniprot ids
-rw-r--r--src/scrape.hs37
1 files changed, 25 insertions, 12 deletions
diff --git a/src/scrape.hs b/src/scrape.hs
index 56f4631..e877a9c 100644
--- a/src/scrape.hs
+++ b/src/scrape.hs
@@ -6,6 +6,8 @@
module Main where
import Control.Lens
+import Control.Monad
+import Data.Aeson.Lens
import Data.List
import qualified Data.Map as M
import Data.Maybe
@@ -32,30 +34,41 @@ schema =
PRIMARY KEY (gene, hgvs_p));
|]
-data Config = Config FilePath Double
+data Config = Config FilePath Double (Maybe FilePath)
main = configured =<< execParser opts
where
config =
Config
<$> strArgument (metavar "DB" <> help "path to SQLite DB to insert into")
- <*> option
- auto
- ( long "min-pi"
- <> short 'p'
- <> metavar "π"
- <> help "minimum estimated labelling proportion"
- <> showDefault
- <> value 0.1
- )
+ <*> option
+ auto
+ ( long "min-pi"
+ <> short 'p'
+ <> metavar "π"
+ <> help "minimum estimated labelling proportion"
+ <> showDefault
+ <> value 0.1
+ )
+ <*> option
+ auto
+ ( long "extra"
+ <> short 'e'
+ <> metavar "PATH"
+ <> help "path to JSON map between URNs and uniprot ids to supplement MaveDB URN query"
+ <> value Nothing
+ )
opts =
info
(config <**> helper)
(fullDesc <> progDesc "scrapes MaveDB for human proteins, normalises, and inserts HGVS notation into a SQLite DB" <> header "scrape -- MaveDB scraper")
-configured (Config out minpi) = do
+configured (Config out minpi uniprotMap) = do
urns <- queryURNs
+ extraUrns <- forM uniprotMap $ \path -> do
+ json <- T.readFile path
+ return $ zip (json ^.. members . asIndex) (json ^.. members . _String)
scores <-
M.fromListWith (M.unionWith (++)) . catMaybes
<$> mapM
@@ -65,7 +78,7 @@ configured (Config out minpi) = do
s <- normalise minpi <$> getScores x
pure $ (g,) <$> s
)
- urns
+ (urns ++ fromMaybe mempty extraUrns)
let cScores = M.map (M.toList . M.map geomean) scores