From 7f4bcef26d58934045ba48e1d53e491a319184d0 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Mon, 16 Jan 2023 09:02:49 +1100 Subject: add command line arguments --- bin/cluster.hs | 41 +++++++++++++++++++++++++++++++++++------ package.yaml | 1 + phylogenies.cabal | 1 + 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/bin/cluster.hs b/bin/cluster.hs index 70e22c7..3c9dd6c 100644 --- a/bin/cluster.hs +++ b/bin/cluster.hs @@ -8,6 +8,7 @@ import Data.List import qualified Data.Map as M import qualified Data.Vector.Unboxed as V import Numeric.Log hiding (sum) +import Options.Applicative import PPL import System.Random (mkStdGen, setStdGen) @@ -74,14 +75,42 @@ model xs = do max (pois (fromIntegral ad * ap) ac) (pois (fromIntegral ad * ap / 2) ac) * max (pois (fromIntegral dd * dp) dc) (pois (fromIntegral dd * dp / 2) dc) -main = do - setStdGen $ mkStdGen 42 - (hdr : lines) <- lines <$> readFile "/nix/store/9xkb2apajv9sy37akz24x3jj6kw5hn7h-bionix-dump-counts" +-- Command line args +data Options = Options + { seed :: Int, + nsamples :: Int, + mhfrac :: Double, + input :: FilePath, + propsPath :: FilePath, + clusterPath :: FilePath + } + +main = run =<< execParser opts + where + opts = + info + (parser <**> helper) + (fullDesc <> progDesc "Infer a phylogeny from SNV calls in multiple samples" <> header "phylogey - Bayesian phylogeny inference") + parser = + Options <$> option auto (long "seed" <> short 's' <> help "random seed" <> showDefault <> value 42 <> metavar "INT") + <*> option auto (long "nsamples" <> short 'n' <> help "number of samples from posterior" <> value 100000 <> metavar "INT" <> showDefault) + <*> option probability (long "mhfrac" <> short 'm' <> help "Metropolis-Hastings mutation probability" <> value 0.3 <> metavar "(0,1]" <> showDefault) + <*> argument str (metavar "INPUT") + <*> argument str (metavar "PROPS") + <*> argument str (metavar "TREE") + + probability = eitherReader $ \arg -> case reads arg of + [(r, "")] -> if r <= 1 && r > 0 then Right r else Left "mhfrac not a valid probability" + _ -> Left "mhfrac not a valid probability" + +run opts = do + setStdGen . mkStdGen $ seed opts + (hdr : lines) <- lines <$> readFile (input opts) let parsed = V.fromList $ map ((\[_, ac, ad, dc, dd] -> (dbl ac, dbl ad, dbl dc, dbl dd)) . words) lines dbl = round . read :: String -> Int - ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take 100000 <$> mh 0.3 (model parsed) - writeFile "/data/props" . unlines $ zipWith (\a b -> show a <> "," <> show b) a d - writeFile "/data/clusters" . unlines $ map show cl + ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take (nsamples opts) <$> mh (mhfrac opts) (model parsed) + writeFile (propsPath opts) . unlines $ zipWith (\a b -> show a <> "," <> show b) a d + writeFile (clusterPath opts) . unlines $ map show cl where mml ((a, d, cl), lik) = sum (map (log . (+ 1)) a) + sum (map (log . (+ 1)) d) + sum (map (log . (+ 1)) tab) - sum (map stirling tab) - ln lik where diff --git a/package.yaml b/package.yaml index 4d96c25..8290eed 100644 --- a/package.yaml +++ b/package.yaml @@ -15,3 +15,4 @@ executable: - log-domain - vector - random + - optparse-applicative diff --git a/phylogenies.cabal b/phylogenies.cabal index 15db8d9..e8cea10 100644 --- a/phylogenies.cabal +++ b/phylogenies.cabal @@ -22,6 +22,7 @@ executable phylogenies base >=4.9 && <5 , containers , log-domain + , optparse-applicative , ppl , random , vector -- cgit v1.2.3