aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-01-16 09:02:49 +1100
committerJustin Bedo <cu@cua0.org>2023-01-16 09:25:36 +1100
commit7f4bcef26d58934045ba48e1d53e491a319184d0 (patch)
tree56b09953f8f245f5a73dc6196b1b7c578cde37f4
parenta09d5c9d9f7097dd4baf4e9611e488ee1d12ca2f (diff)
add command line arguments
-rw-r--r--bin/cluster.hs41
-rw-r--r--package.yaml1
-rw-r--r--phylogenies.cabal1
3 files changed, 37 insertions, 6 deletions
diff --git a/bin/cluster.hs b/bin/cluster.hs
index 70e22c7..3c9dd6c 100644
--- a/bin/cluster.hs
+++ b/bin/cluster.hs
@@ -8,6 +8,7 @@ import Data.List
import qualified Data.Map as M
import qualified Data.Vector.Unboxed as V
import Numeric.Log hiding (sum)
+import Options.Applicative
import PPL
import System.Random (mkStdGen, setStdGen)
@@ -74,14 +75,42 @@ model xs = do
max (pois (fromIntegral ad * ap) ac) (pois (fromIntegral ad * ap / 2) ac)
* max (pois (fromIntegral dd * dp) dc) (pois (fromIntegral dd * dp / 2) dc)
-main = do
- setStdGen $ mkStdGen 42
- (hdr : lines) <- lines <$> readFile "/nix/store/9xkb2apajv9sy37akz24x3jj6kw5hn7h-bionix-dump-counts"
+-- Command line args
+data Options = Options
+ { seed :: Int,
+ nsamples :: Int,
+ mhfrac :: Double,
+ input :: FilePath,
+ propsPath :: FilePath,
+ clusterPath :: FilePath
+ }
+
+main = run =<< execParser opts
+ where
+ opts =
+ info
+ (parser <**> helper)
+ (fullDesc <> progDesc "Infer a phylogeny from SNV calls in multiple samples" <> header "phylogey - Bayesian phylogeny inference")
+ parser =
+ Options <$> option auto (long "seed" <> short 's' <> help "random seed" <> showDefault <> value 42 <> metavar "INT")
+ <*> option auto (long "nsamples" <> short 'n' <> help "number of samples from posterior" <> value 100000 <> metavar "INT" <> showDefault)
+ <*> option probability (long "mhfrac" <> short 'm' <> help "Metropolis-Hastings mutation probability" <> value 0.3 <> metavar "(0,1]" <> showDefault)
+ <*> argument str (metavar "INPUT")
+ <*> argument str (metavar "PROPS")
+ <*> argument str (metavar "TREE")
+
+ probability = eitherReader $ \arg -> case reads arg of
+ [(r, "")] -> if r <= 1 && r > 0 then Right r else Left "mhfrac not a valid probability"
+ _ -> Left "mhfrac not a valid probability"
+
+run opts = do
+ setStdGen . mkStdGen $ seed opts
+ (hdr : lines) <- lines <$> readFile (input opts)
let parsed = V.fromList $ map ((\[_, ac, ad, dc, dd] -> (dbl ac, dbl ad, dbl dc, dbl dd)) . words) lines
dbl = round . read :: String -> Int
- ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take 100000 <$> mh 0.3 (model parsed)
- writeFile "/data/props" . unlines $ zipWith (\a b -> show a <> "," <> show b) a d
- writeFile "/data/clusters" . unlines $ map show cl
+ ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take (nsamples opts) <$> mh (mhfrac opts) (model parsed)
+ writeFile (propsPath opts) . unlines $ zipWith (\a b -> show a <> "," <> show b) a d
+ writeFile (clusterPath opts) . unlines $ map show cl
where
mml ((a, d, cl), lik) = sum (map (log . (+ 1)) a) + sum (map (log . (+ 1)) d) + sum (map (log . (+ 1)) tab) - sum (map stirling tab) - ln lik
where
diff --git a/package.yaml b/package.yaml
index 4d96c25..8290eed 100644
--- a/package.yaml
+++ b/package.yaml
@@ -15,3 +15,4 @@ executable:
- log-domain
- vector
- random
+ - optparse-applicative
diff --git a/phylogenies.cabal b/phylogenies.cabal
index 15db8d9..e8cea10 100644
--- a/phylogenies.cabal
+++ b/phylogenies.cabal
@@ -22,6 +22,7 @@ executable phylogenies
base >=4.9 && <5
, containers
, log-domain
+ , optparse-applicative
, ppl
, random
, vector