diff options
author | Justin Bedo <cu@cua0.org> | 2023-01-16 09:02:49 +1100 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2023-01-16 09:25:36 +1100 |
commit | 7f4bcef26d58934045ba48e1d53e491a319184d0 (patch) | |
tree | 56b09953f8f245f5a73dc6196b1b7c578cde37f4 /bin/cluster.hs | |
parent | a09d5c9d9f7097dd4baf4e9611e488ee1d12ca2f (diff) |
add command line arguments
Diffstat (limited to 'bin/cluster.hs')
-rw-r--r-- | bin/cluster.hs | 41 |
1 files changed, 35 insertions, 6 deletions
diff --git a/bin/cluster.hs b/bin/cluster.hs index 70e22c7..3c9dd6c 100644 --- a/bin/cluster.hs +++ b/bin/cluster.hs @@ -8,6 +8,7 @@ import Data.List import qualified Data.Map as M import qualified Data.Vector.Unboxed as V import Numeric.Log hiding (sum) +import Options.Applicative import PPL import System.Random (mkStdGen, setStdGen) @@ -74,14 +75,42 @@ model xs = do max (pois (fromIntegral ad * ap) ac) (pois (fromIntegral ad * ap / 2) ac) * max (pois (fromIntegral dd * dp) dc) (pois (fromIntegral dd * dp / 2) dc) -main = do - setStdGen $ mkStdGen 42 - (hdr : lines) <- lines <$> readFile "/nix/store/9xkb2apajv9sy37akz24x3jj6kw5hn7h-bionix-dump-counts" +-- Command line args +data Options = Options + { seed :: Int, + nsamples :: Int, + mhfrac :: Double, + input :: FilePath, + propsPath :: FilePath, + clusterPath :: FilePath + } + +main = run =<< execParser opts + where + opts = + info + (parser <**> helper) + (fullDesc <> progDesc "Infer a phylogeny from SNV calls in multiple samples" <> header "phylogey - Bayesian phylogeny inference") + parser = + Options <$> option auto (long "seed" <> short 's' <> help "random seed" <> showDefault <> value 42 <> metavar "INT") + <*> option auto (long "nsamples" <> short 'n' <> help "number of samples from posterior" <> value 100000 <> metavar "INT" <> showDefault) + <*> option probability (long "mhfrac" <> short 'm' <> help "Metropolis-Hastings mutation probability" <> value 0.3 <> metavar "(0,1]" <> showDefault) + <*> argument str (metavar "INPUT") + <*> argument str (metavar "PROPS") + <*> argument str (metavar "TREE") + + probability = eitherReader $ \arg -> case reads arg of + [(r, "")] -> if r <= 1 && r > 0 then Right r else Left "mhfrac not a valid probability" + _ -> Left "mhfrac not a valid probability" + +run opts = do + setStdGen . mkStdGen $ seed opts + (hdr : lines) <- lines <$> readFile (input opts) let parsed = V.fromList $ map ((\[_, ac, ad, dc, dd] -> (dbl ac, dbl ad, dbl dc, dbl dd)) . words) lines dbl = round . read :: String -> Int - ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take 100000 <$> mh 0.3 (model parsed) - writeFile "/data/props" . unlines $ zipWith (\a b -> show a <> "," <> show b) a d - writeFile "/data/clusters" . unlines $ map show cl + ((a, d, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take (nsamples opts) <$> mh (mhfrac opts) (model parsed) + writeFile (propsPath opts) . unlines $ zipWith (\a b -> show a <> "," <> show b) a d + writeFile (clusterPath opts) . unlines $ map show cl where mml ((a, d, cl), lik) = sum (map (log . (+ 1)) a) + sum (map (log . (+ 1)) d) + sum (map (log . (+ 1)) tab) - sum (map stirling tab) - ln lik where |