aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Bedo <cu@cua0.org>2023-01-25 10:14:56 +1100
committerJustin Bedo <cu@cua0.org>2023-01-25 19:42:21 +1100
commitea4c9f0f7959251bf84338b145d5733c38f07871 (patch)
tree283d6644dca4b6c5d933be323a7306523da70355
parentc7059c4d7309f1371f5acf8398b4adb4f2f33881 (diff)
split out tree drawing code into separate executable
-rw-r--r--bin/cluster.hs21
-rw-r--r--bin/draw.hs85
-rw-r--r--package.yaml3
-rw-r--r--phylogenies.cabal3
4 files changed, 91 insertions, 21 deletions
diff --git a/bin/cluster.hs b/bin/cluster.hs
index 7da6810..6b0d917 100644
--- a/bin/cluster.hs
+++ b/bin/cluster.hs
@@ -88,22 +88,6 @@ model xs = do
-- Tabulate list
tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs]
--- Draws a phylogeny to DOT format
-drawGraph :: FilePath -> [[Double]] -> [Int] -> IO ()
-drawGraph path ps cl = writeFile path $ "digraph{" <> edges <> "}"
- where
- edges = concatMap calcEdges [1 .. length tab - 1]
- calcEdges idx = fmt (parent idx) <> "->" <> fmt idx <> ";"
- fmt i = "\"" <> intercalate "," (map fmtDbl $ ps' !! i) <> " " <> show (tab !! i) <> "\""
- tab = tabulate cl
- parent = flip div 2 . (\x -> x - 1)
- fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000)
- ps' = transpose $ map norm ps
-
- -- Normalise to frequency of parent
- norm :: [Double] -> [Double]
- norm xxs@(x : xs) = x : zipWith (\i y -> y / xxs !! parent i) [1 ..] xs
-
-- Command line args
data Options = Options
{ seed :: Int,
@@ -111,8 +95,7 @@ data Options = Options
mhfrac :: Double,
input :: FilePath,
propsPath :: FilePath,
- clusterPath :: FilePath,
- dotPath :: FilePath
+ clusterPath :: FilePath
}
main = run =<< execParser opts
@@ -128,7 +111,6 @@ main = run =<< execParser opts
<*> argument str (metavar "INPUT")
<*> argument str (metavar "PROPS")
<*> argument str (metavar "TREE")
- <*> strOption (long "dot" <> short 'd' <> help "draw graph of phylogeny in dot format" <> value "" <> metavar "PATH")
probability = eitherReader $ \arg -> case reads arg of
[(r, "")] -> if r <= 1 && r > 0 then Right r else Left "mhfrac not a valid probability"
@@ -142,7 +124,6 @@ run opts = do
((ps, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take (nsamples opts) <$> mh (mhfrac opts) 0.5 (model $ getCompact parsed)
writeFile (propsPath opts) . unlines $ map (intercalate "," . map show) ps
writeFile (clusterPath opts) . unlines $ map show cl
- when (dotPath opts /= "") $ drawGraph (dotPath opts) ps cl
where
mml ((ps, cl), lik) = sum' (sum' (log . (+ 1))) ps + sum' (log . (+ 1)) tab - sum' (ln . stirling) tab - ln lik
where
diff --git a/bin/draw.hs b/bin/draw.hs
index b41c778..4e19992 100644
--- a/bin/draw.hs
+++ b/bin/draw.hs
@@ -1,3 +1,86 @@
+{-# LANGUAGE ViewPatterns #-}
module Main where
-main = undefined
+import Options.Applicative
+import Data.List.Split
+import qualified Data.Map as M
+import Data.List
+
+type Props = [[Double]]
+type Nodes = [Int]
+
+-- Command line args
+data Options = Options
+ {
+ nthr :: Int,
+ propsPath :: FilePath,
+ clusterPath :: FilePath,
+ dotPath :: FilePath
+ }
+
+-- Trees
+data Tree = Tree [Double] Int [Tree]
+ deriving (Show, Eq)
+
+children (Tree _ _ cs) = cs
+
+addChild :: Tree -> Tree -> Tree
+(Tree a b cs) `addChild` child = Tree a b (child:cs)
+
+buildTree :: Props -> Nodes -> Tree
+buildTree (transpose -> ps) cl = merge n [Tree (ps !! i) (tab !! i) [] | i <- [0..n]]
+ where
+ tab = tabulate cl
+ n = length tab - 1
+
+ parent = flip div 2 . (\x -> x - 1)
+
+ merge 0 ts = head ts
+ merge i ts =
+ let j = parent i
+ in merge (i-1) $ take j ts <> [(ts !! j) `addChild` (ts !! i)] <> drop (j+1) ts
+
+-- Prune a tree according to a node threshold
+prune th = fix prune'
+ where
+ prune' p@(Tree a b cs) = Tree a b (map prune' $ concatMap children discard <> keep)
+ where
+ (discard, keep) = partition (\(Tree _ n _) -> n < th) cs
+ fix f x = let y = f x in if y == x then x else fix f y
+
+-- Normalise proportions wrt parent
+norm (Tree a n cs) = Tree a n $ map (norm . update a) cs
+ where
+ update ps (Tree a n cs) = Tree (zipWith (/) a ps) n cs
+
+-- Tabulate list
+tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs]
+
+-- Draws a phylogeny to DOT format
+drawGraph :: FilePath -> Tree -> IO ()
+drawGraph path root = writeFile path $ "digraph{" <> edges root <> "}"
+ where
+ edges p@(Tree _ _ cs) = concatMap (edge p) cs <> concatMap edges cs
+ edge p c = fmt p <> "->" <> fmt c <> ";"
+ fmt (Tree ps n _) = "\"" <> intercalate "," (map fmtDbl ps) <> " " <> show n <> "\""
+ fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000)
+
+main = run =<< execParser opts
+ where
+ opts =
+ info
+ (parser <**> helper)
+ (fullDesc <> progDesc "Draw phylogeny" <> header "phylogey - Bayesian phylogeny inference")
+ parser =
+ Options <$> option nat (long "nthreshold" <> short 't' <> help "minimum number of variants" <> showDefault <> value 0 <> metavar "NAT")
+ <*> argument str (metavar "PROPS")
+ <*> argument str (metavar "TREE")
+ <*> argument str (metavar "PATH")
+
+ nat = eitherReader $ \arg -> case reads arg of
+ [(r, "")] -> if r >= (0 :: Int) then Right r else Left "not a natural number"
+
+run opts = do
+ ps <- map (map read . splitOn ",") . lines <$> readFile (propsPath opts)
+ cl <- map read . lines <$> readFile (clusterPath opts)
+ drawGraph (dotPath opts) . norm . prune (nthr opts) $ buildTree ps cl
diff --git a/package.yaml b/package.yaml
index aa823e9..f72db5a 100644
--- a/package.yaml
+++ b/package.yaml
@@ -23,3 +23,6 @@ executables:
ghc-options: [-rtsopts]
dependencies:
- base >= 4.9 && < 5
+ - optparse-applicative
+ - split
+ - containers
diff --git a/phylogenies.cabal b/phylogenies.cabal
index f2c776f..ae63597 100644
--- a/phylogenies.cabal
+++ b/phylogenies.cabal
@@ -38,4 +38,7 @@ executable draw
ghc-options: -rtsopts
build-depends:
base >=4.9 && <5
+ , containers
+ , optparse-applicative
+ , split
default-language: Haskell2010