From ea4c9f0f7959251bf84338b145d5733c38f07871 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Wed, 25 Jan 2023 10:14:56 +1100 Subject: split out tree drawing code into separate executable --- bin/cluster.hs | 21 +------------- bin/draw.hs | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- package.yaml | 3 ++ phylogenies.cabal | 3 ++ 4 files changed, 91 insertions(+), 21 deletions(-) diff --git a/bin/cluster.hs b/bin/cluster.hs index 7da6810..6b0d917 100644 --- a/bin/cluster.hs +++ b/bin/cluster.hs @@ -88,22 +88,6 @@ model xs = do -- Tabulate list tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs] --- Draws a phylogeny to DOT format -drawGraph :: FilePath -> [[Double]] -> [Int] -> IO () -drawGraph path ps cl = writeFile path $ "digraph{" <> edges <> "}" - where - edges = concatMap calcEdges [1 .. length tab - 1] - calcEdges idx = fmt (parent idx) <> "->" <> fmt idx <> ";" - fmt i = "\"" <> intercalate "," (map fmtDbl $ ps' !! i) <> " " <> show (tab !! i) <> "\"" - tab = tabulate cl - parent = flip div 2 . (\x -> x - 1) - fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000) - ps' = transpose $ map norm ps - - -- Normalise to frequency of parent - norm :: [Double] -> [Double] - norm xxs@(x : xs) = x : zipWith (\i y -> y / xxs !! parent i) [1 ..] xs - -- Command line args data Options = Options { seed :: Int, @@ -111,8 +95,7 @@ data Options = Options mhfrac :: Double, input :: FilePath, propsPath :: FilePath, - clusterPath :: FilePath, - dotPath :: FilePath + clusterPath :: FilePath } main = run =<< execParser opts @@ -128,7 +111,6 @@ main = run =<< execParser opts <*> argument str (metavar "INPUT") <*> argument str (metavar "PROPS") <*> argument str (metavar "TREE") - <*> strOption (long "dot" <> short 'd' <> help "draw graph of phylogeny in dot format" <> value "" <> metavar "PATH") probability = eitherReader $ \arg -> case reads arg of [(r, "")] -> if r <= 1 && r > 0 then Right r else Left "mhfrac not a valid probability" @@ -142,7 +124,6 @@ run opts = do ((ps, cl), _) <- foldl1' (\a c -> if mml a < mml c then a else c) . take (nsamples opts) <$> mh (mhfrac opts) 0.5 (model $ getCompact parsed) writeFile (propsPath opts) . unlines $ map (intercalate "," . map show) ps writeFile (clusterPath opts) . unlines $ map show cl - when (dotPath opts /= "") $ drawGraph (dotPath opts) ps cl where mml ((ps, cl), lik) = sum' (sum' (log . (+ 1))) ps + sum' (log . (+ 1)) tab - sum' (ln . stirling) tab - ln lik where diff --git a/bin/draw.hs b/bin/draw.hs index b41c778..4e19992 100644 --- a/bin/draw.hs +++ b/bin/draw.hs @@ -1,3 +1,86 @@ +{-# LANGUAGE ViewPatterns #-} module Main where -main = undefined +import Options.Applicative +import Data.List.Split +import qualified Data.Map as M +import Data.List + +type Props = [[Double]] +type Nodes = [Int] + +-- Command line args +data Options = Options + { + nthr :: Int, + propsPath :: FilePath, + clusterPath :: FilePath, + dotPath :: FilePath + } + +-- Trees +data Tree = Tree [Double] Int [Tree] + deriving (Show, Eq) + +children (Tree _ _ cs) = cs + +addChild :: Tree -> Tree -> Tree +(Tree a b cs) `addChild` child = Tree a b (child:cs) + +buildTree :: Props -> Nodes -> Tree +buildTree (transpose -> ps) cl = merge n [Tree (ps !! i) (tab !! i) [] | i <- [0..n]] + where + tab = tabulate cl + n = length tab - 1 + + parent = flip div 2 . (\x -> x - 1) + + merge 0 ts = head ts + merge i ts = + let j = parent i + in merge (i-1) $ take j ts <> [(ts !! j) `addChild` (ts !! i)] <> drop (j+1) ts + +-- Prune a tree according to a node threshold +prune th = fix prune' + where + prune' p@(Tree a b cs) = Tree a b (map prune' $ concatMap children discard <> keep) + where + (discard, keep) = partition (\(Tree _ n _) -> n < th) cs + fix f x = let y = f x in if y == x then x else fix f y + +-- Normalise proportions wrt parent +norm (Tree a n cs) = Tree a n $ map (norm . update a) cs + where + update ps (Tree a n cs) = Tree (zipWith (/) a ps) n cs + +-- Tabulate list +tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs] + +-- Draws a phylogeny to DOT format +drawGraph :: FilePath -> Tree -> IO () +drawGraph path root = writeFile path $ "digraph{" <> edges root <> "}" + where + edges p@(Tree _ _ cs) = concatMap (edge p) cs <> concatMap edges cs + edge p c = fmt p <> "->" <> fmt c <> ";" + fmt (Tree ps n _) = "\"" <> intercalate "," (map fmtDbl ps) <> " " <> show n <> "\"" + fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000) + +main = run =<< execParser opts + where + opts = + info + (parser <**> helper) + (fullDesc <> progDesc "Draw phylogeny" <> header "phylogey - Bayesian phylogeny inference") + parser = + Options <$> option nat (long "nthreshold" <> short 't' <> help "minimum number of variants" <> showDefault <> value 0 <> metavar "NAT") + <*> argument str (metavar "PROPS") + <*> argument str (metavar "TREE") + <*> argument str (metavar "PATH") + + nat = eitherReader $ \arg -> case reads arg of + [(r, "")] -> if r >= (0 :: Int) then Right r else Left "not a natural number" + +run opts = do + ps <- map (map read . splitOn ",") . lines <$> readFile (propsPath opts) + cl <- map read . lines <$> readFile (clusterPath opts) + drawGraph (dotPath opts) . norm . prune (nthr opts) $ buildTree ps cl diff --git a/package.yaml b/package.yaml index aa823e9..f72db5a 100644 --- a/package.yaml +++ b/package.yaml @@ -23,3 +23,6 @@ executables: ghc-options: [-rtsopts] dependencies: - base >= 4.9 && < 5 + - optparse-applicative + - split + - containers diff --git a/phylogenies.cabal b/phylogenies.cabal index f2c776f..ae63597 100644 --- a/phylogenies.cabal +++ b/phylogenies.cabal @@ -38,4 +38,7 @@ executable draw ghc-options: -rtsopts build-depends: base >=4.9 && <5 + , containers + , optparse-applicative + , split default-language: Haskell2010 -- cgit v1.2.3