blob: 4e1999289b2784104291d81662438135fb3df166 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
{-# LANGUAGE ViewPatterns #-}
module Main where
import Options.Applicative
import Data.List.Split
import qualified Data.Map as M
import Data.List
type Props = [[Double]]
type Nodes = [Int]
-- Command line args
data Options = Options
{
nthr :: Int,
propsPath :: FilePath,
clusterPath :: FilePath,
dotPath :: FilePath
}
-- Trees
data Tree = Tree [Double] Int [Tree]
deriving (Show, Eq)
children (Tree _ _ cs) = cs
addChild :: Tree -> Tree -> Tree
(Tree a b cs) `addChild` child = Tree a b (child:cs)
buildTree :: Props -> Nodes -> Tree
buildTree (transpose -> ps) cl = merge n [Tree (ps !! i) (tab !! i) [] | i <- [0..n]]
where
tab = tabulate cl
n = length tab - 1
parent = flip div 2 . (\x -> x - 1)
merge 0 ts = head ts
merge i ts =
let j = parent i
in merge (i-1) $ take j ts <> [(ts !! j) `addChild` (ts !! i)] <> drop (j+1) ts
-- Prune a tree according to a node threshold
prune th = fix prune'
where
prune' p@(Tree a b cs) = Tree a b (map prune' $ concatMap children discard <> keep)
where
(discard, keep) = partition (\(Tree _ n _) -> n < th) cs
fix f x = let y = f x in if y == x then x else fix f y
-- Normalise proportions wrt parent
norm (Tree a n cs) = Tree a n $ map (norm . update a) cs
where
update ps (Tree a n cs) = Tree (zipWith (/) a ps) n cs
-- Tabulate list
tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs]
-- Draws a phylogeny to DOT format
drawGraph :: FilePath -> Tree -> IO ()
drawGraph path root = writeFile path $ "digraph{" <> edges root <> "}"
where
edges p@(Tree _ _ cs) = concatMap (edge p) cs <> concatMap edges cs
edge p c = fmt p <> "->" <> fmt c <> ";"
fmt (Tree ps n _) = "\"" <> intercalate "," (map fmtDbl ps) <> " " <> show n <> "\""
fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000)
main = run =<< execParser opts
where
opts =
info
(parser <**> helper)
(fullDesc <> progDesc "Draw phylogeny" <> header "phylogey - Bayesian phylogeny inference")
parser =
Options <$> option nat (long "nthreshold" <> short 't' <> help "minimum number of variants" <> showDefault <> value 0 <> metavar "NAT")
<*> argument str (metavar "PROPS")
<*> argument str (metavar "TREE")
<*> argument str (metavar "PATH")
nat = eitherReader $ \arg -> case reads arg of
[(r, "")] -> if r >= (0 :: Int) then Right r else Left "not a natural number"
run opts = do
ps <- map (map read . splitOn ",") . lines <$> readFile (propsPath opts)
cl <- map read . lines <$> readFile (clusterPath opts)
drawGraph (dotPath opts) . norm . prune (nthr opts) $ buildTree ps cl
|