aboutsummaryrefslogtreecommitdiff
path: root/bin/draw.hs
blob: 4e1999289b2784104291d81662438135fb3df166 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{-# LANGUAGE ViewPatterns #-}
module Main where

import Options.Applicative
import Data.List.Split
import qualified Data.Map as M
import Data.List

type Props = [[Double]]
type Nodes = [Int]

-- Command line args
data Options = Options
  {
    nthr :: Int,
    propsPath :: FilePath,
    clusterPath :: FilePath,
    dotPath :: FilePath
  }

-- Trees
data Tree = Tree [Double] Int [Tree]
  deriving (Show, Eq)

children (Tree _ _ cs) = cs

addChild :: Tree -> Tree -> Tree
(Tree a b cs) `addChild` child = Tree a b (child:cs)

buildTree :: Props -> Nodes -> Tree
buildTree (transpose -> ps) cl = merge n [Tree (ps !! i) (tab !! i) [] | i <- [0..n]]
  where
    tab = tabulate cl
    n = length tab - 1

    parent = flip div 2 . (\x -> x - 1)

    merge 0 ts = head ts
    merge i ts =
      let j = parent i
      in merge (i-1) $ take j ts <> [(ts !! j) `addChild` (ts !! i)] <> drop (j+1) ts

-- Prune a tree according to a node threshold
prune th = fix prune'
  where
    prune' p@(Tree a b cs) = Tree a b (map prune' $ concatMap children discard <> keep)
      where
        (discard, keep) = partition (\(Tree _ n _) -> n < th) cs
    fix f x = let y = f x in if y == x then x else fix f y

-- Normalise proportions wrt parent
norm (Tree a n cs) = Tree a n $ map (norm . update a) cs
  where
    update ps (Tree a n cs) = Tree (zipWith (/) a ps) n cs

-- Tabulate list
tabulate xs = M.elems $ M.fromListWith (+) [(c, 1) | c <- xs]

-- Draws a phylogeny to DOT format
drawGraph :: FilePath -> Tree -> IO ()
drawGraph path root = writeFile path $ "digraph{" <> edges root <> "}"
  where
    edges p@(Tree _ _ cs) = concatMap (edge p) cs <> concatMap edges cs
    edge p c = fmt p <> "->" <> fmt c <> ";"
    fmt (Tree ps n _) = "\"" <> intercalate "," (map fmtDbl ps) <> " " <> show n <> "\""
    fmtDbl = show . (/ 10) . fromIntegral . round . (* 1000)

main = run =<< execParser opts
  where
    opts =
      info
        (parser <**> helper)
        (fullDesc <> progDesc "Draw phylogeny" <> header "phylogey - Bayesian phylogeny inference")
    parser =
      Options <$> option nat (long "nthreshold" <> short 't' <> help "minimum number of variants" <> showDefault <> value 0 <> metavar "NAT")
        <*> argument str (metavar "PROPS")
        <*> argument str (metavar "TREE")
        <*> argument str (metavar "PATH")

    nat = eitherReader $ \arg -> case reads arg of
      [(r, "")] -> if r >= (0 :: Int) then Right r else Left "not a natural number"

run opts = do
  ps <- map (map read . splitOn ",") . lines <$> readFile (propsPath opts)
  cl <- map read . lines <$> readFile (clusterPath opts)
  drawGraph (dotPath opts) . norm . prune (nthr opts) $ buildTree ps cl