Skip to content

reproduce ggphylo example using ggtree

Guangchuang Yu edited this page Sep 2, 2015 · 7 revisions

ggphylo example

ggphylo supports plotting a list of phylo objects.

require(ape)
require(ggphylo)

tree.list = rmtree(3, 20)
ggphylo(tree.list)

AND plotting data along tree

n <- 40;  x <- rtree(n); n.nodes <- length(nodes(x))
bootstraps <- 100 - rexp(n.nodes, rate=5) * 100
pop.sizes <- pmax(0, rnorm(n.nodes, mean=50000, sd=50000))
for (i in nodes(x)) {
x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
plot.args <- list(
   x,
   line.color.by='bootstrap',
   line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'), 
                                      node.size.by='pop.size',
   node.size.scale = scale_size_continuous(limits=c(0, 100000), 
                    range=c(1, 5)), label.size=2
)
do.call(ggphylo, plot.args)

reproduce using ggtree

In ggtree, multiPhylo object is supported.

require(ggplot2)
require(ggtree)

ggtree(tree.list, ladderize=F, aes(color=.id)) + facet_wrap(~.id) + 
      geom_point() + geom_text(subset=.(!isTip), aes(label=node), hjust=-.2, size=3) + 
          geom_tiplab(size=4, hjust=-.2) 

In the ggphylo example, they set attributes based on internal node number. In my opinion, this is not commonly use, since most of the users have no idea of how taxa were mapping to internal node number.

In ggtree, we created %<+% operator to add additional related information, it requires the first column to be taxa label. This is more commonly used since user already have the taxa label.

To reproduce this case, we can employ raxml class, which is design to store RAxML bootstrapping analysis output, to store the bootstrap and pop.size.

xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x), 
                                              bootstrap=bootstraps, 
                                              pop.size=pop.sizes)
       )
ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) + 
      geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") + 
         scale_color_gradient(limits=c(50, 100), low="red", high="black")  + 
            scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
               theme_tree2(legend.position="right") 

benchmark

draw_ggphylo <- function(x, bootstraps, pop.sizes) {
	for (i in nodes(x)) {
	x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
	x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
	plot.args <- list(
   		x,
   		line.color.by='bootstrap',
   		line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'), 
                                      node.size.by='pop.size',
   		node.size.scale = scale_size_continuous(limits=c(0, 100000), 
                    range=c(1, 5)), label.size=2
	)
	do.call(ggphylo, plot.args)
}


draw_ggtree <- function(x, bootstraps, pop.size) {
	xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x), 
    	                                          bootstrap=bootstraps, 
        	                                      pop.size=pop.sizes)
       	)

	ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) + 
    	  geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") + 
        	 scale_color_gradient(limits=c(50, 100), low="red", high="black")  + 
            	scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
               		theme_tree2(legend.position="right") 
}

require(microbenchmark)

bm <- microbenchmark(
	ggphylo = draw_ggphylo(x, bootstraps, pop.sizes),
	ggtree = draw_ggtree(x, bootstraps, pop.sizes),
	times=100L
)
qplot(y=time, data=bm, color=expr) + scale_y_log10() + ggtitle("run time comparison")

Clone this wiki locally