Skip to content

reproduce ggphylo example using ggtree

Guangchuang Yu edited this page Sep 2, 2015 · 7 revisions

ggphylo example

ggphylo supports plotting a list of phylo objects.

require(ape)
require(ggphylo)

tree.list = rmtree(3, 20)
ggphylo(tree.list)

AND plotting data along tree

n <- 40;  x <- rtree(n); n.nodes <- length(nodes(x))
bootstraps <- 100 - rexp(n.nodes, rate=5) * 100
pop.sizes <- pmax(0, rnorm(n.nodes, mean=50000, sd=50000))
for (i in nodes(x)) {
x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
plot.args <- list(
   x,
   line.color.by='bootstrap',
   line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'), 
                                      node.size.by='pop.size',
   node.size.scale = scale_size_continuous(limits=c(0, 100000), 
                    range=c(1, 5)), label.size=2
)
do.call(ggphylo, plot.args)

reproduce using ggtree

In ggtree, multiPhylo object is supported.

require(ggplot2)
require(ggtree)

ggtree(tree.list, ladderize=F, aes(color=.id)) + facet_wrap(~.id) + 
      geom_point() + geom_text(subset=.(!isTip), aes(label=node), hjust=-.2, size=3) + 
          geom_tiplab(size=4, hjust=-.2) 

In the ggphylo example, they set attributes based on internal node number. In my opinion, this is not commonly use, since most of the users have no idea of how taxa were mapping to internal node number.

In ggtree, we created %<+% operator to add additional related information, it requires the first column to be taxa label. This is more commonly used since user already have the taxa label.

To reproduce this case, we can employ raxml class, which is design to store RAxML bootstrapping analysis output, to store the bootstrap and pop.size.

xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x), 
                                              bootstrap=bootstraps, 
                                              pop.size=pop.sizes)
       )
ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) + 
      geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") + 
        geom_text(aes(label=node), subset=.(!isTip), size=3, color="grey50", hjust=-.5) + 
          geom_tiplab(color="black", hjust=-.5, size=3) +           
            scale_color_gradient(limits=c(50, 100), low="red", high="black")  + 
              scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
                theme_tree2(legend.position="right") 

benchmark

draw_ggphylo <- function(x, bootstraps, pop.sizes) {
	for (i in nodes(x)) {
	x <- tree.set.tag(x, i, 'bootstrap', bootstraps[i])
	x <- tree.set.tag(x, i, 'pop.size', pop.sizes[i]) }
	plot.args <- list(
   		x,
   		line.color.by='bootstrap',
   		line.color.scale=scale_colour_gradient(limits=c(50, 100), low='red', high='black'), 
                                      node.size.by='pop.size',
   		node.size.scale = scale_size_continuous(limits=c(0, 100000), 
                    range=c(1, 5)), label.size=2
	)
	do.call(ggphylo, plot.args)
}


draw_ggtree <- function(x, bootstraps, pop.size) {
	xx=new("raxml", phylo=x, bootstrap=data.frame(node=1:getNodeNum(x), 
    	                                          bootstrap=bootstraps, 
        	                                      pop.size=pop.sizes)
       	)

	ggtree(xx, aes(color=bootstrap), ladderize=F, size=1.2) + 
    	  geom_point(aes(size=pop.size), subset=.(bootstrap>50), color="black") + 
            geom_text(aes(label=node), subset=.(!isTip), size=3, color="grey50", hjust=-.5) + 
              geom_tiplab(color="black", hjust=-.5, size=3) +           
                scale_color_gradient(limits=c(50, 100), low="red", high="black")  + 
                  scale_size_continuous(limits=c(0, 100000), range=c(1, 5)) +
                    theme_tree2(legend.position="right") 
}

require(microbenchmark)

bm <- microbenchmark(
	ggphylo = draw_ggphylo(x, bootstraps, pop.sizes),
	ggtree = draw_ggtree(x, bootstraps, pop.sizes),
	times=100L
)
print(bm)
## Unit: milliseconds
 ##    expr        min         lq       mean     median         uq        max
 ## ggphylo 1230.81977 1266.51639 1283.54395 1284.05280 1294.22355 1467.62436
 ##  ggtree   30.40235   31.70079   33.48648   32.67046   34.05156   42.88493
 ## neval
 ##   100
 ##   100
qplot(y=time, data=bm, color=expr) + scale_y_log10() + ggtitle("run time comparison")

Clone this wiki locally