# Twinning

#### 2023-01-19

pirouette has the option to investigate the error BEAST2 makes on a twin tree. A twin tree is a tree with the same topology as the original phylogeny, yet where the branch lengths follow a birth-death branch length distribution.

library(pirouette)
library(ggplot2)

Twinning is useful to separate the effect of an unknown tree prior (i.e. speciation model) on a phylogeny’s shape from the noise (the minimal error) made by BEAST2.

In this example, the following tree is used:

phylogeny <- ape::read.tree(text = "((A:1, B:1):1, (C:1, D:1):1);")
ape::plot.phylo(phylogeny, main = "The True Phylogeny")

This phylogeny follows a speciation model unknown to BEAST2, as two speciation events happened at exactly the same time. Such a phylogeny will have a likelihood of zero for all of the BEAST2 tree priors.

Twinning:

twinning_params <- pirouette::create_twinning_params()
alignment_params <- pirouette::create_alignment_params(
root_sequence = pirouette::create_blocked_dna(length = 100),
rng_seed = 314
)

We select our inference models in two ways:

1. Using the generative model: we use the same site and clock model the alignment was created with. We do have to specify a BEAST2 tree prior that is closest to what we think generated the phylogeny
2. (for non-Windows users only) Using the inference model that has the most evidence (also: marginal likelihood) from a set of inference models. In this example, we pick all combinations of the one correct site model, the one correct clock model and two tree priors.
type run_if measure evidence inference model
generative always TRUE Birth-Death
candidate best_candidate TRUE Yule
experiment_1 <- create_test_gen_experiment(
inference_model = create_test_inference_model(
tree_prior = create_bd_tree_prior()
)
)
if (rappdirs::app_dir()$os != "win") { experiment_2 <- create_test_cand_experiment( inference_model = create_test_inference_model( tree_prior = create_yule_tree_prior() ) ) experiment_1$inference_conditions$do_measure_evidence <- TRUE experiments <- list(experiment_1, experiment_2) twinning_params$twin_evidence_filename <- get_temp_evidence_filename()

pir_params <- create_pir_params(
alignment_params = alignment_params,
experiments = experiments,
twinning_params = twinning_params,
evidence_filename = get_temp_evidence_filename()
)

} else {
experiments <- list(experiment_1)

pir_params <- create_pir_params(
alignment_params = alignment_params,
experiments = experiments,
twinning_params = twinning_params
)
}

Run:

df <- NULL
if (rappdirs::app_dir()os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { df <- pir_run( phylogeny = phylogeny, pir_params = pir_params ) } else { df <- create_test_pir_run_output() } Show as a table: knitr::kable(df) tree inference_model inference_model_weight site_model clock_model tree_prior error_1 error_2 error_3 true generative 0.5 JC69 relaxed_log_normal birth_death 0.1 0.11 0.12 Show as a figure: pirouette::pir_plot(df) ## Appendix library(ggplot2) ### True tree See true tree again: ape::plot.phylo(phylogeny, main = "The True Phylogeny") See the alignment generated from the true tree: if (rappdirs::app_dir()os != "win" &&
is_beast2_installed() &&
is_beast2_ns_pkg_installed()
) {
check_file_exists(pir_paramsalignment_paramsfasta_filename)
ape::image.DNAbin(
ape::read.FASTA(file = pir_paramsalignment_paramsfasta_filename)
)
}

See the posterior trees generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { # BEAUti offers the '$(tree)' shorthand notation.
# Here, do what BEAUti does...
treelog_filename <-
pir_params$experiments[[1]]$inference_model$mcmc$treelog$filename treelog_filename <- gsub( x = treelog_filename, pattern = "\\$\$$tree\$$",
replacement = beautier::get_alignment_id(
pir_paramsalignment_paramsfasta_filename
)
)
check_file_exists(treelog_filename)
babette::plot_densitree(tracerer::parse_beast_trees(treelog_filename))
}

See the evidence of the true alignments for the models:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { knitr::kable(readr::read_csv(pir_params$evidence_filename))
}

See the posterior trees generated from the true alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { experiment <- pir_params$experiments[[2]]
trees_filename <- experiment$inference_model$mcmc$treelog$filename
check_file_exists(trees_filename)
babette::plot_densitree(tracerer::parse_beast_trees(trees_filename))
}

See the posterior parameter estimates generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { # A tracelog's filename is set to NA by default. # Here, do what BEAUti does... tracelog_filename <- pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
if (is.na(tracelog_filename)) {
pir_params$experiments[[1]]$inference_model$mcmc$tracelogfilename <- paste0( beautier::get_alignment_id(pir_paramsalignment_params$fasta_filename), ".log" ) } check_file_exists( pir_params$experiments[[1]]$inference_model$mcmc$tracelog$filename
)
df <- tracerer::parse_beast_tracelog_file(
pir_params$experiments[[1]]$inference_model$mcmc$tracelogfilename ) ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line() } See the posterior parameter estimates generated from the true alignment, for the model with the most evidence: if (rappdirs::app_dir()os != "win" &&
is_beast2_installed() &&
is_beast2_ns_pkg_installed()
) {
experiment <- pir_params$experiments[[2]] log_filename <- experiment$inference_model$mcmc$tracelog$filename check_file_exists(log_filename) df <- tracerer::parse_beast_tracelog_file(log_filename) ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line() } ### Twin tree See the twin tree: if (rappdirs::app_dir()$os != "win" &&
is_beast2_installed() &&
is_beast2_ns_pkg_installed()
) {
ape::plot.phylo(
ape::read.tree(pir_params$twinning_params$twin_tree_filename),
main = "The Twin Tree"
)
}

See the alignment generated from the twin tree:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { check_file_exists(pir_params$twinning_paramstwin_alignment_filename) ape::image.DNAbin( ape::read.FASTA(file = pir_paramstwinning_paramstwin_alignment_filename) ) } See the posterior trees generated from the twin alignment, for the generative model: if (rappdirs::app_dir()os != "win" &&
is_beast2_installed() &&
is_beast2_ns_pkg_installed()
) {
trees_filename <- to_twin_filename(
pir_params$experiments[[1]]$inference_model$mcmc$treelogfilename ) check_file_exists(trees_filename) babette::plot_densitree( tracerer::parse_beast_trees(trees_filename) ) } See the evidence of the true alignments for the models: if (rappdirs::app_dir()os != "win" &&
is_beast2_installed() &&
is_beast2_ns_pkg_installed()
) {
twin_evidence_filename <- pir_params$twinning_params$twin_evidence_filename
check_file_exists(twin_evidence_filename)
}

See the posterior trees generated from the twin alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { trees_filename <- to_twin_filename( pir_params$experiments[[2]]$inference_model$mcmc$treelog$filename
)
check_file_exists(trees_filename)
babette::plot_densitree(tracerer::parse_beast_trees(trees_filename))
}

See the posterior parameter estimates generated from the true alignment, for the generative model:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed()) { log_filename <- to_twin_filename( pir_params$experiments[[2]]$inference_model$mcmc$tracelog$filename
)
check_file_exists(log_filename)
df <- tracerer::parse_beast_tracelog_file(log_filename)
ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}

See the posterior parameter estimates generated from the true alignment, for the model with the most evidence:

if (rappdirs::app_dir()$os != "win" && is_beast2_installed() && is_beast2_ns_pkg_installed() ) { log_filename <- to_twin_filename( pir_params$experiments[[2]]$inference_model$mcmc$tracelog$filename
)
check_file_exists(log_filename)
df <- tracerer::parse_beast_tracelog_file(log_filename)
ggplot(data = df, aes(x = Sample, y = likelihood)) + geom_line()
}