--- title: "Power analysis - within model" author: "Bob Verity" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{"Power analysis - within model"} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, echo=FALSE} set.seed(1) library(MALECOT) ``` One of the major advantages of simulated data is that we can test the power of the program under different scenarios, hopefully allowing us to design field and lab studies that are powered to detect a signal. MALECOT is a Bayesian program and therefore power in this context is a Bayesian equivalent of traditional power, defined as the posterior probability of the true (simulated) value averaged over a large number of simulations drawn from the same model. For example, if the power to detect population structure is 0.9 then that means the true grouping will tend to have a posterior probability of 0.9. For those interested in the headline results: #### bi-allelic data *under realistic assumptions - including a skewed allele frequency distribution, 5% genotyping error and a mean COI of 2 per subpopulation - power analysis indicates that 100 independent loci are sufficient to detect population structure with ~95% posterior probability with 20 samples per subpopulation.* ## Simulation details The following parameter ranges were explored when simulating data: * Always assuming K = 5 subpopulations * Sample size (*n*) in the range [25, 100]. Note that this is the total sample size over all 5 subpopulations, meaning the number of samples per subpopulation is 1/5th this value * Loci (*L*) in the range [10, 100] * Mean COI per subpopulation in {1.2, 2.0, 5.0}, representing low, moderate and high transmission intensity. The assumed COI distribution is Poisson * Shape parameter of the prior on allele frequencies (*lambda1*) in {1, 5, 10}, representing different levels of skewed allele frequency distribution * Proportion erronious genotyping calls (both false homozygote and false heterozygote) in {0.00, 0.05, 0.10} The three priors on allele frequencies correspond to the following distributions: ```{r, echo=FALSE, fig.width=10, fig.height=4} library(ggplot2) library(gridExtra) # produce prior plots plot1 <- prior_plot <- plot_prior_p(lambda = c(1, 1)) + ggtitle("lambda1 = 1") plot2 <- prior_plot <- plot_prior_p(lambda = c(5, 1)) + ggtitle("lambda1 = 5") plot3 <- prior_plot <- plot_prior_p(lambda = c(10, 1)) + ggtitle("lambda1 = 10") grid.arrange(plot1, plot2, plot3, nrow = 1) ``` Simulated datasets were analysed by MCMC with the following parameters: * 10,000 burn-in iterations. Test for convergence automatically every 100 iterations * 10,000 sampling iterations * Single temperature rung (no thermodynamic MCMC) Each simulation parameter set was repeated 50 times, and results were averaged over simulations. ## Power to detect population structure ------ ```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} library(tidyr) library(plotly) # load power analysis results df <- malecot_file("power_within_results_mean.rds") df$power_structure <- round(df$power_structure, 2) # get loci into wide format df <- spread(df, L, power_structure) names(df)[-(1:4)] <- paste0("L", names(df)[-(1:4)]) # define vectors of values COI_mean <- c(1.2, 2, 5) loci <- c(10, 20, 50, 100) ``` ### Error = 0

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0 lambda1 <- 1 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 1", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0 lambda1 <- 5 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 5", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0 lambda1 <- 10 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 10", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

------ ### Error = 0.05

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.05 lambda1 <- 1 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 1", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.05 lambda1 <- 5 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 5", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.05 lambda1 <- 10 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 10", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ``` ------ ### Error = 0.10

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.10 lambda1 <- 1 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 1", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.10 lambda1 <- 5 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 5", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.width=8, fig.height=5} # choose parameters to plot error <- 0.10 lambda1 <- 10 # create basic plot and add multiple traces p <- plot_ly() for (i in 1:3) { df_sub <- df[df$error == error & df$lambda1 == lambda1 & df$COI_mean == COI_mean[i],] for (j in 1:4) { p <- add_trace(p, x = unique(df$n), y = df_sub[,j+4], type = 'scatter', mode = 'lines+markers', visible = i == 1, name = paste0("loci = ", loci[j])) } } # create dropdown buttons as list button_list <- list() for (i in 1:3) { v <- rep(FALSE,3) v[i] <- TRUE button_list[[i]] <- list(method = "restyle", args = list("visible", as.list(rep(v, each = 4))), label = paste0("COI_mean = ", COI_mean[i])) } # finalise layout plto plot p <- layout(p, title = "lambda1 = 10", yaxis = list(title = "probability correct assignment", range = c(0,1)), xaxis = list(title = "sample size", range = c(0,250)), updatemenus = list( list( yanchor = 'auto', buttons = button_list ) ) ) # print plot p ```