---
title: "Discrete sensitivity analysis"
output: rmarkdown::html_vignette
vignette: >
 %\VignetteIndexEntry{Discrete sensitivity analysis}
 %\VignetteEngine{knitr::rmarkdown}
 %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Here, we set default options for our markdown file
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  tidy = "styler",
  fig.width = 7,
  fig.height = 7,
  message = FALSE,
  warning = FALSE
)

# The following two variables control which chunks are run. Either the tests are
# run and the stored images not printed or the tests can not be run and stored
# images printed. To run the tests, set eval_tests to TRUE. To print the stored
# images, set eval_tests to FALSE.
eval_tests <- FALSE
eval_images <- !eval_tests

library(coiaf)
library(ggplot2)
library(patchwork)
```

## Introduction

In this analysis file, we aim to understand the effect of varying parameters on
our COI framework. We will examine both simulation and estimation parameters.
The parameters that we will examine are:

* `coverage`: Coverage at each locus.
* `loci`: The number of loci.
* `alpha`: Shape parameter of the symmetric Dirichlet prior on strain proportions.
* `overdispersion`: The extent to which counts are over-dispersed relative to
the binomial distribution. Counts are Beta-binomially distributed, with the beta
distribution having shape parameters $p/\text{overdispersion}$ and $(1-p) /
\text{overdispersion}$.
* `relatedness`: The probability that a strain in mixed infections is related to
another.
* `epsilon`: The probability of a single read being miscalled as the other allele.
Applies in both directions.
* `coi`: The complexity of infection of the sample.
* `seq_error`: The level of sequencing error that is assumed.
* `use_bins`: Whether or not to group data before estimating the COI.
* `bin_size`: The minimum size of each bin of data.

### Default parameters

| Parameter       | Default Value         |
|:---------------:|:---------------------:|
| COI             | `3`                   |
| PLMAF           | `runif(1000, 0, 0.5)` |
| Coverage        | `200`                 |
| Alpha           | `1`                   |
| Overdispersion  | `0`                   |
| Relatedness     | `0`                   |
| Epsilon         | `0`                   |
| Sequence Error  | `0.01`                |
| Use Bins        | `FALSE`               |
| Bin Size        | `20`                  |


### Setting our PLMAF
```{r initialization, eval = eval_tests}
# Set the seed
set.seed(1)

# Define number of loci, and distribution of minor allele frequencies
L <- 1e3
p <- stats::rbeta(L, 1, 5)
p[p > 0.5] <- 1 - p[p > 0.5]
```

### Overall performance
```{r overall, eval = eval_tests}
toverall <- disc_sensitivity(
  coi = 1:20,
  repetitions = 100,
  plmaf = p,
  coverage = 200,
  seq_error = 0,
  coi_method = c("variant", "frequency")
)

toverall_image <- sensitivity_plot(
  data = toverall,
  dims = c(1, 2),
  result_type = "disc",
  title = "Predicted COI",
  sub_title = c("Variant Method", "Frequency Method")
)

toverall_error <- error_plot(
  data = toverall,
  fill = "coi_method",
  legend_title = "COI Method",
  title = "Error",
  fill_levels = c("Variant Method", "Frequency Method")
)

toverall_fig <- toverall_image / toverall_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

toverall_fig
```

```{r overall image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "toverall.png")
)
```

## Simulation parameters

### Coverage
```{r coverage method 1, eval = eval_tests}
tcoverage_1 <- disc_sensitivity(
  coi = 2:20,
  coverage = c(50, 100, 250, 500, 1000, 2000),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "variant"
)

tcoverage_image_1 <- sensitivity_plot(
  data = tcoverage_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 3),
  sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000))
)

tcoverage_error_1 <- error_plot(
  tcoverage_1,
  fill = "coverage",
  legend_title = "Coverage",
  title = "Error"
)

tcoverage_fig_1 <- tcoverage_image_1 / tcoverage_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(2, 1)) +
  theme(legend.position = "bottom")

tcoverage_fig_1
```

```{r coverage method 2, eval = eval_tests}
tcoverage_2 <- disc_sensitivity(
  coi = 2:20,
  coverage = c(50, 100, 250, 500, 1000, 2000),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p, coi_method = "frequency"
)

tcoverage_image_2 <- sensitivity_plot(
  data = tcoverage_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 3),
  sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000))
)

tcoverage_error_2 <- error_plot(
  tcoverage_2,
  fill = "coverage",
  legend_title = "Coverage",
  title = "Error"
)

tcoverage_fig_2 <- tcoverage_image_2 / tcoverage_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(2, 1)) +
  theme(legend.position = "bottom")

tcoverage_fig_2
```

```{r coverage image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoverage_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoverage_2.png")
)
```

### Loci
```{r loci method 1, eval = eval_tests}
# Set the range over which we will iterate
loci <- c(1e2, 1e3, 1e4)

# For each loci, reset the PLMAF and then run
bloci <- lapply(loci, function(new_L) {
  new_p <- rbeta(new_L, 1, 5)
  new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5]

  inner_tloci <- disc_sensitivity(
    coi = 2:20,
    repetitions = 100,
    plmaf = new_p,
    seq_error = 0.01,
    coi_method = "variant"
  )
  inner_tloci$param_grid$loci <- new_L
  return(inner_tloci)
})

# Extract the relevant information for each output: predicted_coi, probability,
# param_grid, and boot_error
pc <- do.call(cbind, lapply(bloci, function(test) {
  return(test$predicted_coi)
}))
pb <- do.call(cbind, lapply(bloci, function(test) {
  return(test$probability)
}))
pg <- do.call(rbind, lapply(bloci, function(test) {
  return(test$param_grid)
}))
be <- do.call(rbind, lapply(bloci, function(test) {
  return(test$boot_error)
}))

# Fix the naming for predicted_coi
num_cois <- length(unique(pg$coi))
num_repeat_cois <- length(pg$coi) / num_cois
names(pc) <- paste(
  "coi",
  pg$coi,
  rep(seq(num_repeat_cois), each = num_cois),
  sep = "_"
)

# Create the output
tloci_1 <- list(
  predicted_coi = pc,
  probability   = pb,
  param_grid    = pg,
  boot_error    = be
)

# Plot
tloci_image_1 <- sensitivity_plot(
  data = tloci_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(1, 3),
  sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4))
)

# Add a loci column
tloci_1$boot_error$loci <- rep(
  c(1e2, 1e3, 1e4),
  each = length(unique(tloci_1$boot_error$coi))
)
tloci_error_1 <- error_plot(
  tloci_1,
  fill = "loci",
  legend_title = "Loci",
  title = "Error"
)

tloci_fig_1 <- tloci_image_1 / tloci_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tloci_fig_1
```

```{r loci method 2, eval = eval_tests}
# Set the range over which we will iterate
loci <- c(1e2, 1e3, 1e4)

# For each loci, reset the PLMAF and then run
bloci <- lapply(loci, function(new_L) {
  new_p <- rbeta(new_L, 1, 5)
  new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5]

  inner_tloci <- disc_sensitivity(
    coi = 2:20,
    repetitions = 100,
    plmaf = new_p,
    seq_error = 0.01,
    coi_method = "frequency"
  )
  inner_tloci$param_grid$loci <- new_L
  return(inner_tloci)
})

# Extract the relevant information for each output: predicted_coi, probability,
# param_grid, and boot_error
pc <- do.call(cbind, lapply(bloci, function(test) {
  return(test$predicted_coi)
}))
pb <- do.call(cbind, lapply(bloci, function(test) {
  return(test$probability)
}))
pg <- do.call(rbind, lapply(bloci, function(test) {
  return(test$param_grid)
}))
be <- do.call(rbind, lapply(bloci, function(test) {
  return(test$boot_error)
}))

# Fix the naming for predicted_coi
num_cois <- length(unique(pg$coi))
num_repeat_cois <- length(pg$coi) / num_cois
names(pc) <- paste(
  "coi",
  pg$coi,
  rep(seq(num_repeat_cois), each = num_cois),
  sep = "_"
)

# Create the output
tloci_2 <- list(
  predicted_coi = pc,
  probability   = pb,
  param_grid    = pg,
  boot_error    = be
)

# Plot
tloci_image_2 <- sensitivity_plot(
  data = tloci_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(1, 3),
  sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4))
)

# Add a loci column
tloci_2$boot_error$loci <- rep(
  c(1e2, 1e3, 1e4),
  each = length(unique(tloci_2$boot_error$coi))
)
tloci_error_2 <- error_plot(
  tloci_2,
  fill = "loci",
  legend_title = "Loci",
  title = "Error"
)

tloci_fig_2 <- tloci_image_2 / tloci_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tloci_fig_2
```

```{r loci image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tloci_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tloci_2.png")
)
```

### Alpha
```{r alpha method 1, eval = eval_tests}
talpha_1 <- disc_sensitivity(
  coi = 2:20,
  alpha = seq(0.01, 5.51, 0.5),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "variant"
)

talpha_image_1 <- sensitivity_plot(
  data = talpha_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5))
)

talpha_error_1 <- error_plot(
  talpha_1,
  fill = "alpha",
  legend_title = "Alpha",
  title = "Error"
)

talpha_fig_1 <- talpha_image_1 / talpha_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

talpha_fig_1
```

```{r alpha method 2, eval = eval_tests}
talpha_2 <- disc_sensitivity(
  coi = 2:20,
  alpha = seq(0.01, 5.51, 0.5),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "frequency"
)

talpha_image_2 <- sensitivity_plot(
  data = talpha_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5))
)

talpha_error_2 <- error_plot(
  talpha_2,
  fill = "alpha",
  legend_title = "Alpha",
  title = "Error"
)

talpha_fig_2 <- talpha_image_2 / talpha_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

talpha_fig_2
```

```{r alpha image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "talpha_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "talpha_2.png")
)
```

### Overdispersion
```{r overdispersion, eval = eval_tests}
tover <- disc_sensitivity(
  coi = 2:20,
  overdispersion = seq(0, 0.25, 0.05),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tover_image <- sensitivity_plot(
  data = tover,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Var, ", "Freq, "), each = 6),
    "Dispersion = ",
    seq(0, 0.25, 0.05)
  )
)

tover_error <- error_plot(
  tover,
  fill = "overdispersion",
  legend_title = "Overdispersion",
  title = "Error",
  second_fill = "coi_method"
)

tover_fig <- tover_image / tover_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

tover_fig
```

```{r overdispersion image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tover.png")
)
```

### Relatedness
```{r relatedness, eval = eval_tests}
trelated <- disc_sensitivity(
  coi = 2:20,
  relatedness = seq(0, 0.5, 0.1),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

trelated_image <- sensitivity_plot(
  data = trelated,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste(
    paste0("Relatedness =  ", seq(0, 0.5, 0.1)),
    rep(c("Variant", "Frequency"), each = 6),
    sep = ", "
  )
)

trelated_error <- error_plot(
  trelated,
  fill = "relatedness",
  legend_title = "Related",
  title = "Error",
  second_fill = "coi_method"
)

trelated_fig <- trelated_image / trelated_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(6, 1)) +
  theme(legend.position = "bottom")

trelated_fig
```

```{r relatedness image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "trelated.png")
)
```

### Epsilon
```{r epsilon, eval = eval_tests}
tepsilon <- disc_sensitivity(
  coi = 2:20,
  epsilon = seq(0, 0.025, 0.005),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tepsilon_image <- sensitivity_plot(
  data = tepsilon,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Var, ", "Freq, "), each = 6),
    "Epsilon = ",
    seq(0, 0.025, 0.005)
  )
)

tepsilon_error <- error_plot(
  tepsilon,
  fill = "epsilon",
  legend_title = "Epsilon",
  title = "Error",
  second_fill = "coi_method"
)

tepsilon_fig <- tepsilon_image / tepsilon_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tepsilon_fig
```

```{r epsilon image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tepsilon.png")
)
```

## Estimation parameters

### COI
```{r coi, eval = eval_tests}
tcoi <- disc_sensitivity(
  coi = 2:40,
  max_coi = 40,
  repetitions = 100,
  plmaf = p,
  seq_error = 0.01,
  coi_method = c("variant", "frequency")
)

tcoi_image <- sensitivity_plot(
  data = tcoi,
  dims = c(1, 2),
  result_type = "disc",
  title = "Predicted COI",
  sub_title = c("Variant Method", "Frequency Method")
)

tcoi_error <- error_plot(
  tcoi,
  fill = "coi_method",
  legend_title = "COI Method",
  title = "Error",
  fill_levels = c("Variant Method", "Frequency Method")
)

tcoi_fig <- tcoi_image / tcoi_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tcoi_fig
```

```{r coi image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoi.png")
)
```

### Sequencing error
```{r seq error, eval = eval_tests}
tseq <- disc_sensitivity(
  coi = 2:20,
  epsilon = 0.01,
  seq_error = seq(0, 0.10, 0.02),
  repetitions = 100,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tseq_image <- sensitivity_plot(
  data = tseq,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Variant, ", "Frequency, "), each = 6),
    "Seq Error = ",
    seq(0, 0.12, 0.02)
  )
)

tseq_error <- error_plot(
  tseq,
  fill = "seq_error",
  legend_title = "Sequence Error",
  title = "Error",
  second_fill = "coi_method"
)

tseq_fig <- tseq_image / tseq_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

tseq_fig
```

```{r seq error image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tseq.png")
)
```

### Bins
```{r bins, eval = eval_tests}
tbin <- disc_sensitivity(
  coi = 2:20,
  use_bins = c(TRUE, FALSE),
  plmaf = p,
  repetitions = 100,
  coi_method = c("variant", "frequency")
)

tbin_image <- sensitivity_plot(
  data = tbin,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 2),
  sub_title = paste(
    c("Variant Method,", "Frequency Method,"),
    "Bins =",
    rep(c(TRUE, FALSE), each = 2)
  )
)

tbin_error <- error_plot(
  tbin,
  fill = "use_bins",
  fill_levels = as.character(c(TRUE, FALSE)),
  legend_title = "COI Method",
  title = "Error",
  second_fill = "coi_method"
)

tbin_fig <- tbin_image / tbin_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tbin_fig
```

```{r bin image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tbin.png")
)
```

### Bin size
```{r bin size, eval = eval_tests}
tbin_size <- disc_sensitivity(
  coi = 2:20,
  use_bins = TRUE,
  bin_size = seq(10, 100, 30),
  plmaf = p,
  repetitions = 100,
  seq_error = 0.01,
  coi_method = c("variant", "frequency")
)

tbin_size_image <- sensitivity_plot(
  data = tbin_size,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 4),
  sub_title = rep(paste("Bin Size =", seq(10, 100, 30)), 2)
)

tbin_size_error <- error_plot(
  tbin_size,
  fill = "bin_size",
  fill_levels = as.character(seq(10, 100, 30)),
  legend_title = "COI Method",
  title = "Error"
)

tbin_size_fig <- tbin_size_image / tbin_size_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tbin_size_fig
```

```{r bin size image, eval = eval_images, echo = F, out.width = "75%"}
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tbin_size.png")
)
```


```{r save results, eval = eval_tests, echo = FALSE, inlcude = FALSE}
# Below, we save the runs we performed for future use
saved_analysis <- list(
  toverall, tcoverage_1, tcoverage_2, tloci_1, tloci_2, talpha_1, talpha_2,
  tover, trelated, tepsilon, tcoi, tseq, tbin, tbin_size
)

names(saved_analysis) <- list(
  "toverall", "tcoverage_1", "tcoverage_2", "tloci_1", "tloci_2",
  "talpha_1", "talpha_2", "tover", "trelated", "tepsilon", "tcoi",
  "tseq", "tbin", "tbin_size"
)

saveRDS(
  saved_analysis,
  file = here::here("vignettes", "saved_analysis_discrete.rds")
)

# Here, we update all the stored images
update_images <- list(
  toverall_fig, tcoverage_fig_1, tcoverage_fig_2, tloci_fig_1, tloci_fig_2,
  talpha_fig_1, talpha_fig_2, tover_fig, trelated_fig, tepsilon_fig,
  tcoi_fig, tseq_fig, tbin_fig, tbin_size_fig
)
names(update_images) <- names(saved_analysis)

for (i in seq_len(length(update_images))) {
  # Save vignette images
  ggsave(
    plot = update_images[[i]],
    filename = here::here(
      "vignettes",
      "figures",
      "discrete",
      glue::glue("{ names(update_images)[i] }.png")
    ),
    width = 7,
    height = 7,
    units = "in",
    device = "png",
    dpi = "screen"
  )
}
```