Skip to content

Commit

Permalink
label_mutations() maintenance (#44)
Browse files Browse the repository at this point in the history
* Make `label_mutations()` a generic and define methods

* Add check to ensure call columns exist

* Rename added column

* Update NEWS
  • Loading branch information
arisp99 authored Jul 18, 2022
1 parent a8ec58a commit a2b0750
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 41 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ S3method("names<-",ref_alt_cov_tbl)
S3method("names<-",ref_tbl)
S3method(autoplot,mut_freq)
S3method(autoplot,mut_prev)
S3method(label_mutations,default)
S3method(label_mutations,ref_alt_cov_tbl)
S3method(mutation_frequency,default)
S3method(mutation_frequency,ref_alt_cov_tbl)
S3method(mutation_prevalence,default)
Expand Down Expand Up @@ -94,4 +96,5 @@ importFrom(rlang,caller_env)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,expr)
importFrom(stringr,str_length)
importFrom(tibble,tbl_sum)
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# miplicorn (development version)

- The column created by `label_mutations()` has been renamed from
`ans_der_indel` to `mutation_label` (#44).
- `label_mutations()` more clearly signals when the computation cannot be
completed. This can occur because of missing variables or an incorrect input
table (#44).
- Improve condition signalling by using `{cli}`. Messages are now properly
pluralized and display the erroring function (#42).
- Improve performance of amino acid conversion functions (#40).
Expand Down
64 changes: 51 additions & 13 deletions R/label_mutations.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,54 @@
#' @param .data The data set containing REF and ALT calls.
#' @param .before,.after `r lifecycle::badge("experimental")`
#' <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns
#' should appear (the default is to add to the right hand side). See
#' should appear (the default is to add to the right-hand side). See
#' [`dplyr::relocate()`][dplyr::relocate()] for more details.
#'
#' @return
#' The object `.data` with an added column that indicates the mutation type of
#' each row. The column is added to the right-hand side by default, but this may
#' be controlled by the `.before` and `.after` arguments.
#'
#' @export
#' @examples
#' data <- tibble::tribble(
#' ~sample, ~pos, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count, ~coverage,
#' "S1", "1049838", "A", "G", 54, 10, 64,
#' "S2", "801498", "G", "A", 15, 0, 15,
#' "S3", "625403", "T", "C", 0, 15, 15,
#' "S4", "748165", "GA", "G", 2, 18, 20,
#' "S5", "487199", "G", "ATC", 0, 10, 10
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#' miplicorn_example("reference_AA_table.csv"),
#' miplicorn_example("alternate_AA_table.csv"),
#' miplicorn_example("coverage_AA_table.csv"),
#' gene == "atp6"
#' )
#'
#' # Add ref and alt calls to data
#' sequences <- c("A", "T", "C", "G", "AT", "TC", "TGC")
#' data <- dplyr::mutate(
#' data,
#' ref = sample(sequences, size = nrow(data), replace = TRUE),
#' alt = sample(sequences, size = nrow(data), replace = TRUE)
#' )
#'
#' # Label the mutations
#' label_mutations(data)
#' label_mutations(data, .after = alt)
#' label_mutations(data, .before = pos)
label_mutations <- function(.data, .before = NULL, .after = NULL) {
# S3 generic: dispatch on the class of `.data` (the first argument) to the
# matching `label_mutations.<class>` method; objects without a dedicated
# method fall through to `label_mutations.default()`.
UseMethod("label_mutations")
}

#' @export
label_mutations.default <- function(.data, .before = NULL, .after = NULL) {
  # Fallback method: reached for any object that has no dedicated
  # `label_mutations()` method, so it always signals an error.
  # The unnamed element is the headline; the "i" elements are info bullets.
  bullets <- c(
    "Cannot label the mutations of this data object.",
    "i" = "Object must be a reference, alternate, coverage table.",
    "i" = "Object must additionally contain reference and alternate calls."
  )

  cli_abort(bullets)
}

#' @importFrom stringr str_length
#' @rdname label_mutations
#' @export
label_mutations.ref_alt_cov_tbl <- function(.data,
.before = NULL,
.after = NULL) {
.before <- enquo(.before)
.after <- enquo(.after)

Expand All @@ -43,13 +73,21 @@ label_mutations <- function(.data, .before = NULL, .after = NULL) {
cli_abort("Must supply only one of `.before` and `.after`.")
}

# Ensure that ref and alt call columns exist
if (!all(c("ref", "alt") %in% colnames(.data))) {
cli_abort(c(
"Data object is missing reference and alternate calls.",
"x" = "Call columns must be named `ref` and `alt`, respectively."
))
}

# Label the mutations
dplyr::mutate(.data,
ans_der_indel = dplyr::case_when(
mutation_label = dplyr::case_when(
ref_umi_count > alt_umi_count ~ "ref",
stringr::str_length(ref) == 1 & stringr::str_length(alt) == 1 & alt_umi_count > ref_umi_count ~ "alt",
stringr::str_length(ref) < stringr::str_length(alt) & alt_umi_count > ref_umi_count ~ "ins",
stringr::str_length(ref) > stringr::str_length(alt) & alt_umi_count > ref_umi_count ~ "del",
str_length(ref) == 1 & str_length(alt) == 1 & alt_umi_count > ref_umi_count ~ "alt",
str_length(ref) < str_length(alt) & alt_umi_count > ref_umi_count ~ "ins",
str_length(ref) > str_length(alt) & alt_umi_count > ref_umi_count ~ "del",
TRUE ~ NA_character_
),
.before = !!.before,
Expand Down
33 changes: 24 additions & 9 deletions man/label_mutations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/_snaps/label_mutations.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# label mutations needs a ref alt cov table

Cannot label the mutations of this data object.
i Object must be a reference, alternate, coverage table.
i Object must additionally contain reference and alternate calls.

# call columns must exist

Data object is missing reference and alternate calls.
x Call columns must be named `ref` and `alt`, respectively.

# error if control position with both .before and .after

Must supply only one of `.before` and `.after`.
Expand Down
52 changes: 33 additions & 19 deletions tests/testthat/test-label_mutations.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,47 @@
data <- tibble::tribble(
~sample, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count,
"S1", "A", "G", 54, 10,
"S2", "G", "A", 15, 0,
"S3", "T", "C", 0, 15,
"S4", "GA", "G", 2, 18,
"S5", "G", "ATC", 0, 10
)
data <- new_ref_alt_cov_tbl(tibble::tribble(
~sample, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count, ~coverage,
"S1", "A", "G", 54, 10, 64,
"S2", "G", "A", 15, 0, 15,
"S3", "T", "C", 0, 15, 15,
"S4", "GA", "G", 2, 18, 20,
"S5", "G", "ATC", 0, 10, 10
))

res <- tibble::tribble(
~sample, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count, ~ans_der_indel,
"S1", "A", "G", 54, 10, "ref",
"S2", "G", "A", 15, 0, "ref",
"S3", "T", "C", 0, 15, "alt",
"S4", "GA", "G", 2, 18, "del",
"S5", "G", "ATC", 0, 10, "ins"
)
res <- new_ref_alt_cov_tbl(tibble::tribble(
~sample, ~ref, ~alt, ~ref_umi_count, ~alt_umi_count, ~coverage, ~mutation_label,
"S1", "A", "G", 54, 10, 64, "ref",
"S2", "G", "A", 15, 0, 15, "ref",
"S3", "T", "C", 0, 15, 15, "alt",
"S4", "GA", "G", 2, 18, 20, "del",
"S5", "G", "ATC", 0, 10, 10, "ins"
))

test_that("label mutations needs a ref alt cov table", {
expect_error(label_mutations(tibble::tibble(a = 1)))
expect_snapshot_error(label_mutations(tibble::tibble(a = 1)))
})

test_that("call columns must exist", {
# Neither `ref` nor `alt` is present.
expect_error(label_mutations(new_ref_alt_cov_tbl(tibble::tibble(a = 3))))
# Only `ref` is present; `alt` is missing.
expect_error(label_mutations(new_ref_alt_cov_tbl(tibble::tibble(ref = 3))))
# Only `alt` is present; `ref` is missing.
expect_error(label_mutations(new_ref_alt_cov_tbl(tibble::tibble(alt = 3))))
# Pin the exact wording of the error message via a snapshot.
expect_snapshot_error(
label_mutations(new_ref_alt_cov_tbl(tibble::tibble(a = 3)))
)
})

test_that("labels mutations correctly", {
# `data` and `res` are the fixtures defined at the top of this file; `res` is
# `data` plus the expected `mutation_label` column ("ref", "alt", "del", "ins").
expect_equal(label_mutations(data), res)
})

test_that("control position", {
test_that("can control position", {
expect_equal(
label_mutations(data, .before = ref),
dplyr::relocate(res, ans_der_indel, .before = ref)
dplyr::relocate(res, mutation_label, .before = ref)
)
expect_equal(
label_mutations(data, .after = sample),
dplyr::relocate(res, ans_der_indel, .after = sample)
dplyr::relocate(res, mutation_label, .after = sample)
)
})

Expand Down

0 comments on commit a2b0750

Please sign in to comment.