Skip to content

Commit

Permalink
Add option to use median when computing supercellcyto marker expression
Browse files Browse the repository at this point in the history
  • Loading branch information
ghar1821 committed Jan 29, 2024
1 parent 7f2a3a6 commit ad49da6
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 18 deletions.
64 changes: 49 additions & 15 deletions R/runSuperCellCyto.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@
#' scheme when processing multiple samples in parallel.
#' Defaults to FALSE.
#' Refer to additional details section below on parallel processing for more details.
#' @param aggregation_method A character string specifying the method to be used for
#' calculating the marker expression of the supercells.
#' Accepted values are "mean" and "median".
#' Based on the choice, the supercells' marker expression are computed by computing
#' either the mean or median of the marker expression of the cells therein.
#' The default value is "mean".
#' If any other value is provided, the function will return an error.
#'
#' @section Parallel Processing:
#' SuperCellCyto can process multiple samples simultaneously in parallel.
Expand Down Expand Up @@ -156,6 +163,7 @@ runSuperCellCyto <- function(dt,
markers,
sample_colname,
cell_id_colname,
aggregation_method = c("mean", "median"),
gam = 20,
k_knn = 5,
BPPARAM = SerialParam(),
Expand Down Expand Up @@ -204,7 +212,10 @@ runSuperCellCyto <- function(dt,
n_pc <- 10
}

supercell_res <- bplapply(names(matrix_per_samp), function(sample_name, gam, k_knn) {
# How to aggregate the cells in supercells to get expression matrix?
aggregation_method <- match.arg(aggregation_method)

supercell_res <- bplapply(names(matrix_per_samp), function(sample_name, gam, k_knn, aggregation_method) {
mt <- matrix_per_samp[[sample_name]]

# ---- Run supercell ----
Expand All @@ -220,23 +231,46 @@ runSuperCellCyto <- function(dt,
)

# ---- Calculate supercell expression matrix ----
supercell_exp_mat <- data.table(
t(
as.matrix(
supercell_GE(
ge = mt,
groups = res$membership
if (aggregation_method == "mean") {
supercell_exp_mat <- data.table(
t(
as.matrix(
supercell_GE(
ge = mt,
groups = res$membership
)
)
)
)
)
supercell_exp_mat[[sample_colname]] <- sample_name
supercell_exp_mat[[sample_colname]] <- sample_name

# Create a unique supercell id concatenating the sample name
supercell_exp_mat[["SuperCellId"]] <- paste0(
"SuperCell_",
seq(1, nrow(supercell_exp_mat)), "_Sample_", sample_name
)
} else if (aggregation_method == "median") {
supercell_exp_mat <- data.table(t(as.matrix(mt)))

# grab it here now!
markers_name <- colnames(supercell_exp_mat)

supercell_exp_mat$cell_id <- colnames(mt)
supercell_membership <- data.table(
cell_id = names(res$membership),
SuperCellId = paste0("SuperCell_", res$membership, "_Sample_", sample_name)
)
supercell_exp_mat <- merge.data.table(
supercell_exp_mat,
supercell_membership,
by = "cell_id"
)

# this is where you calculate the expression
supercell_exp_mat <- supercell_exp_mat[, lapply(.SD, median), .SDcols = markers, by='SuperCellId']
}


# Create a unique supercell id concatenating the sample name
supercell_exp_mat[["SuperCellId"]] <- paste0(
"SuperCell_",
seq(1, nrow(supercell_exp_mat)), "_Sample_", sample_name
)

# ---- Create supercell and cell mapping ----
supercell_cell_map <- data.table(
Expand All @@ -255,7 +289,7 @@ runSuperCellCyto <- function(dt,
supercell_cell_map = supercell_cell_map
))

}, gam = gam, k_knn = k_knn, BPPARAM = BPPARAM)
}, gam = gam, k_knn = k_knn, aggregation_method = aggregation_method, BPPARAM = BPPARAM)

# Now the messy reshaping so each element is not the output for a sample
# but either a supercell object, expression matrix or supercell cell map
Expand Down
9 changes: 9 additions & 0 deletions man/runSuperCellCyto.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

96 changes: 93 additions & 3 deletions tests/testthat/test-runSuperCellCyto_correctness.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Test the correctness of runSuperCellCyto

library(data.table)

test_that("Output is a list", {
cyto_dat <- simCytoData(10, rep(1000, 2))

Expand Down Expand Up @@ -87,14 +89,13 @@ test_that("Data with small number of markers can still be processed", {
nmarkers <- 7
cyto_dat <- simCytoData(nmarkers = nmarkers)

expect_error(
expect_no_error(
runSuperCellCyto(
dt = cyto_dat,
markers = paste0("Marker_", seq_len(nmarkers)),
sample_colname = "Sample",
cell_id_colname = "Cell_Id"
),
NA
)
)
})

Expand Down Expand Up @@ -196,5 +197,94 @@ test_that("List containing supercell objects are ordered correctly", {

})

test_that("Mean is used for calculating supercells' marker expressions", {
cyto_dat <- simCytoData(ncells = rep(1000, 2))

markers <- paste0("Marker_", seq_len(10))

supercells <- runSuperCellCyto(
dt = cyto_dat,
markers = markers,
sample_colname = "Sample",
cell_id_colname = "Cell_Id"
)

actual_exp_mat <- supercells$supercell_expression_matrix

cell_mapping <- supercells$supercell_cell_map

# hand calculation of the supercell expression matrix using mean
cyto_dat <- merge.data.table(
cyto_dat,
cell_mapping,
by.x = "Cell_Id",
by.y = "CellId"
)

expected_mean_exp <- cyto_dat[, lapply(.SD, mean), .SDcols = markers, by='SuperCellID']

# sort just for comparison of all.equal
expected_mean_exp <- expected_mean_exp[order(SuperCellID)]
actual_exp_mat <- actual_exp_mat[order(SuperCellId)]

expect_true(all.equal(
target = expected_mean_exp[, markers, with = FALSE],
current = actual_exp_mat[, markers, with = FALSE])
)

})

test_that("Median is used for calculating supercells' marker expressions", {
cyto_dat <- simCytoData(ncells = rep(1000, 2))

markers <- paste0("Marker_", seq_len(10))

supercells <- runSuperCellCyto(
dt = cyto_dat,
markers = markers,
sample_colname = "Sample",
cell_id_colname = "Cell_Id",
aggregation_method = "median"
)

actual_exp_mat <- supercells$supercell_expression_matrix

cell_mapping <- supercells$supercell_cell_map

# hand calculation of the supercell expression matrix using mean
cyto_dat <- merge.data.table(
cyto_dat,
cell_mapping,
by.x = "Cell_Id",
by.y = "CellId"
)

expected_mean_exp <- cyto_dat[, lapply(.SD, median), .SDcols = markers, by='SuperCellID']

# sort just for comparison of all.equal
expected_mean_exp <- expected_mean_exp[order(SuperCellID)]
actual_exp_mat <- actual_exp_mat[order(SuperCellId)]

expect_true(all.equal(
target = expected_mean_exp[, markers, with = FALSE],
current = actual_exp_mat[, markers, with = FALSE])
)

})

test_that("If not median or mean is used for calculating supercells' marker expressions", {
cyto_dat <- simCytoData(ncells = rep(1000, 2))

expect_error(
runSuperCellCyto(
dt = cyto_dat,
markers = paste0("Marker_", seq_len(10)),
sample_colname = "Sample",
cell_id_colname = "Cell_Id",
aggregation_method = "sum"
)
)
})



0 comments on commit ad49da6

Please sign in to comment.