Skip to content

Commit

Permalink
Update scale_mgm params and vignette,
Browse files Browse the repository at this point in the history
  • Loading branch information
Gene233 committed Mar 29, 2024
1 parent dfe6155 commit 14e5f63
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 33 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@

# smartid 0.99.5

* Update `scale_mgm()` function, using pooled SD.
* Update `scale_mgm()` function using pooled SD, add details for scale function.
47 changes: 25 additions & 22 deletions R/scale_mgm.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,38 @@
#'
#' @param expr matrix
#' @param label a vector of group label
#' @param pooled.sd logical, if to use pooled SD for scaling
#'
#' @return scaled matrix
#' @export
#'
#' @examples
#' scale_mgm(matrix(rnorm(100), 10), label = rep(letters[1:2], 5))
scale_mgm <- function(expr, label) {
# ## compute overall sds
# sds <- sparseMatrixStats::rowSds(expr, na.rm = TRUE)
scale_mgm <- function(expr, label, pooled.sd = FALSE) {
if (pooled.sd) {
## compute pooled sds
sds <- vapply(
unique(label), \(i)
sparseMatrixStats::rowVars(expr[, label == i, drop = FALSE],
na.rm = TRUE
),
rep(1, nrow(expr))
) # get vars of each group
ng <- table(label)[unique(label)] # get group sizes in the same order
sds <- sds %*% cbind(ng - 1)
sds <- as.numeric(sqrt(sds / sum(ng - 1)))
} else {
## compute overall sds
sds <- sparseMatrixStats::rowSds(expr, na.rm = TRUE)

# ## compute group sds
# sds <- vapply(unique(label), \(i)
# sparseMatrixStats::rowSds(expr[, label == i, drop = FALSE],
# na.rm = TRUE),
# rep(1, nrow(expr))
# ) # get sds of each group
# sds <- sparseMatrixStats::rowMeans2(sds)

## compute pooled sds
sds <- vapply(
unique(label), \(i)
sparseMatrixStats::rowVars(expr[, label == i, drop = FALSE],
na.rm = TRUE
),
rep(1, nrow(expr))
) # get vars of each group
ng <- table(label)[unique(label)] # get group sizes in the same order
sds <- sds %*% cbind(ng - 1)
sds <- as.numeric(sqrt(sds / sum(ng - 1)))
# ## compute group sds
# sds <- vapply(unique(label), \(i)
# sparseMatrixStats::rowSds(expr[, label == i, drop = FALSE],
# na.rm = TRUE),
# rep(1, nrow(expr))
# ) # get sds of each group
# sds <- sparseMatrixStats::rowMeans2(sds)
}

## compute group means
mgm <- vapply(
Expand Down
10 changes: 6 additions & 4 deletions R/top_markers.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ top_markers_init <- function(data, label, n = 10,

#' calculate group median, MAD or mean score and order genes based on scores
#'
#' @inheritParams scale_mgm
#' @param data matrix, features in row and samples in column
#' @param label vector, group labels
#' @param n integer, number of returned top genes for each group
#' @param method character, specify metric to compute, can be one of "median",
#' "mad", "mean"
Expand All @@ -61,12 +61,13 @@ top_markers_init <- function(data, label, n = 10,
#' data <- matrix(rgamma(100, 2), 10, dimnames = list(1:10))
#' top_markers_abs(data, label = rep(c("A", "B"), 5))
top_markers_abs <- function(data, label, n = 10,
pooled.sd = FALSE,
method = c("median", "mad", "mean"),
scale = TRUE, use.mgm = TRUE,
softmax = TRUE) {
method <- match.arg(method)
if (scale && use.mgm) {
data <- scale_mgm(expr = data, label = label)
data <- scale_mgm(expr = data, label = label, pooled.sd = pooled.sd)
} else if (scale && !use.mgm) {
## scale scores on rows
# mu_s <- sparseMatrixStats::rowMeans2(data, na.rm = TRUE)
Expand Down Expand Up @@ -104,8 +105,8 @@ top_markers_abs <- function(data, label, n = 10,

#' calculate group mean score using glm and order genes based on scores difference
#'
#' @inheritParams scale_mgm
#' @param data matrix, features in row and samples in column
#' @param label vector, group labels
#' @param n integer, number of returned top genes for each group
#' @param family family for glm, details in [stats::glm()]
#' @param scale logical, if to scale data by row
Expand All @@ -121,6 +122,7 @@ top_markers_abs <- function(data, label, n = 10,
top_markers_glm <- function(data, label, n = 10,
family = gaussian(), # score are continuous non-negative, can use gamma or inverse.gaussian, if continuous and unbounded use gaussian, if discrete use poisson, if binary or proportions between [0,1] or binary freq counts use binomial
scale = TRUE, use.mgm = TRUE,
pooled.sd = FALSE,
# log = TRUE,
softmax = TRUE) {
label <- factor(label) # factorize label
Expand All @@ -135,7 +137,7 @@ top_markers_glm <- function(data, label, n = 10,
data <- t(scale(t(data)))
data[is.na(data)] <- 0 # assign 0 to NA when sd = 0
} else if (scale && use.mgm) {
data <- scale_mgm(expr = data, label = label)
data <- scale_mgm(expr = data, label = label, pooled.sd = pooled.sd)
}

# ## log score
Expand Down
4 changes: 3 additions & 1 deletion man/scale_mgm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/top_markers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/top_markers_abs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/top_markers_glm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/top_markers_init.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vignettes/smartid_Demo.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ Scaling is needed to find the markers specific to the group, however, standard s

The scale method is depicted as below:

$$z=\frac{x-\frac{\sum_k^{n_D}(\mu_k)}{n_D}}{s_{pooled}},\ s_{pooled}=\sqrt{\frac{\sum_k^{n_D}{(n_k-1){s_k}^2}}{\sum_k^{n_D}{n_k}-k}}$$
$$z=\frac{x-\frac{\sum_k^{n_D}(\mu_k)}{n_D}}{sd}$$

The score will be transformed using softmax before passing to EM algorithm.

Expand Down

0 comments on commit 14e5f63

Please sign in to comment.