Skip to content

Commit

Permalink
Update top_markers function, add hyper parameter for softmax.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gene233 committed Mar 29, 2024
1 parent 0b611b6 commit 67a095b
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 14 deletions.
21 changes: 10 additions & 11 deletions R/top_markers.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,9 @@ top_markers_init <- function(data, label, n = 10,
#' calculate group median, MAD or mean score and order genes based on scores
#'
#' @inheritParams scale_mgm
#' @param data matrix, features in row and samples in column
#' @param n integer, number of returned top genes for each group
#' @inheritParams top_markers_glm
#' @param method character, specify metric to compute, can be one of "median",
#' "mad", "mean"
#' @param scale logical, if to scale data by row
#' @param use.mgm logical, if to scale data using [scale_mgm()]
#' @param softmax logical, if to apply softmax transformation on output
#'
#' @return a tibble with feature names, group labels and ordered processed scores
#' @export
Expand All @@ -64,7 +60,8 @@ top_markers_abs <- function(data, label, n = 10,
pooled.sd = FALSE,
method = c("median", "mad", "mean"),
scale = TRUE, use.mgm = TRUE,
softmax = TRUE) {
softmax = TRUE,
tau = 1) {
method <- match.arg(method)
if (scale && use.mgm) {
data <- scale_mgm(expr = data, label = label, pooled.sd = pooled.sd)
Expand All @@ -91,14 +88,14 @@ top_markers_abs <- function(data, label, n = 10,
# dplyr::mutate(Scores = Scores / sd(Scores, na.rm = TRUE)) |> # norm by sd
# dplyr::mutate(Scores = sigmoid(Scores)) |> # sigmoid
# dplyr::mutate(Scores = tanh(Scores)) |> # tanh
dplyr::mutate(Scores = softmax(Scores)) # softmax
dplyr::mutate(Scores = softmax(Scores, tau = tau)) # softmax
}

data <- dplyr::slice_max(data, Scores, n = n) ## extract top n markers

# ## softmax
# if(softmax == TRUE)
# data <- dplyr::mutate(data, Scores = softmax(Scores))
# data <- dplyr::mutate(data, Scores = softmax(Scores, tau = tau))

return(data)
}
Expand All @@ -112,6 +109,7 @@ top_markers_abs <- function(data, label, n = 10,
#' @param scale logical, if to scale data by row
#' @param use.mgm logical, if to scale data using [scale_mgm()]
#' @param softmax logical, if to apply softmax transformation on output
#' @param tau numeric, hyper parameter for softmax
#'
#' @return a tibble with feature names, group labels and ordered processed scores
#' @export
Expand All @@ -124,7 +122,8 @@ top_markers_glm <- function(data, label, n = 10,
scale = TRUE, use.mgm = TRUE,
pooled.sd = FALSE,
# log = TRUE,
softmax = TRUE) {
softmax = TRUE,
tau = 1) {
label <- factor(label) # factorize label

## scale
Expand Down Expand Up @@ -170,14 +169,14 @@ top_markers_glm <- function(data, label, n = 10,
# dplyr::mutate(Scores = Scores / sd(Scores, na.rm = TRUE)) |> # norm by sd
# dplyr::mutate(Scores = sigmoid(Scores)) |> # sigmoid
# dplyr::mutate(Scores = tanh(Scores)) |> # tanh
dplyr::mutate(Scores = softmax(Scores)) # softmax
dplyr::mutate(Scores = softmax(Scores, tau = tau)) # softmax
}

data <- dplyr::slice_max(data, Scores, n = n) ## extract top n markers

# ## softmax
# if(softmax == TRUE)
# data <- dplyr::mutate(data, Scores = softmax(Scores))
# data <- dplyr::mutate(data, Scores = softmax(Scores, tau = tau))

return(data)
}
Expand Down
5 changes: 4 additions & 1 deletion man/top_markers_abs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/top_markers_glm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vignettes/smartid_Demo.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ $\mathbf{TF_{i,j}}=\frac{N_{i,j}}{\sum_j{N_{i,j}}},$
$\mathbf{IDF_i} = \log(1+\frac{n}{n_i+1}),$
$\mathbf{IAE_i} = \log(1+\frac{n}{\sum_j^n\hat N_{i,j}+1})$

Where $N_{i,j}$ is the counts of feature $i$ in cell $j$; $\hat N_{i,j}$ is $\max(0,N_{i,j}-threshold)$;
Where $N_{i,j}$ is the count of feature $i$ in cell $j$; $\hat N_{i,j}$ is $\max(0,N_{i,j}-\mathrm{threshold})$;
$n$ is the total number of documents (cells); $n_i$ is $\sum_{j = 1}^{n} \mathrm{sign}(N_{i,j} > \mathrm{threshold})$.

Here for labeled data, we can choose logTF * IDF_prob * IAE_prob for marker identification:
Expand Down

0 comments on commit 67a095b

Please sign in to comment.