From e6ba0364123a2682ece357ca279565b63476fdc4 Mon Sep 17 00:00:00 2001 From: Gene233 Date: Wed, 27 Mar 2024 03:32:08 +1100 Subject: [PATCH] Removed un-used commented codes and update vignette. --- R/gs_score-methods.R | 2 +- R/scale_mgm.R | 2 +- R/tf_idf_iae_wrappers.R | 14 -------------- vignettes/smartid_Demo.Rmd | 27 ++++++++++++++------------- 4 files changed, 16 insertions(+), 29 deletions(-) diff --git a/R/gs_score-methods.R b/R/gs_score-methods.R index a5221d3..28f3a99 100644 --- a/R/gs_score-methods.R +++ b/R/gs_score-methods.R @@ -64,7 +64,7 @@ setMethod( ## compute score score <- gs_score(data = expr, features = features, suffix = suffix) - data@colData <- cbind(data@colData, score) + colData(data) <- cbind(colData(data), score) return(data) } diff --git a/R/scale_mgm.R b/R/scale_mgm.R index a6d1034..8ed7be0 100644 --- a/R/scale_mgm.R +++ b/R/scale_mgm.R @@ -13,7 +13,7 @@ scale_mgm <- function(expr, label) { sds <- sparseMatrixStats::rowSds(expr, na.rm = TRUE) # sds <- sapply(unique(label), \(i) # sparseMatrixStats::rowSds(expr[, label == i], na.rm = TRUE) - # ) # get mean of each group + # ) # get sds of each group # colnames(sds) <- unique(label) ## compute group means diff --git a/R/tf_idf_iae_wrappers.R b/R/tf_idf_iae_wrappers.R index 206b8a3..a1d243c 100644 --- a/R/tf_idf_iae_wrappers.R +++ b/R/tf_idf_iae_wrappers.R @@ -435,20 +435,6 @@ iae_hdb <- function(expr, features = NULL, multi = TRUE, ## factor cluster cluster <- factor(cluster) - # # thres <- 0 - # # thres <- sparseMatrixStats::rowQuantiles(expr[features, , drop = FALSE], probs = 0.25, na.rm = TRUE) - # expr_offset <- expr[features, , drop = FALSE] - thres ## subtract offset - # expr_offset[expr_offset < 0] <- 0 - # - # mean_row_in <- sapply(levels(cluster), function(type) { - # rowMeans(expr_offset[, cluster == type, drop = FALSE], na.rm = TRUE) - # }) |> setNames(levels(cluster)) ## mean counts for each gene in the group - # mean_row_notin <- sapply(levels(cluster), function(type) { - # apply(mean_row_in, 1, function(x) max(x[names(x) != type])) - # }) |> setNames(levels(cluster)) ## mean counts for each gene not in group - # - # iae <- log1p((mean_row_in/(mean_row_notin+0.01))[, cluster, drop = FALSE]) ## IDF scores - iae <- iae_prob( expr = expr, features = features, label = cluster, multi = multi, diff --git a/vignettes/smartid_Demo.Rmd b/vignettes/smartid_Demo.Rmd index 33ea1f6..5230d4d 100644 --- a/vignettes/smartid_Demo.Rmd +++ b/vignettes/smartid_Demo.Rmd @@ -85,8 +85,8 @@ defac <- as.data.frame(rowData(data_sim)[, cols]) up <- lapply(cols, \(id) dplyr::filter(defac, if_all(-!!sym(id), \(x) !!sym(id) / x > fc)) |> rownames()) -data_sim@metadata$up_markers <- setNames(up, cols) -data_sim@metadata$up_markers +slot(data_sim, "metadata")$up_markers <- setNames(up, cols) +slot(data_sim, "metadata")$up_markers data_sim ``` @@ -112,12 +112,13 @@ idf_iae_methods() ``` The basic version of TF, IDF and IAE can be termed as: -$\mathbf{TF_{i,j}}=\frac{N_{i,j}}{\sum_j{N_{i,j}}}$ -$\mathbf{IDF_i} = log(1+\frac{n}{n_i+1})$ + +$\mathbf{TF_{i,j}}=\frac{N_{i,j}}{\sum_j{N_{i,j}}},$ +$\mathbf{IDF_i} = log(1+\frac{n}{n_i+1}),$ $\mathbf{IAE_i} = log(1+\frac{n}{\hat N_{i,j}+1})$ -$where\ N_{i,j}\ is\ the\ counts\ of\ feature\ i\ in\ cell\ j;\ \hat N_{i,j}\ is\ max(0,\ N_{i,j} - threshold);$ -$\ n\ is\ total\ counts\ of\ documents(cells);\ n_i\ is\ \sum_{j = 1}^{n} sign(N_{i,j} > threshold)$ +$\mathbf{where\ N_{i,j}\ is\ the\ counts\ of\ feature\ i\ in\ cell\ j;\ \hat N_{i,j}\ is\ max(0,\ N_{i,j} - threshold)};$ +$\mathbf{\ n\ is\ total\ counts\ of\ documents(cells);\ n_i\ is\ \sum_{j = 1}^{n} sign(N_{i,j} > threshold)}$ Here for labeled data, we can choose logTF * IDF_prob * IAE_prob for marker identification. @@ -174,7 +175,7 @@ It's clear that the real UP DEGs are popping up to the top n features. And for t score_barplot( top_markers = top_m, column = ".dot", - f_list = data_sim@metadata$up_markers, + f_list = slot(data_sim, "metadata")$up_markers, n = 20 ) ``` @@ -185,14 +186,14 @@ This can also be confirmed in data simulation information, where the scale facto ```{r} ggplot(data.frame( - "Gene76" = data_sim@metadata$tf["Gene76", ], + "Gene76" = slot(data_sim, "metadata")$tf["Gene76", ], Group = data_sim$Group )) + geom_violin(aes(x = Group, y = Gene76, fill = Group)) + theme_bw() ## sim gene info -data_sim@rowRanges@elementMetadata[76, ] +SummarizedExperiment::elementMetadata(data_sim)[76, ] ``` ## Marker Selection @@ -222,7 +223,7 @@ We can also compare our selected markers with real DEGs. As there is no markers ```{r} library(UpSetR) -upset(fromList(c(data_sim@metadata$up_markers, marker_ls)), nsets = 6) +upset(fromList(c(slot(data_sim, "metadata")$up_markers, marker_ls)), nsets = 6) ``` `smartid` also provides some other implementation of marker selection. Here is another example using `mclust`. Different from `markers_mixmdl()`, `markers_mclust()` doesn't need a pre-defined number of components (which is 3 in `markers_mixmdl()`), instead, it will select the number of components by searching a series of potential numbers. This method is sometimes more robust than `markers_mixmdl()`. @@ -258,7 +259,7 @@ Here we choose logTF * IDF_sd * IAE_sd for for gene-set scoring as a use case. $\mathbf{score}=logTF*IDF_{sd}*IAE_{sd}$ -$where\ \mathbf{IDF} = log(1+sd(N_{i})*\frac{n}{n_i+1})$ +$where\ \mathbf{IDF} = log(1+sd(N_{i})*\frac{n}{n_i+1}),$ $\mathbf{IAE} = log(1+sd(N_{i})*\frac{n}{\sum_{j=1}^{n}N_{i,j}+1})$ ## Score Samples @@ -296,7 +297,7 @@ data_sim <- gs_score( ) ## saved score -colnames(data_sim@colData) +colnames(colData(data_sim)) ``` Now we get 3 columns of score for each group markers. We can then visualize the score across groups, see how well it can discern the target group. @@ -304,7 +305,7 @@ Now we get 3 columns of score for each group markers. We can then visualize the It's evident that the score can sufficiently separate the target group from all others. ```{r, fig.width=10, fig.height=3} -as.data.frame(data_sim@colData) |> +as.data.frame(colData(data_sim)) |> tidyr::pivot_longer("Group1.score.unlabel":"Group3.score.unlabel", names_to = "group markers", values_to = "score"