Skip to content

Commit

Permalink
Add option to rename features during nimble-append
Browse files Browse the repository at this point in the history
  • Loading branch information
bbimber committed Jan 28, 2025
1 parent 17142f7 commit 57da9dc
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 22 deletions.
8 changes: 6 additions & 2 deletions R/NimbleAPI.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ utils::globalVariables(
#' @param maxLibrarySizeRatio Passed directly to AppendNimbleCounts()
#' @param queryDatabaseForLineageUpdates If true, after downloading the raw nimble output, the code will query any feature not ending with 'g' against the database and replace that name with the current value of lineage.
#' @param replaceExistingAssayData If true, any existing data in the targetAssay will be deleted
#' @param featureRenameList An optional named list in the format <OLD_NAME> = <NEW_NAME>. If any <OLD_NAME> are present, they will be renamed to <NEW_NAME>. The intention of this is to recover specific ambiguous classes.
#' @return A modified Seurat object.
#' @export
DownloadAndAppendNimble <- function(seuratObject, targetAssayName, outPath=tempdir(), enforceUniqueFeatureNames=TRUE, allowableGenomes=NULL, ensureSamplesShareAllGenomes = TRUE, maxAmbiguityAllowed = 1, reuseExistingDownloads = FALSE, performDietSeurat = FALSE, normalizeData = TRUE, assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, queryDatabaseForLineageUpdates = FALSE, replaceExistingAssayData = TRUE) {
DownloadAndAppendNimble <- function(seuratObject, targetAssayName, outPath=tempdir(), enforceUniqueFeatureNames=TRUE, allowableGenomes=NULL, ensureSamplesShareAllGenomes = TRUE, maxAmbiguityAllowed = 1, reuseExistingDownloads = FALSE, performDietSeurat = FALSE, normalizeData = TRUE, assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, queryDatabaseForLineageUpdates = FALSE, replaceExistingAssayData = TRUE, featureRenameList = NULL) {
# Ensure we have a DatasetId column
if (is.null(seuratObject@meta.data[['DatasetId']])) {
stop('Seurat object lacks DatasetId column')
Expand Down Expand Up @@ -95,10 +96,11 @@ DownloadAndAppendNimble <- function(seuratObject, targetAssayName, outPath=tempd
print(paste0('Total features: ', length(unique(df$V1))))

outFile <- file.path(outPath, paste0("mergedNimbleCounts.tsv"))
df <- df[c('V1', 'V2', 'V3')] # grouping could re-order this
write.table(df, outFile, sep="\t", col.names=F, row.names=F, quote=F)

print(paste0('Appending counts to ', targetAssayName))
seuratObject <- AppendNimbleCounts(seuratObject=seuratObject, targetAssayName = targetAssayName, nimbleFile=outFile, maxAmbiguityAllowed = maxAmbiguityAllowed, performDietSeurat = FALSE, normalizeData = normalizeData, assayForLibrarySize = assayForLibrarySize, maxLibrarySizeRatio = maxLibrarySizeRatio, replaceExistingAssayData = replaceExistingAssayData)
seuratObject <- AppendNimbleCounts(seuratObject=seuratObject, targetAssayName = targetAssayName, nimbleFile=outFile, maxAmbiguityAllowed = maxAmbiguityAllowed, performDietSeurat = FALSE, normalizeData = normalizeData, assayForLibrarySize = assayForLibrarySize, maxLibrarySizeRatio = maxLibrarySizeRatio, replaceExistingAssayData = replaceExistingAssayData, featureRenameList = featureRenameList)
unlink(outFile)

return(seuratObject)
Expand Down Expand Up @@ -338,6 +340,8 @@ DownloadAndAppendNimble <- function(seuratObject, targetAssayName, outPath=tempd
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

df <- df[c('V1', 'V2', 'V3')]

print(paste0('Distinct features after re-grouping: ', length(unique(df$V1))))

return(df)
Expand Down
50 changes: 32 additions & 18 deletions R/NimbleAppend.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
#' @param doPlot If true, FeaturePlots will be generated for the appended features
#' @param maxFeaturesToPlot If doPlot is true, this is the maximum number of features to plot
#' @param replaceExistingAssayData If true, any existing data in the targetAssay will be deleted
#' @param featureRenameList An optional named list in the format <OLD_NAME> = <NEW_NAME>. If any <OLD_NAME> are present, they will be renamed to <NEW_NAME>. The intention of this is to recover specific ambiguous classes.
#' @return A modified Seurat object.
#' @export
AppendNimbleCounts <- function(seuratObject, nimbleFile, targetAssayName, maxAmbiguityAllowed = 0, renameConflictingFeatures = TRUE, duplicateFeatureSuffix = ".Nimble", normalizeData = TRUE, performDietSeurat = (targetAssayName %in% names(seuratObject@assays)), assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, doPlot = TRUE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE) {
AppendNimbleCounts <- function(seuratObject, nimbleFile, targetAssayName, maxAmbiguityAllowed = 0, renameConflictingFeatures = TRUE, duplicateFeatureSuffix = ".Nimble", normalizeData = TRUE, performDietSeurat = (targetAssayName %in% names(seuratObject@assays)), assayForLibrarySize = 'RNA', maxLibrarySizeRatio = 0.05, doPlot = TRUE, maxFeaturesToPlot = 40, replaceExistingAssayData = TRUE, featureRenameList = NULL) {
if (!file.exists(nimbleFile)) {
stop(paste0("Nimble file does not exist: ", nimbleFile))
}
Expand Down Expand Up @@ -71,44 +72,57 @@ AppendNimbleCounts <- function(seuratObject, nimbleFile, targetAssayName, maxAmb
maxAmbiguityAllowed <- 1
}

ambigFeatRows <- totalHitsByRow > maxAmbiguityAllowed
# Ensure consistent sorting of ambiguous features, and re-group if needed:
if (any(grepl(df$V1, pattern = ','))) {
print('Ensuring consistent feature sort within ambiguous features:')
df$V1 <- unlist(sapply(df$V1, function(y){
return(paste0(sort(unlist(strsplit(y, split = ','))), collapse = ','))
}))

df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

df <- df[c('V1', 'V2', 'V3')]

paste0('Distinct features after re-grouping: ', length(unique(df$V1)))
}

if (!all(is.null(featureRenameList))) {
print('Potentially renaming features:')
df$V1 <- as.character(df$V1)
for (featName in names(featureRenameList)) {
if (featName %in% df$V1) {
df$V1[df$V1 == featName] <- featureRenameList[[featName]]
}
}
}

ambigFeatRows <- totalHitsByRow > maxAmbiguityAllowed
if (sum(ambigFeatRows) > 0) {
print(paste0('Dropping ', sum(ambigFeatRows), ' rows with ambiguous features (>', maxAmbiguityAllowed, '), ', sum(ambigFeatRows),' of ', nrow(df)))
totalUMI <- sum(df$V2)
x <- df$V1[ambigFeatRows]
totalHitsByRow <- totalHitsByRow[ambigFeatRows]
x[totalHitsByRow > 3] <- 'ManyHits'

x <- sort(table(x), decreasing = T)
x <- data.frame(Feature = names(x), Total = as.numeric(unname(x)))
x$Fraction <- x$Total / totalUMI
print(x)
df <- df[!ambigFeatRows, , drop = F]

paste0('Distinct features after pruning: ', length(unique(df$V1)))
}

# TODO: consider a percent filter on ambiguous classes...

# Ensure consistent sorting of ambiguous features, and re-group if needed:
if (any(grepl(df$V1, pattern = ','))) {
print('Ensuring consistent feature sort within ambiguous features:')
df$V1 <- unlist(sapply(df$V1, function(y){
return(paste0(sort(unlist(strsplit(y, split = ','))), collapse = ','))
}))

df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

paste0('Distinct features after re-grouping: ', length(unique(df$V1)))
}

if (any(duplicated(df[c('V1','V3')]))) {
print(paste0('Duplicate cell/features found. Rows at start: ', nrow(df)))
df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

df <- df[c('V1', 'V2', 'V3')]

print(paste0('After re-grouping: ', nrow(df)))
}

Expand Down
5 changes: 4 additions & 1 deletion man/AppendNimbleCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/DownloadAndAppendNimble.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 57da9dc

Please sign in to comment.