Skip to content

Commit

Permalink
Add check for duplicate features
Browse files: browse the repository at this point in the history
  • Loading branch information
bbimber committed Jan 25, 2025
1 parent ffc7204 commit 146ac85
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
2 changes: 1 addition & 1 deletion R/NimbleAPI.R
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ DownloadAndAppendNimble <- function(seuratObject, targetAssayName, outPath=tempd
colFilter=makeFilter(
c("library_id", "EQUALS", libraryId),
c("ref_nt_id/lineage", "NOT_MISSING", ''),
c("ref_nt_id/name", "IN", paste0(unique(df$V1), collapse = ';'))
c("ref_nt_id/name", "IN", paste0(feats, collapse = ';'))
),
colNameOpt="rname"
))
Expand Down
37 changes: 24 additions & 13 deletions R/NimbleAppend.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,30 @@ AppendNimbleCounts <- function(seuratObject, nimbleFile, targetAssayName, maxAmb
paste0('Distinct features after pruning: ', length(unique(df$V1)))
}

# Ensure consistent sorting of ambiguous features, and re-group if needed:
if (any(grepl(df$V1, pattern = ','))) {
print('Ensuring consistent feature sort within ambiguous features:')
df$V1 <- unlist(sapply(df$V1, function(y){
return(paste0(sort(unlist(strsplit(y, split = ','))), collapse = ','))
}))

df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

paste0('Distinct features after re-grouping: ', length(unique(df$V1)))
}
# TODO: consider a percent filter on ambiguous classes...

# Ensure consistent sorting of ambiguous features, and re-group if needed:
if (any(grepl(df$V1, pattern = ','))) {
print('Ensuring consistent feature sort within ambiguous features:')
df$V1 <- unlist(sapply(df$V1, function(y){
return(paste0(sort(unlist(strsplit(y, split = ','))), collapse = ','))
}))

df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

paste0('Distinct features after re-grouping: ', length(unique(df$V1)))
}

if (any(duplicated(df[c('V1','V3')]))) {
print(paste0('Duplicate cell/features found. Rows at start: ', nrow(df)))
df <- df %>%
group_by(V1, V3) %>%
summarize(V2 = sum(V2))

print(paste0('After re-grouping: ', nrow(df)))
}

tryCatch({
# Group to ensure we have one value per combination:
Expand Down

0 comments on commit 146ac85

Please sign in to comment.