Skip to content

Commit

Permalink
Separate AssignMhcFeaturesToLocus into separate method
Browse files Browse the repository at this point in the history
  • Loading branch information
bbimber committed Dec 21, 2023
1 parent b3854a1 commit e11de0a
Showing 1 changed file with 22 additions and 19 deletions.
41 changes: 22 additions & 19 deletions R/NimbleAppend.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,23 +169,8 @@ AppendNimbleCounts <- function(seuratObject, nimbleFile, targetAssayName, dropAm
return(seuratObject)
}


#' @title PerformMhcNormalization
#' @description This is a fairly specific normalization step for MHC data. It will divide the raw counts for each feature by the sum of counts in that cell from that locus (e.g., MHC-A, MHC-B, MHC-E, MHC-I, DPA, DPB)
#'
#' @param seuratObj A Seurat object
#' @param sourceAssayName The assay to normalize
#' @param featurePrefix This prefix is stripped from the start of all feature names
#' @param delimiter Used to split the locus from allele designation
#' @param ambiguousFeatureDelim This character is used to split feature names in the case of ambiguous features. If a feature is ambiguous, the locus is assigned as the unique loci of the feature set.
#' @param perCell If true, the feature counts are scaled based on the library size of features from that locus in that cell. If false, it is scaled based on the library size of features in that locus from all cells matching cellGroupingVariable
#' @param cellGroupingVariable If perCell is FALSE, the library size is calculated by taking the sum of features from that locus across all cells where this metadata variable matches the current cell
#' @param stripNumbersFromLocus If true, numeric values will be stripped from all locus strings
#' @return A modified Seurat object.
#' @export
PerformMhcNormalization <- function(seuratObj, sourceAssayName = 'MHC', featurePrefix = 'Mamu-', delimiter = '*', ambiguousFeatureDelim = ',', perCell = TRUE, cellGroupingVariable = 'DatasetId', stripNumbersFromLocus = TRUE) {
.AssignLocusToMhcFeatures <- function(seuratObj, sourceAssayName = 'MHC', featurePrefix = 'Mamu-', delimiter = '*', ambiguousFeatureDelim = ',', stripNumbersFromLocus = TRUE) {
seuratObj[[sourceAssayName]]@meta.features$locus <- NA

for (featName in rownames(seuratObj[[sourceAssayName]])) {
feats <- unlist(strsplit(x = featName, split = ambiguousFeatureDelim))
loci <- c()
Expand All @@ -204,17 +189,36 @@ PerformMhcNormalization <- function(seuratObj, sourceAssayName = 'MHC', featureP
if (stripNumbersFromLocus) {
locus <- gsub(x = locus, pattern = '[0-9]+.*$', replacement = '')
}

loci <- unique(c(loci, locus))
}

if (length(loci) > 1) {
warning(paste0('Feature matched multi loci: ', featName, ', ', paste0(loci, collapse = ',')))
warning(paste0('Feature matched multiple loci: ', featName, ', ', paste0(loci, collapse = ',')))
}

seuratObj[[sourceAssayName]]@meta.features$locus[rownames(seuratObj[[sourceAssayName]]) == feat] <- paste0(loci, collapse = ',')
}

return(seuratObj)
}

#' @title PerformMhcNormalization
#' @description This is a fairly specific normalization step for MHC data. It will divide the raw counts for each feature by the sum of counts in that cell from that locus (e.g., MHC-A, MHC-B, MHC-E, MHC-I, DPA, DPB)
#'
#' @param seuratObj A Seurat object
#' @param sourceAssayName The assay to normalize
#' @param featurePrefix This prefix is stripped from the start of all feature names
#' @param delimiter Used to split the locus from allele designation
#' @param ambiguousFeatureDelim This character is used to split feature names in the case of ambiguous features. If a feature is ambiguous, the locus is assigned as the unique loci of the feature set.
#' @param perCell If true, the feature counts are scaled based on the library size of features from that locus in that cell. If false, it is scaled based on the library size of features in that locus from all cells matching cellGroupingVariable
#' @param cellGroupingVariable If perCell is FALSE, the library size is calculated by taking the sum of features from that locus across all cells where this metadata variable matches the current cell
#' @param stripNumbersFromLocus If true, numeric values will be stripped from all locus strings
#' @return A modified Seurat object.
#' @export
PerformMhcNormalization <- function(seuratObj, sourceAssayName = 'MHC', featurePrefix = 'Mamu-', delimiter = '*', ambiguousFeatureDelim = ',', perCell = TRUE, cellGroupingVariable = 'DatasetId', stripNumbersFromLocus = TRUE) {
seuratObj <- .AssignLocusToMhcFeatures(seuratObj, sourceAssayName = sourceAssayName, featurePrefix = featurePrefix, delimiter = delimiter, ambiguousFeatureDelim = ambiguousFeatureDelim, stripNumbersFromLocus = stripNumbersFromLocus)

dat <- Seurat::GetAssayData(seuratObj, assay = sourceAssayName, slot = 'counts')
margin <- 2

Expand Down Expand Up @@ -243,7 +247,6 @@ PerformMhcNormalization <- function(seuratObj, sourceAssayName = 'MHC', featureP
}
}
}


for (locus in sort(unique(seuratObj[[sourceAssayName]]@meta.features$locus))) {
print(paste0('Normalizing locus: ', locus))
Expand Down

0 comments on commit e11de0a

Please sign in to comment.