From 86c1b9d71448761163487541c2d126c33cd53f67 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Thu, 23 Jan 2025 23:42:38 -0500 Subject: [PATCH] add intersectByRowData --- NAMESPACE | 2 + R/subsetBy-methods.R | 92 ++++++++++++++++++++++++++++++++++++++++++++ man/subsetBy.Rd | 49 ++++++++++++++++++++++- 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index d687640..6dba82b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(getWithColData) export(hasAssay) export(hasRowData) export(hasRowRanges) +export(intersectByRowData) export(intersectColumns) export(intersectRows) export(listToMap) @@ -68,6 +69,7 @@ exportMethods(experiments) exportMethods(exportClass) exportMethods(hasRowData) exportMethods(hasRowRanges) +exportMethods(intersectByRowData) exportMethods(isEmpty) exportMethods(length) exportMethods(longForm) diff --git a/R/subsetBy-methods.R b/R/subsetBy-methods.R index 507d056..bf581b3 100644 --- a/R/subsetBy-methods.R +++ b/R/subsetBy-methods.R @@ -105,6 +105,28 @@ NULL #' * `subsetByColumn`: Select observations by assay or for each assay #' * `subsetByRow`: Select rows by assay or for each assay #' * `subsetByAssay`: Select experiments +#' * `subsetByRowData`: Select rows by values in the rowData +#' * `intersectByRowData`: Intersect with values in the rowData +#' +#' @section rowData: +#' +#' Some assays may have additional metadata associated with the rows. +#' This metadata is stored in the `rowData` slot of the object, typically a +#' `SummarizedExperiment` or `RangedSummarizedExperiment`. +#' +#' `subsetByRowData` allows the user to subset the rows of the assays +#' based on the values in the `rowData`. +#' +#' `intersectByRowData` is a special case of `subsetByRowData` where +#' the `rowData` values are intersected with the `y` values. Naturally, +#' the `y` values are expected to be of type `character`. 
+#' +#' Note that `rowDataCol` allows the user to specify a particular +#' column from which to extract the values for subsetting. This column +#' name must be consistent across assays. If the column is not present +#' in an assay, the assay will be skipped and considered a no-op. Assays +#' are also skipped when there are no values in the `rowData` that match +#' the `y` values. #' #' @return `subsetBy*`: operations are endomorphic and return either #' `MultiAssayExperiment` or `ExperimentList` depending on the @@ -160,6 +182,23 @@ NULL #' subsetByRowData( #' mae, "ENST00000355076", "rownames", i = "Affy" #' ) +#' +#' ## use miniACC as example MAE +#' data("miniACC") +#' +#' ## intersect values of y with rownames in rowData +#' intersectByRowData( +#' x = miniACC, +#' y = c("G6PD", "PETN"), +#' rowDataCol = "rownames", +#' i = c("RNASeq2GeneNorm", "gistict") +#' ) +#' +#' ## no-op when rowDataCol is not present or there is no data +#' intersectByRowData( +#' x = miniACC, y = c("G6PD", "PETN"), rowDataCol = "Genes", +#' i = c("RNASeq2GeneNorm", "gistict") +#' ) NULL # subsetBy Generics ------------------------------------------------------- @@ -434,3 +473,56 @@ setMethod( subsetByRow(x = x, y = y, i = i) } ) + + +# intersectByRowData,MultiAssayExperiment-method -------------------------- + +#' @rdname subsetBy +#' +#' @aliases intersectByRowData +#' +#' @export +setGeneric( + "intersectByRowData", + function(x, y, rowDataCol, i, ...) + standardGeneric("intersectByRowData") +) + +#' @rdname subsetBy +#' @exportMethod intersectByRowData +setMethod( + "intersectByRowData", c("MultiAssayExperiment", "character", "character"), + function(x, y, rowDataCol, i = TRUE, ...) 
{ + if (is.character(i)) + logi <- names(x) %in% i + else if (is.logical(i) || is.numeric(i)) + logi <- names(x) %in% names(x)[i] + else + stop("Invalid experiment subscript type for 'i'") + i <- hasRowData(x) & logi + if (!any(i)) + stop("No 'rowData' available for subsetting") + y <- lapply( + experiments(x)[i], + function(exper) { + rd <- rowData(exper) + if (rowDataCol %in% c("rownames", "row.names")) + intersect(rownames(rd), y) + else if (rowDataCol %in% colnames(rd)) + intersect(rd[[rowDataCol]], y) + else + NULL + } + ) + noRowData <- + vapply(y, function(z) is.null(z) || !length(z), logical(1)) + if (any(noRowData)) { + noRDnames <- paste(shQuote(names(y)[noRowData]), collapse = ", ") + warning( + "No 'rowData' intersected for assays:\n ", noRDnames, + call. = FALSE + ) + } + subsetByRow(x = x, y = y, i = i) + } +) diff --git a/man/subsetBy.Rd b/man/subsetBy.Rd index fe6e4ea..cf68f1c 100644 --- a/man/subsetBy.Rd +++ b/man/subsetBy.Rd @@ -26,6 +26,8 @@ \alias{subsetByColumn,MultiAssayExperiment,ANY-method} \alias{subsetByAssay,MultiAssayExperiment-method} \alias{subsetByRowData,MultiAssayExperiment,character,character-method} +\alias{intersectByRowData} +\alias{intersectByRowData,MultiAssayExperiment,character,character-method} \alias{[,MultiAssayExperiment,ANY,ANY,ANY-method} \alias{[[,MultiAssayExperiment,ANY,ANY-method} \alias{[[<-,MultiAssayExperiment,ANY,ANY-method} @@ -74,6 +76,10 @@ subsetByAssay(x, y) \S4method{subsetByRowData}{MultiAssayExperiment,character,character}(x, y, rowDataCol, i = TRUE, ...) +intersectByRowData(x, y, rowDataCol, i, ...) + +\S4method{intersectByRowData}{MultiAssayExperiment,character,character}(x, y, rowDataCol, i = TRUE, ...) + \S4method{[}{MultiAssayExperiment,ANY,ANY,ANY}(x, i, j, k, ..., drop = FALSE) \S4method{[[}{MultiAssayExperiment,ANY,ANY}(x, i, j, ...) 
@@ -121,7 +127,7 @@ A set of functions for extracting and dividing a \code{MultiAssayExperiment} } \details{ -Subsetting a MultiAssayExperiment by the \strong{j} index can yield a call +Subsetting a \code{MultiAssayExperiment} by the \strong{j} index can yield a call to either \code{subsetByColData} or \code{subsetByColumn}. For vector inputs, the subset will be applied to the \code{colData} rows. For \code{List}-type inputs, the List will be applied to each of the elements in the @@ -134,8 +140,32 @@ The order of the subsetting elements in the \item \code{subsetByColumn}: Select observations by assay or for each assay \item \code{subsetByRow}: Select rows by assay or for each assay \item \code{subsetByAssay}: Select experiments +\item \code{subsetByRowData}: Select rows by values in the rowData +\item \code{intersectByRowData}: Intersect with values in the rowData } } +\section{rowData}{ + + +Some assays may have additional metadata associated with the rows. +This metadata is stored in the \code{rowData} slot of the object, typically a +\code{SummarizedExperiment} or \code{RangedSummarizedExperiment}. + +\code{subsetByRowData} allows the user to subset the rows of the assays +based on the values in the \code{rowData}. + +\code{intersectByRowData} is a special case of \code{subsetByRowData} where +the \code{rowData} values are intersected with the \code{y} values. Naturally, +the \code{y} values are expected to be of type \code{character}. + +Note that \code{rowDataCol} allows the user to specify a particular +column from which to extract the values for subsetting. This column +name must be consistent across assays. If the column is not present +in an assay, the assay will be skipped and considered a no-op. Assays +are also skipped when there are no values in the \code{rowData} that match +the \code{y} values. 
+} + \examples{ ## Load the example MultiAssayExperiment example("MultiAssayExperiment") @@ -186,4 +216,21 @@ subsetByRow(mae, egr, i = hasRowRanges(mae)) subsetByRowData( mae, "ENST00000355076", "rownames", i = "Affy" ) + +## use miniACC as example MAE +data("miniACC") + +## intersect values of y with rownames in rowData +intersectByRowData( + x = miniACC, + y = c("G6PD", "PETN"), + rowDataCol = "rownames", + i = c("RNASeq2GeneNorm", "gistict") +) + +## no-op when rowDataCol is not present or there is no data +intersectByRowData( + x = miniACC, y = c("G6PD", "PETN"), rowDataCol = "Genes", + i = c("RNASeq2GeneNorm", "gistict") +) }