From 40a60a09f9636e296d42fa8534dbb4c8f0cbaff8 Mon Sep 17 00:00:00 2001 From: Benjamin James Date: Wed, 24 Jul 2024 17:44:38 -0400 Subject: [PATCH] Added n_cells to pseudobulk() --- benj/pseudobulk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benj/pseudobulk.py b/benj/pseudobulk.py index 2d351d3..fa47dae 100644 --- a/benj/pseudobulk.py +++ b/benj/pseudobulk.py @@ -45,7 +45,7 @@ def _pb_anncollection(ac, transform, layer_list=None, batch_size:int=10000): L[lay] = convert_X(L[lay].tocsr()) return anndata.AnnData(X=convert_X(X.tocsr()), layers=L, var=pd.DataFrame(index=ac.var_names)) -def pseudobulk(adata, groupby, which:str="sum", batch_size:int=10000, prefix="PB#", dense:bool=False): +def pseudobulk(adata, groupby, which:str="sum", batch_size:int=10000, prefix="PB#", dense:bool=False, n_cells="n_cells"): import numpy as np import scipy.sparse import pandas as pd @@ -57,7 +57,8 @@ def pseudobulk(adata, groupby, which:str="sum", batch_size:int=10000, prefix="PB else: mc = pd.get_dummies(adata.obs.loc[:, groupby]).reindex(adata.obs_names, fill_value=0) cols = pseudobulk_valid_columns(adata.obs, mc) - newobs = adata.obs.loc[mc.idxmax().values, cols] + newobs = adata.obs.loc[mc.idxmax().values, cols].copy() + newobs[n_cells] = mc.loc[:, mc.idxmax().index].values.sum(0) newobs.index = ["%s%d" % (prefix, x) for x, _ in enumerate(mc.columns)] if which == "sum": S = scipy.sparse.csr_matrix(mc.values, dtype="i8")