diff --git a/requirements.txt b/requirements.txt index 7e18a37..00ada45 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ scanpy>=1.7.0 leidenalg>=0.8.1 numpy<2.0 joblib>=0.17.0 -pandas<2.0 +pandas<=2.2.3 statsmodels>=0.11.1 matplotlib<3.9 plotly>=4.8.1 @@ -18,7 +18,7 @@ typing_extensions scikit-learn scikit-misc simpleppt>=1.1.3 -elpigraph-python==0.3.1 +elpigraph-python==0.3.2 networkx<3.0 igraph>=0.9.6 adjustText>=0.7.3 diff --git a/scFates/datasets/test.h5ad b/scFates/datasets/test.h5ad index 2b67bcb..f4c1439 100644 Binary files a/scFates/datasets/test.h5ad and b/scFates/datasets/test.h5ad differ diff --git a/scFates/plot/features.py b/scFates/plot/features.py index 4cc8bfc..fe4c23a 100644 --- a/scFates/plot/features.py +++ b/scFates/plot/features.py @@ -182,7 +182,7 @@ def trends( fitted = pd.DataFrame( adata[:, features].layers["fitted"], index=adata.obs_names, columns=features ).T.copy(deep=True) - g = adata.obs.groupby("seg") + g = adata.obs.groupby("seg",observed=False ) dct = graph["milestones"] keys = np.array(list(dct.keys())) @@ -476,7 +476,7 @@ def add_frames(axis, vert): gs_emb = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gsubs[0]) axemb = fig.add_subplot(gs_emb[0]) axs = axs + [axemb] - adata_temp.obs["mean_trajectory"] = 0 + adata_temp.obs["mean_trajectory"] = 0.0 adata_temp.obs.loc[ fitted_sorted.columns, "mean_trajectory" ] = fitted_sorted.mean(axis=0).values diff --git a/scFates/plot/matrix.py b/scFates/plot/matrix.py index b863a60..bc9d596 100644 --- a/scFates/plot/matrix.py +++ b/scFates/plot/matrix.py @@ -238,7 +238,7 @@ def matrix( if return_data: data = pd.DataFrame( - 0, index=features, columns=adata_sub.obs.split.cat.categories + 0.0, index=features, columns=adata_sub.obs.split.cat.categories ) allidx = data.stack().index for group in data.columns: @@ -338,7 +338,7 @@ def matrix( borderpad=2, ) - mappable = cm.ScalarMappable(cmap=cm.get_cmap(cmap)) + mappable = cm.ScalarMappable(cmap=plt.get_cmap(cmap)) cbar = fig.colorbar(mappable, cax=position, orientation="horizontal", aspect=50) cbar.set_ticks([0, 1]) cbar.set_ticklabels( diff --git a/scFates/plot/slide_cors.py b/scFates/plot/slide_cors.py index 50c6e76..7bee3e3 100644 --- a/scFates/plot/slide_cors.py +++ b/scFates/plot/slide_cors.py @@ -198,7 +198,7 @@ def slide_cors( emb[np.argsort(freq), 0], emb[np.argsort(freq), 1], s=10, - c=freq[np.argsort(freq)], + c=freq.iloc[np.argsort(freq)], cmap=gr, rasterized=True, ) @@ -327,11 +327,11 @@ def slide_cors( pairwise_distances(np.array([0, 0]).reshape(1, -1), fB).argsort() )[0][:top_focus] texts_A = [ - ax_scat.text(fA.loc[t][0], fA.loc[t][1], t, **kwargs_text) + ax_scat.text(fA.loc[t].iloc[0], fA.loc[t].iloc[1], t, **kwargs_text) for t in fA.index[topA] ] texts_B = [ - ax_scat.text(fB.loc[t][0], fB.loc[t][1], t, **kwargs_text) + ax_scat.text(fB.loc[t].iloc[0], fB.loc[t].iloc[1], t, **kwargs_text) for t in fB.index[topB] ] texts = texts_A + texts_B diff --git a/scFates/plot/trajectory.py b/scFates/plot/trajectory.py index 6c784ad..ce1cc7f 100644 --- a/scFates/plot/trajectory.py +++ b/scFates/plot/trajectory.py @@ -748,7 +748,7 @@ def _get_color_values( # when plotting, the color of the dots is determined for each plot # the data is either categorical or continuous and the data could be in # 'obs' or in 'var' - if not is_categorical_dtype(values): + if not isinstance(values, pd.CategoricalDtype): return values, False else: # is_categorical_dtype(values) color_key = f"{value_to_plot}_colors" diff --git a/scFates/plot/utils.py b/scFates/plot/utils.py index 9365e43..af0bd08 100644 --- a/scFates/plot/utils.py +++ b/scFates/plot/utils.py @@ -129,13 +129,11 @@ def make_projection_available(projection): def is_categorical(data, c=None): - from pandas.api.types import is_categorical_dtype as cat - if c is None: - return cat(data) # if data is categorical/array + return isinstance(data, pd.CategoricalDtype) # if data is categorical/array if not is_view(data): # if data is anndata view strings_to_categoricals(data) - return isinstance(c, str) and c in data.obs.keys() and cat(data.obs[c]) + return isinstance(c, str) and c in data.obs.keys() and isinstance(data.obs[c], pd.CategoricalDtype) def is_view(adata): diff --git a/scFates/tests/test_w_plots.py b/scFates/tests/test_w_plots.py index f298e46..ddba971 100644 --- a/scFates/tests/test_w_plots.py +++ b/scFates/tests/test_w_plots.py @@ -2,6 +2,7 @@ import scanpy as sc import numpy as np import pandas as pd +import matplotlib.pyplot as plt def test_pipeline(): @@ -80,6 +81,8 @@ def test_pipeline(): scf.pl.trajectory(adata_2, color_seg="milestones", arrows=True) scf.pl.trajectory(adata_2, color_seg="seg", arrows=True) + plt.close() + df = scf.tl.getpath(adata, root_milestone="43", milestones=["18"]) adata.obsm["X_umap3d"] = np.concatenate( @@ -89,6 +92,8 @@ def test_pipeline(): scf.pl.trajectory_3d(adata) scf.pl.trajectory_3d(adata, color="seg") + plt.close() + adata_s1 = scf.tl.subset_tree( adata, root_milestone="88", milestones=["18"], mode="substract", copy=True ) @@ -100,7 +105,7 @@ def test_pipeline(): adata_lim = scf.tl.subset_tree(adata, t_max=adata.obs.t.max() / 4 * 3, copy=True) - scf.tl.test_association(adata, n_jobs=2) + scf.tl.test_association(adata, n_jobs=1) scf.tl.test_association(adata, A_cut=0.3, reapply_filters=True) scf.tl.test_association(adata_2, layer="scaled", A_cut=0.3) nsigni = adata.var.signi.sum() @@ -148,6 +153,7 @@ def test_pipeline(): plot_emb=False, ordering="quantile", ) + plt.close() scf.pl.trends( adata, @@ -158,6 +164,7 @@ def test_pipeline(): ordering="pearson", show=False, ) + plt.close() scf.pl.single_trend(adata, feature=adata.var_names[0], color_exp="k") @@ -173,15 +180,15 @@ def test_pipeline(): ) scf.tl.test_fork( - adata, layer="scaled", root_milestone="43", milestones=["24", "18"], n_jobs=2 + adata, layer="scaled", root_milestone="43", milestones=["24", "18"], n_jobs=1 ) - scf.tl.test_fork(adata, root_milestone="43", milestones=["24", "18"], n_jobs=2) + scf.tl.test_fork(adata, root_milestone="43", milestones=["24", "18"], n_jobs=1) signi_fdr_nonscaled = adata.uns["43->24<>18"]["fork"].signi_fdr.sum() adata.uns["43->24<>18"]["fork"]["fdr"] = 0.02 scf.pl.test_fork(adata, root_milestone="43", milestones=["24", "18"]) scf.tl.test_fork( - adata, root_milestone="43", milestones=["24", "18"], n_jobs=2, rescale=True + adata, root_milestone="43", milestones=["24", "18"], n_jobs=1, rescale=True ) signi_fdr_rescaled = adata.uns["43->24<>18"]["fork"].signi_fdr.sum() diff --git a/scFates/tools/bifurcation_tools.py b/scFates/tools/bifurcation_tools.py index a177bfc..8629a66 100644 --- a/scFates/tools/bifurcation_tools.py +++ b/scFates/tools/bifurcation_tools.py @@ -42,7 +42,7 @@ def test_fork( copy: bool = False, ): - """\ + """ Test for branch differential gene expression and differential upregulation from progenitor to terminal state. First, differential gene expression between two branches is performed. The following model is used: @@ -154,7 +154,7 @@ def get_branches(m): return ddf brcells = pd.concat([get_branches(m) for m in milestones]) - brcells = brcells.loc[brcells.index.drop_duplicates(False)] + brcells = brcells.loc[brcells.index.drop_duplicates(keep=False)] matw = None if matw is None: @@ -324,7 +324,7 @@ def gt_fun(data): tmin = np.min([sdf.loc[sdf.i == i, "t"].max() for i in sdf.i.unique()]) sdf = sdf.loc[sdf.t <= tmin] - Amps = sdf.groupby("i").apply(lambda x: np.mean(x.exp)) + Amps = sdf.groupby("i")['exp'].mean() res = Amps - Amps.max() res["de_p"] = rmgcv.summary_gam(m)[3][1] @@ -351,7 +351,7 @@ def branch_specific( copy: bool = False, ): - """\ + """ Assign genes differentially expressed between two post-bifurcation branches. Parameters @@ -439,7 +439,7 @@ def activation( layer=None, ): - """\ + """ Identify pseudotime of activation of branch-specififc features. This aims in classifying the genes according to their their activation timing @@ -522,7 +522,7 @@ def activation_map(m): df = adata.obs.loc[:, ["t", "seg"]] else: df = adata.uns["pseudotime_list"][str(m)] - acti = pd.Series(0, index=stats.index) + acti = pd.Series(0.0, index=stats.index) for leave in leaves: subtree = getpath(img, root, graph["tips"], leave, graph, df).sort_values( @@ -617,9 +617,9 @@ def get_activation(data): subtree = data[0] subtree["exp"] = data[1] # load parameters - deriv_cut = subtree["deriv_cut"][0] - nwin = subtree["nwin"][0] - steps = subtree["steps"][0] + deriv_cut = subtree["deriv_cut"].iloc[0] + nwin = subtree["nwin"].iloc[0] + steps = subtree["steps"].iloc[0] wf = warnings.filters.copy() warnings.filterwarnings("ignore") @@ -676,7 +676,7 @@ def activation_lm( layer=None, ): - """\ + """ A more robust version of `tl.activation`. This is considered to be a more robust version of :func:`scFates.tl.activation`. diff --git a/scFates/tools/dendrogram.py b/scFates/tools/dendrogram.py index d3df06a..32146d4 100644 --- a/scFates/tools/dendrogram.py +++ b/scFates/tools/dendrogram.py @@ -96,7 +96,7 @@ def dendrogram(adata: anndata.AnnData, crowdedness: float = 1, n_jobs: int = 1): adata.obsm["X_dendro"] = dend.loc[adata.obs_names].values # generate segments - newseg = newseg[newseg.argsort()] + newseg = newseg.iloc[newseg.argsort()] segments = [] for i, n in enumerate(newseg.index): @@ -264,7 +264,7 @@ def _group_longform(self, vals, grouper, order): """Group a long-form variable by another with correct order.""" # Group the val data - grouped_vals = vals.groupby(grouper) + grouped_vals = vals.groupby(grouper,observed=False) out_data = [] for g in order: try: diff --git a/scFates/tools/fit.py b/scFates/tools/fit.py index 1a2621a..efe5757 100644 --- a/scFates/tools/fit.py +++ b/scFates/tools/fit.py @@ -198,7 +198,7 @@ def fit( def gt_fun(data): sdf = data[0] sdf["exp"] = data[1] - gamma = sdf["gamma"][0] + gamma = sdf["gamma"].iloc[0] global rmgcv global rstats diff --git a/scFates/tools/graph_operations.py b/scFates/tools/graph_operations.py index 103d033..53bcc62 100644 --- a/scFates/tools/graph_operations.py +++ b/scFates/tools/graph_operations.py @@ -1,6 +1,7 @@ from typing import Optional, Union, Iterable from typing_extensions import Literal from anndata import AnnData +import anndata as ad import numpy as np import pandas as pd from pandas import DataFrame @@ -433,9 +434,7 @@ def attach_tree( newcells = np.concatenate([adata.obs_names, adata_branch.obs_names]) - adata = adata.concatenate( - adata_branch, batch_key=None, index_unique=None, uns_merge="first", join="outer" - ) + adata = ad.concat([adata,adata_branch],uns_merge="first", join="outer") logg.info(" tree refitting") use_rep = graph["use_rep"] diff --git a/scFates/tools/linearity_deviation.py b/scFates/tools/linearity_deviation.py index b7467b8..d19d6ae 100644 --- a/scFates/tools/linearity_deviation.py +++ b/scFates/tools/linearity_deviation.py @@ -93,7 +93,7 @@ def lindev_map(m): t_perc = [cells.t.quantile(p / 100) for p in percentiles] - df[name + "_lindev_sel"] = np.nan + df[name + "_lindev_sel"] = 'none' df.loc[ adata.obs_names.isin(cells.index), name + "_lindev_sel" ] = "putative bridge" diff --git a/scFates/tools/pseudotime.py b/scFates/tools/pseudotime.py index 11acb92..13dfe53 100644 --- a/scFates/tools/pseudotime.py +++ b/scFates/tools/pseudotime.py @@ -290,7 +290,7 @@ def map_on_edges(v): if vcells.shape[0] > 0: nv = np.array(g.neighborhood(v, order=1))[1:] - nvd = g.shortest_paths(v, nv, weights=g.es["weight"]) + nvd = g.distances(v, nv, weights=g.es["weight"]) ndf = pd.DataFrame( { @@ -314,7 +314,7 @@ def map_on_edges(v): for i in range(ndf.shape[0]) ] - ndf["seg"] = 0 + ndf["seg"] = '0' isinfork = (graph["pp_info"].loc[ndf.v0, "PP"].isin(graph["forks"])).values ndf.loc[isinfork, "seg"] = ( graph["pp_info"].loc[ndf.loc[isinfork, "v1"], "seg"].values @@ -332,7 +332,7 @@ def map_on_edges(v): df.sort_values("cell", inplace=True) # df.index = graph["cells_fitted"] - df["edge"] = df.apply(lambda x: str(int(x[1])) + "|" + str(int(x[2])), axis=1) + df["edge"] = df.apply(lambda x: str(int(x.iloc[1])) + "|" + str(int(x.iloc[2])), axis=1) df.drop(["cell", "v0", "v1", "d"], axis=1, inplace=True) @@ -417,11 +417,11 @@ def unroll_circle(adata: AnnData, copy: bool = False): adata = adata.copy() if copy else adata pp_seg = adata.uns["graph"]["pp_seg"] - adata.obs.t[adata.obs.seg == "0"] = ( + adata.obs.loc[adata.obs.seg == "0",'t'] = ( -adata.obs.t[adata.obs.seg == "0"] + adata.obs.t[adata.obs.seg == "0"].max() ) - adata.obs.t[adata.obs.seg == "0"] = ( + adata.obs.loc[adata.obs.seg == "0",'t'] = ( adata.obs.t[adata.obs.seg == "0"] + adata.obs.t[adata.obs.seg == "0"].max() ) @@ -452,7 +452,7 @@ def unroll_circle(adata: AnnData, copy: bool = False): "n": 0, "from": a, "to": b, - "d": g.shortest_paths(a, b, weights="weight")[0][0], + "d": g.distances(a, b, weights="weight")[0][0], }, index=[0], ) diff --git a/scFates/tools/root.py b/scFates/tools/root.py index fd998f9..be31a58 100644 --- a/scFates/tools/root.py +++ b/scFates/tools/root.py @@ -124,9 +124,9 @@ def root( sub_g = g.vs.select(group=0).subgraph() a, b = np.argwhere(np.array(sub_g.degree()) == 1).ravel() - dst_0 = sub_g.shortest_paths(a, b, weights="weight")[0][0] + dst_0 = sub_g.distances(a, b, weights="weight")[0][0] a, b = sub_g.vs["label"][a], sub_g.vs["label"][b] - dst_1 = g.shortest_paths(root, furthest, weights="weight")[0][0] + dst_1 = g.distances(root, furthest, weights="weight")[0][0] pp_seg = pd.concat( [ pd.Series([0, a, b, dst_0], index=["n", "from", "to", "d"]), @@ -290,7 +290,7 @@ def roots(adata: AnnData, roots, meeting, copy: bool = False): nodes = np.argwhere( np.apply_along_axis(arr=(csr > 0).todense(), axis=0, func1d=np.sum) != 2 ).flatten() - pp_seg = pd.DataFrame(columns=["n", "from", "to", "d"]) + pp_seg = [] for node1, node2 in itertools.combinations(nodes, 2): paths12 = g.get_shortest_paths(node1, node2) paths12 = np.array([val for sublist in paths12 for val in sublist]) @@ -300,19 +300,19 @@ def roots(adata: AnnData, roots, meeting, copy: bool = False): path_root = root_dist_matrix[[node1, node2]] fro = fromto[np.argmin(path_root)] to = fromto[np.argmax(path_root)] - pp_info.loc[paths12, "seg"] = pp_seg.shape[0] + 1 - pp_seg = pp_seg.append( + pp_info.loc[paths12, "seg"] = len(pp_seg) + 1 + pp_seg.append( pd.DataFrame( { - "n": pp_seg.shape[0] + 1, + "n": len(pp_seg) + 1, "from": fro, "to": to, "d": shortest_path(csr, directed=False, indices=fro)[to], }, - index=[pp_seg.shape[0] + 1], + index=[len(pp_seg) + 1], ) ) - + pp_seg = pd.concat(pp_seg, axis=0) pp_seg["n"] = pp_seg["n"].astype(int).astype(str) pp_seg["n"] = pp_seg["n"].astype(int).astype(str) diff --git a/scFates/tools/utils.py b/scFates/tools/utils.py index ccab3b9..c13d660 100644 --- a/scFates/tools/utils.py +++ b/scFates/tools/utils.py @@ -12,12 +12,12 @@ def get_X(adata, cells, genes, layer, togenelist=False): if layer is None: if sparse.issparse(adata.X): - X = adata[cells, genes].X.A + X = adata[cells, genes].X.toarray() else: X = adata[cells, genes].X else: if sparse.issparse(adata.layers[layer]): - X = adata[cells, genes].layers[layer].A + X = adata[cells, genes].layers[layer].toarray() else: X = adata[cells, genes].layers[layer]