Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAS-61 Make the APIClient return model objects #81

Merged
merged 7 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0

Changed
~~~~~~~
- Explicitly typed all objects returned from the client

Fixed
~~~~~
- Removed unused method parameters


1.4.13 - 2024-09-06
Expand All @@ -29,7 +31,7 @@ Changed
~~~~~~~
- Added ability to render documentation locally
- PR tests will now fail when doc is invalid
- Fixed bugs documentation navigation
- Fixed bugs in documentation navigation
- Update Cell Type Ontology to the version used in cellxgene schema v5

Fixed
Expand Down
165 changes: 95 additions & 70 deletions cellarium/cas/client.py

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions cellarium/cas/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,32 @@ class Headers:
client_session_id = "x-client-session-id"
# The client action id that is used to track a user's logical action that may span multiple requests.
client_action_id = "x-client-action-id"


class CellMetadataFeatures(Enum):
    """
    Represents the cell metadata features that can be queried for in the CAS API.

    Each member's value is the feature name as a plain string. The names mirror the fields of
    ``cellarium.cas.models.CellQueryResults.CellariumCellMetadata``: first the descriptive
    features, then the matching ``*_ontology_term_id`` features.
    """

    # Members are intentionally left without type annotations: the typing spec states that enum
    # members should not be annotated, and type checkers report ``NAME: str = "..."`` as an error
    # because the member's real type is the enum class, not ``str``.
    CAS_CELL_INDEX = "cas_cell_index"
    CELL_TYPE = "cell_type"
    ASSAY = "assay"
    DISEASE = "disease"
    DONOR_ID = "donor_id"
    IS_PRIMARY_DATA = "is_primary_data"
    DEVELOPMENT_STAGE = "development_stage"
    ORGANISM = "organism"
    SELF_REPORTED_ETHNICITY = "self_reported_ethnicity"
    SEX = "sex"
    SUSPENSION_TYPE = "suspension_type"
    TISSUE = "tissue"
    TOTAL_MRNA_UMIS = "total_mrna_umis"

    # Ontology term IDs for the features above
    CELL_TYPE_ONTOLOGY_TERM_ID = "cell_type_ontology_term_id"
    ASSAY_ONTOLOGY_TERM_ID = "assay_ontology_term_id"
    DISEASE_ONTOLOGY_TERM_ID = "disease_ontology_term_id"
    DEVELOPMENT_STAGE_ONTOLOGY_TERM_ID = "development_stage_ontology_term_id"
    ORGANISM_ONTOLOGY_TERM_ID = "organism_ontology_term_id"
    SELF_REPORTED_ETHNICITY_ONTOLOGY_TERM_ID = "self_reported_ethnicity_ontology_term_id"
    SEX_ONTOLOGY_TERM_ID = "sex_ontology_term_id"
    TISSUE_ONTOLOGY_TERM_ID = "tissue_ontology_term_id"
188 changes: 188 additions & 0 deletions cellarium/cas/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import typing as t

from pydantic import BaseModel, Field


class CellTypeSummaryStatisticsResults(BaseModel):
"""
Represents the data object returned by the CAS API for nearest neighbor annotations.

``data`` is a list of ``NeighborhoodAnnotation`` entries, each identified by its ``query_cell_id``.
Every annotation lists ``SummaryStatistics`` matches, which may in turn carry per-dataset
``DatasetStatistics``.
"""

# Distance statistics for the matched cells that come from a single dataset.
# Nested because it is only used as a field type of this result model.
class DatasetStatistics(BaseModel):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possibly a dumb question: Are these classes nested because they are only used for fields of CellTypeSummaryStatisticsResults?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's exactly right

dataset_id: str = Field(
description="The ID of the dataset containing cells", examples=["a7a92fb49-50741b00a-244955d47"]
)
count_per_dataset: int = Field(description="The number of cells found in the dataset", examples=[10])
min_distance: float = Field(
description="The minimum distance between the query cell and the dataset cells",
examples=[1589.847900390625],
)
max_distance: float = Field(
description="The maximum distance between the query cell and the dataset cells",
examples=[1840.047119140625],
)
median_distance: float = Field(
description="The median distance between the query cell and the dataset cells", examples=[1791.372802734375]
)
mean_distance: float = Field(
description="The mean distance between the query cell and the dataset cells", examples=[1791.372802734375]
)

# Distance statistics (min, quartiles, max) for one cell type cluster matched to the query cell.
class SummaryStatistics(BaseModel):
cell_type: str = Field(description="The cell type of the cluster of cells", examples=["erythrocyte"])
cell_count: int = Field(description="The number of cells in the cluster", examples=[94])
min_distance: float = Field(
description="The minimum distance between the query cell and the cluster cells",
examples=[1589.847900390625],
)
p25_distance: float = Field(
description="The 25th percentile distance between the query cell and the cluster cells",
examples=[1664.875244140625],
)
median_distance: float = Field(
description="The median distance between the query cell and the cluster cells", examples=[1791.372802734375]
)
p75_distance: float = Field(
description="The 75th percentile distance between the query cell and the cluster cells",
examples=[1801.3585205078125],
)
max_distance: float = Field(
description="The maximum distance between the query cell and the cluster cells",
examples=[1840.047119140625],
)
# Optional per-dataset breakdown; defaults to None when it is not provided.
dataset_ids_with_counts: t.Optional[t.List["CellTypeSummaryStatisticsResults.DatasetStatistics"]] = None

class NeighborhoodAnnotation(BaseModel):
"""
Represents the data object returned by the CAS API for a single nearest neighbor annotation.
"""

query_cell_id: str = Field(description="The ID of the querying cell", examples=["ATTACTTATTTAGTT-12311"])
# The cell type clusters matched to the querying cell (required; no default).
matches: t.List["CellTypeSummaryStatisticsResults.SummaryStatistics"]

data: t.List["CellTypeSummaryStatisticsResults.NeighborhoodAnnotation"] = Field(description="The annotations found")


# The annotations above refer to the nested models by string name; model_rebuild() presumably
# resolves those forward references now that the class body is complete (confirm against the
# Pydantic documentation).
CellTypeSummaryStatisticsResults.model_rebuild()


class CellTypeOntologyAwareResults(BaseModel):
"""
Represents the data object returned by the CAS API for ontology-aware annotations.

``data`` is a list of ``OntologyAwareAnnotation`` entries, each identified by its ``query_cell_id``
and carrying the scored ``Match`` entries found for that cell.
"""

# A single scored match: a cell type together with its ontology term ID.
class Match(BaseModel):
score: float = Field(description="The score of the match", examples=[0.789])
cell_type_ontology_term_id: str = Field(
description="The ontology term ID of the cell type for the match", examples=["CL:0000121"]
)
cell_type: str = Field(description="The cell type of the match", examples=["erythrocyte"])

class OntologyAwareAnnotation(BaseModel):
"""
Represents the data object returned by the CAS API for a single ontology-aware annotation.
"""

query_cell_id: str = Field(description="The ID of the querying cell", examples=["ATTACTTATTTAGTT-12311"])
matches: t.List["CellTypeOntologyAwareResults.Match"] = Field(
description="The matches found for the querying cell"
)
total_weight: float = Field(description="The total weight of the matches", examples=[11.23232])
total_neighbors: int = Field(description="The total number of neighbors matched", examples=[1023])
total_neighbors_unrecognized: int = Field(
description="The total number of neighbors that were not recognized", examples=[5]
)

data: t.List["CellTypeOntologyAwareResults.OntologyAwareAnnotation"] = Field(description="The annotations found")


# The annotations above refer to the nested models by string name; model_rebuild() presumably
# resolves those forward references now that the class body is complete (confirm against the
# Pydantic documentation).
CellTypeOntologyAwareResults.model_rebuild()


class MatrixQueryResults(BaseModel):
"""
Represents the data object returned by the CAS API when performing a cell matrix query
(e.g. a query of the cell database using a matrix).

``data`` is a list of ``MatrixQueryResult`` entries, each identified by its ``query_cell_id``
and listing the found cells as ``Match`` entries.
"""

# A single found cell and its distance from the querying cell.
class Match(BaseModel):
# NOTE(review): typed as float here, while CellQueryResults.CellariumCellMetadata.cas_cell_index
# is an int and the example value is integral. Confirm whether the API really returns a float
# for this field before relying on it as an identifier.
cas_cell_index: float = Field(description="CAS-specific ID of a single cell", examples=[123])
distance: float = Field(
description="The distance between this querying cell and the found cell", examples=[0.123]
)

class MatrixQueryResult(BaseModel):
"""
Represents the data object returned by the CAS API for a single cell query.
"""

query_cell_id: str = Field(description="The ID of the querying cell", examples=["ATTACTTATTTAGTT-12311"])
# The found cells for the querying cell (required; no default).
neighbors: t.List["MatrixQueryResults.Match"]

data: t.List["MatrixQueryResults.MatrixQueryResult"] = Field(description="The results of the query")


# The annotations above refer to the nested models by string name; model_rebuild() presumably
# resolves those forward references now that the class body is complete (confirm against the
# Pydantic documentation).
MatrixQueryResults.model_rebuild()


class CellQueryResults(BaseModel):
    """
    Represents the data object returned by the CAS API for a cell query.

    ``data`` holds the metadata of the found cells, one ``CellariumCellMetadata`` entry each.
    """

    class CellariumCellMetadata(BaseModel):
        """
        Represents the metadata of a single cell.

        Only ``cas_cell_index`` is required. Every other feature defaults to ``None`` so that a
        response carrying only a subset of the metadata features still validates.
        """

        cas_cell_index: int = Field(description="The CAS-specific ID of the cell", examples=[123])

        # NOTE: in Pydantic v2 an ``Optional[X]`` field without a default is still *required* (it
        # merely allows ``None`` as a value), hence the explicit ``default=None`` on every
        # optional feature below.
        cell_type: t.Optional[str] = Field(
            default=None,
            description="The cell type of the cell",
            examples=["enterocyte"],
        )
        assay: t.Optional[str] = Field(
            default=None,
            description="The assay used to generate the cell",
            examples=["10x 3' v2"],
        )
        disease: t.Optional[str] = Field(
            default=None,
            description="The disease state of the cell",
            examples=["glioblastoma"],
        )
        donor_id: t.Optional[str] = Field(
            default=None,
            description="The ID of the donor of the cell",
            examples=["H20.33.013"],
        )
        is_primary_data: t.Optional[bool] = Field(
            default=None,
            description="Whether the cell is primary data",
            examples=[True],
        )
        development_stage: t.Optional[str] = Field(
            default=None,
            description="The development stage of the cell donor",
            examples=["human adult stage"],
        )
        organism: t.Optional[str] = Field(
            default=None,
            description="The organism of the cell",
            examples=["Homo sapiens"],
        )
        self_reported_ethnicity: t.Optional[str] = Field(
            default=None,
            description="The self reported ethnicity of the cell donor",
            examples=["Japanese"],
        )
        sex: t.Optional[str] = Field(
            default=None,
            description="The sex of the cell donor",
            examples=["male"],
        )
        suspension_type: t.Optional[str] = Field(
            default=None,
            description="The cell suspension types used",
            examples=["nucleus"],
        )
        tissue: t.Optional[str] = Field(
            default=None,
            description="The tissue-type that the cell was a part of",
            examples=["cerebellum"],
        )
        total_mrna_umis: t.Optional[int] = Field(
            default=None,
            description="The count of mRNA UMIs associated with this cell",
            examples=[24312],
        )

        # Ontology term IDs for the fields
        cell_type_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the type of the cell",
            examples=["CL:0000121"],
        )
        assay_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the assay used to generate the cell",
            examples=["EFO:0010550"],
        )
        disease_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the disease state of the cell",
            examples=["PATO:0000461"],
        )
        development_stage_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the development stage of the cell donor",
            examples=["HsapDv:0000053"],
        )
        organism_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the organism of the cell",
            examples=["NCBITaxon:9606"],
        )
        self_reported_ethnicity_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the self reported ethnicity of the cell donor",
            examples=["HANCESTRO:0019"],
        )
        sex_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the sex of the cell donor",
            examples=["PATO:0000384"],
        )
        tissue_ontology_term_id: t.Optional[str] = Field(
            default=None,
            description="The ID used by the ontology for the tissue type that the cell was a part of",
            examples=["UBERON:0002037"],
        )

    data: t.List["CellQueryResults.CellariumCellMetadata"] = Field(description="The metadata of the found cells")


# ``data`` refers to the nested model by string name; rebuild the model so that forward reference
# is resolved now that the class body is complete.
CellQueryResults.model_rebuild()
22 changes: 12 additions & 10 deletions cellarium/cas/postprocessing/ontology_aware.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import scipy.sparse as sp
from anndata import AnnData

from cellarium.cas.models import CellTypeOntologyAwareResults

from .cell_ontology.cell_ontology_cache import CL_CELL_ROOT_NODE, CL_EUKARYOTIC_CELL_ROOT_NODE, CellOntologyCache
from .common import get_obs_indices_for_cluster

Expand All @@ -22,7 +24,7 @@


def convert_cas_ontology_aware_response_to_score_matrix(
adata: AnnData, cas_ontology_aware_response: list, cl: CellOntologyCache
adata: AnnData, cas_ontology_aware_response: CellTypeOntologyAwareResults, cl: CellOntologyCache
) -> sp.csr_matrix:
"""
Generate a sparse matrix of CAS ontology-aware scores.
Expand All @@ -35,7 +37,7 @@ def convert_cas_ontology_aware_response_to_score_matrix(
:type adata: AnnData

:param cas_ontology_aware_response: The CAS ontology-aware results, whose ``data`` holds one annotation per query cell.
:type cas_ontology_aware_response: list
:type cas_ontology_aware_response: CellTypeOntologyAwareResults

:param cl: A CellOntologyCache object containing the cell ontology information.
:type cl: CellOntologyCache
Expand All @@ -48,27 +50,27 @@ def convert_cas_ontology_aware_response_to_score_matrix(
data = []

obs_values = adata.obs.index.values
for obs_idx, cas_cell_response in enumerate(cas_ontology_aware_response):
assert cas_cell_response["query_cell_id"] == obs_values[obs_idx]
for match in cas_cell_response["matches"]:
for obs_idx, cas_cell_response in enumerate(cas_ontology_aware_response.data):
assert cas_cell_response.query_cell_id == obs_values[obs_idx]
for match in cas_cell_response.matches:
row.append(obs_idx)
col.append(cl.cl_names_to_idx_map[match["cell_type_ontology_term_id"]])
data.append(match["score"])
col.append(cl.cl_names_to_idx_map[match.cell_type_ontology_term_id])
data.append(match.score)

n_obs = len(cas_ontology_aware_response)
n_obs = len(cas_ontology_aware_response.data)
n_cl_names = len(cl.cl_names)
return sp.coo_matrix((data, (row, col)), shape=(n_obs, n_cl_names)).tocsr()


def insert_cas_ontology_aware_response_into_adata(
cas_ontology_aware_response: list, adata: AnnData, cl: CellOntologyCache
cas_ontology_aware_response: CellTypeOntologyAwareResults, adata: AnnData, cl: CellOntologyCache
) -> None:
"""
Inserts Cellarium CAS ontology aware response into `obsm` property of a provided AnnData file as a
:class:`scipy.sparse.csr_matrix` named `cas_cl_scores`.

:param cas_ontology_aware_response: The Cellarium CAS ontology aware response.
:type cas_ontology_aware_response: list
:type cas_ontology_aware_response: CellTypeOntologyAwareResults

:param adata: The AnnData object to insert the response into.
:type adata: AnnData
Expand Down
4 changes: 1 addition & 3 deletions cellarium/cas/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,22 +372,20 @@ def get_user_quota(self) -> t.Dict[str, t.Any]:
return self.get_json(endpoint=endpoints.GET_USER_QUOTA)

def query_cells_by_ids(
self, model_name: str, cell_ids: t.List[int], metadata_feature_names: t.List[str]
self, cell_ids: t.List[int], metadata_feature_names: t.List[str]
) -> t.List[t.Dict[str, t.Any]]:
"""
Retrieve cells by their ids from Cellarium Cloud database.

Refer to API Docs:
{api_url}/api/docs#/cell-analysis/get_cells_by_ids_api_cellarium_cas_query_cells_by_ids_post

:param model_name: Name of the model to use. Model name is required to locate the correct database.
:param cell_ids: List of cell ids from Cellarium Cloud database to query by.
:param metadata_feature_names: List of metadata feature names to include in the response.

:return: List of cells with metadata.
"""
request_data = {
"model_name": model_name,
"cas_cell_ids": cell_ids,
"metadata_feature_names": metadata_feature_names,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dash.development.base_component import Component
from plotly.express.colors import sample_colorscale

from cellarium.cas.models import CellTypeOntologyAwareResults
from cellarium.cas.postprocessing import (
CAS_CL_SCORES_ANNDATA_OBSM_KEY,
CellOntologyScoresAggregationDomain,
Expand Down Expand Up @@ -175,7 +176,7 @@ class CASCircularTreePlotUMAPDashApp:
def __init__(
self,
adata: AnnData,
cas_ontology_aware_response: list,
cas_ontology_aware_response: CellTypeOntologyAwareResults,
cluster_label_obs_column: t.Optional[str] = None,
aggregation_op: CellOntologyScoresAggregationOp = CellOntologyScoresAggregationOp.MEAN,
aggregation_domain: CellOntologyScoresAggregationDomain = CellOntologyScoresAggregationDomain.OVER_THRESHOLD,
Expand Down
19 changes: 18 additions & 1 deletion docs/source/automodules/client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,21 @@ Client
.. autoclass:: cellarium.cas.constants.CountMatrixInput
:members:
:undoc-members:
:member-order: bysource
:member-order: bysource

.. autoclass:: cellarium.cas.constants.CellMetadataFeatures
:members:
:undoc-members:
:member-order: bysource

.. autopydantic_model:: cellarium.cas.models::CellTypeSummaryStatisticsResults
:member-order: bysource

.. autopydantic_model:: cellarium.cas.models::CellTypeOntologyAwareResults
:member-order: bysource

.. autopydantic_model:: cellarium.cas.models::MatrixQueryResults
:member-order: bysource

.. autopydantic_model:: cellarium.cas.models::CellQueryResults
:member-order: bysource
10 changes: 10 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx_substitution_extensions",
"sphinxcontrib.autodoc_pydantic",
]

# Provide substitutions for common values
Expand All @@ -50,3 +51,12 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "sphinx_rtd_theme"

nitpicky = True
nitpick_ignore_regex = [
# Ignore exceptions from nested Pydantic models
(r'py:.*', r'cellarium\.cas\.models\..*'),
]

# The JSON schema is a bit much in the docs
autodoc_pydantic_model_show_json = False
Loading