Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to browse statements underlying gene set analysis results #195

Merged
merged 12 commits into from
Jan 24, 2025
Merged
2 changes: 2 additions & 0 deletions src/indra_cogex/apps/gla/gene_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def discretize_analysis():
errors=errors,
method=form.correction.data,
alpha=form.alpha.data,
minimum_evidence=form.minimum_evidence.data,
minimum_belief=form.minimum_belief.data,
go_results=results["go"],
wikipathways_results=results["wikipathways"],
reactome_results=results["reactome"],
Expand Down
25 changes: 17 additions & 8 deletions src/indra_cogex/apps/queries_web/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,29 @@
gene_expression_ns = Namespace("Gene Expression Queries", CATEGORY_DESCRIPTIONS['gene_expression'], path="/api")
go_terms_ns = Namespace("GO Terms Queries", CATEGORY_DESCRIPTIONS['go_terms'], path="/api")
clinical_trials_ns = Namespace("Clinical Trials Queries", CATEGORY_DESCRIPTIONS['clinical_trials'], path="/api")
biological_pathways_ns = Namespace("Biological Pathways Queries", CATEGORY_DESCRIPTIONS['biological_pathways'], path="/api")
biological_pathways_ns = Namespace("Biological Pathways Queries", CATEGORY_DESCRIPTIONS['biological_pathways'],
path="/api")
drug_side_effects_ns = Namespace("Drug Side Effects Queries", CATEGORY_DESCRIPTIONS['drug_side_effects'], path="/api")
ontology_ns = Namespace("Ontology Queries", CATEGORY_DESCRIPTIONS['ontology'], path="/api")
literature_metadata_ns = Namespace("Literature Metadata Queries", CATEGORY_DESCRIPTIONS['literature_metadata'], path="/api")
literature_metadata_ns = Namespace("Literature Metadata Queries", CATEGORY_DESCRIPTIONS['literature_metadata'],
path="/api")
statements_ns = Namespace("Statements Queries", CATEGORY_DESCRIPTIONS['statements'], path="/api")
drug_targets_ns = Namespace("Drug Targets Queries", CATEGORY_DESCRIPTIONS['drug_targets'], path="/api")
cell_markers_ns = Namespace("Cell Markers Queries", CATEGORY_DESCRIPTIONS['cell_markers'], path="/api")
disease_phenotypes_ns = Namespace("Disease-Phenotypes Association Queries", CATEGORY_DESCRIPTIONS['disease_phenotypes'], path="/api")
gene_disease_variant_ns = Namespace("Gene-Disease-Variant Association Queries", CATEGORY_DESCRIPTIONS['gene_disease_variant'], path="/api")
research_project_output_ns = Namespace("Project-Research Queries", CATEGORY_DESCRIPTIONS['project_research_outputs'], path="/api")
disease_phenotypes_ns = Namespace("Disease-Phenotypes Association Queries", CATEGORY_DESCRIPTIONS['disease_phenotypes'],
path="/api")
gene_disease_variant_ns = Namespace("Gene-Disease-Variant Association Queries",
CATEGORY_DESCRIPTIONS['gene_disease_variant'], path="/api")
research_project_output_ns = Namespace("Project-Research Queries", CATEGORY_DESCRIPTIONS['project_research_outputs'],
path="/api")
gene_domains_ns = Namespace("Gene Domain Queries", CATEGORY_DESCRIPTIONS['gene_domains'], path="/api")
phenotype_variant_ns = Namespace("Phenotype-Variant Association Queries", CATEGORY_DESCRIPTIONS['phenotype_variant'], path="/api")
phenotype_variant_ns = Namespace("Phenotype-Variant Association Queries", CATEGORY_DESCRIPTIONS['phenotype_variant'],
path="/api")
drug_indications_ns = Namespace("Drug Indication Queries", CATEGORY_DESCRIPTIONS['drug_indications'], path="/api")
gene_codependence_ns = Namespace("Gene Codependence Queries", CATEGORY_DESCRIPTIONS['gene_codependence'], path="/api")
enzyme_activity_ns = Namespace("Enzyme Activity Queries", CATEGORY_DESCRIPTIONS['enzyme_activity'], path="/api")
cell_line_properties_ns = Namespace("Cell Line Property Queries", CATEGORY_DESCRIPTIONS['cell_line_properties'], path="/api")
cell_line_properties_ns = Namespace("Cell Line Property Queries", CATEGORY_DESCRIPTIONS['cell_line_properties'],
path="/api")
analysis_ns = Namespace("Analysis Queries", CATEGORY_DESCRIPTIONS['analysis'], path="/api")
subnetwork_ns = Namespace("Subnetwork Queries", CATEGORY_DESCRIPTIONS['subnetwork'], path="/api")

Expand Down Expand Up @@ -320,7 +327,6 @@ def get_example_data():
}
}


examples_dict = {
"tissue": fields.List(fields.String, example=["UBERON", "UBERON:0001162"]),
"gene": {
Expand Down Expand Up @@ -402,6 +408,9 @@ def get_example_data():
"positive_genes": fields.List(fields.String, example=EXAMPLE_POSITIVE_HGNC_IDS),
"negative_genes": fields.List(fields.String, example=EXAMPLE_NEGATIVE_HGNC_IDS),
"gene_names": fields.List(fields.String, example=continuous_analysis_example_names),
"target_id": fields.String(example="hgnc:646"),
"is_downstream": fields.Boolean(example=False),
"minimum_evidence": fields.Float(example=2),
"log_fold_change": fields.List(fields.Float, example=continuous_analysis_example_data),
"species": fields.String(example="human"),
"permutations": fields.Integer(example=100),
Expand Down
165 changes: 161 additions & 4 deletions src/indra_cogex/apps/search/search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from typing import List, Optional, Mapping, Tuple

from flask import Blueprint, render_template, request, jsonify, redirect, url_for
from flask_jwt_extended import jwt_required
Expand All @@ -8,12 +9,20 @@
from wtforms.fields.simple import BooleanField
from wtforms.validators import DataRequired

from indra_cogex.apps.utils import render_statements, format_stmts
from indra_cogex.apps.utils import render_statements
from indra_cogex.client import Neo4jClient, autoclient
from indra_cogex.client.queries import *
from indra_cogex.representation import norm_id

__all__ = ["search_blueprint"]

from indra_cogex.client.queries import enrich_statements

from indra_cogex.representation import indra_stmts_from_relations

search_blueprint = Blueprint("search", __name__, url_prefix="/search")


class SearchForm(FlaskForm):
agent_name = StringField("Agent Name", validators=[DataRequired()])
agent_role = StringField("Agent Role")
Expand All @@ -29,7 +38,7 @@ class SearchForm(FlaskForm):
submit = SubmitField("Search")


@search_blueprint.route("/", methods=['GET','POST'])
@search_blueprint.route("/", methods=['GET', 'POST'])
@jwt_required(optional=True)
def search():
stmt_types = {c.__name__ for c in get_all_descendants(Statement)}
Expand Down Expand Up @@ -105,9 +114,11 @@ def search():
stmt_types_json=stmt_types_json,
)


from flask import current_app

@search_blueprint.route("/gilda_ground", methods=["GET","POST"])

@search_blueprint.route("/gilda_ground", methods=["GET", "POST"])
@jwt_required(optional=True)
def gilda_ground_endpoint():
data = request.get_json()
Expand All @@ -122,6 +133,7 @@ def gilda_ground_endpoint():
except Exception as e:
return {"error": str(e)}, 500


def gilda_ground(agent_text):
try:
from gilda.api import ground
Expand All @@ -131,4 +143,149 @@ def gilda_ground(agent_text):
res = requests.post('http://grounding.indra.bio/ground', json={'text': agent_text})
return res.json()
except Exception as e:
return {"error": f"Grounding failed: {str(e)}"}
return {"error": f"Grounding failed: {str(e)}"}


@autoclient()
def get_ora_statements(
    target_id: str,
    genes: List[str],
    minimum_belief: float = 0.0,
    minimum_evidence: Optional[int] = None,
    is_downstream: bool = False,
    *,
    client: Neo4jClient,
) -> Tuple[List[Statement], Mapping[int, int]]:
    """Get statements connecting input genes to target entity for ORA analysis.

    Parameters
    ----------
    target_id : str
        The ID of the target entity (e.g., 'GO:0006955', 'MESH:D007239').
        The namespace prefix is matched case-insensitively.
    genes : List[str]
        List of gene IDs (e.g., ['HGNC:6019', 'HGNC:11876']). A bare
        identifier without a prefix (e.g., '6019') is also accepted and is
        treated as an HGNC identifier.
    minimum_belief : float
        Relations of type ``indra_rel`` are kept only if their belief is
        strictly greater than this value. Other relation types are not
        filtered by belief.
    minimum_evidence : Optional[int]
        Minimum ``evidence_count`` a relation needs to be included. When
        None or 0, no evidence filtering is applied.
    is_downstream : bool
        If True, match relations pointing from the genes to the target;
        otherwise match relations pointing from the target to the genes.
    client : Neo4jClient
        The Neo4j client to use for querying

    Returns
    -------
    :
        A tuple containing:
        - List of INDRA statements representing the relationships
        - Dictionary mapping statement hashes to their evidence counts
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    logger = logging.getLogger(__name__)

    # Normalize gene IDs; tolerate identifiers given without a prefix
    # instead of failing with an IndexError.
    normalized_genes = []
    for gene in genes:
        _, sep, local_id = gene.partition(':')
        normalized_genes.append(norm_id('HGNC', local_id if sep else gene))
    logger.debug("Normalized genes: %s...", normalized_genes[:5])

    # Handle different entity types and their relationships
    namespace, _, id_part = target_id.partition(':')
    namespace = namespace.lower()

    if namespace == 'mesh':
        normalized_target = f"mesh:{id_part}"
        rel_types = ["indra_rel", "has_indication"]
    elif namespace == 'fplx':
        normalized_target = f"fplx:{id_part}"
        rel_types = ["indra_rel", "isa"]
    else:
        normalized_target = target_id.lower()
        rel_types = ["indra_rel"]

    # The two directions differ only in the MATCH pattern, so the rest of
    # the query is shared. Only constant strings are concatenated here; all
    # user-supplied values are passed as query parameters.
    if is_downstream:
        match_pattern = "(u:BioEntity)-[r]->(d:BioEntity {id: $target_id})"
    else:
        match_pattern = "(d:BioEntity {id: $target_id})-[r]->(u:BioEntity)"

    query = "MATCH p = " + match_pattern + """
        WHERE type(r) IN $rel_types
        AND u.id STARTS WITH "hgnc"
        AND NOT u.obsolete
        AND u.id IN $genes
        AND (type(r) <> 'indra_rel' OR r.belief > $minimum_belief)
        WITH distinct r.stmt_hash AS hash, collect(p) as pp
        RETURN pp
    """

    params = {
        "target_id": normalized_target,
        "genes": normalized_genes,
        "rel_types": rel_types,
        "minimum_belief": minimum_belief,
    }
    results = client.query_tx(query, **params)

    # Each result row holds the collected paths for one statement hash;
    # the first path of each collection is converted to a relation.
    flattened_rels = [
        client.neo4j_to_relation(paths[0])
        for row in results
        for paths in row
    ]

    # Filter relations based on minimum_evidence
    if minimum_evidence:
        flattened_rels = [
            rel for rel in flattened_rels
            if rel.data.get("evidence_count", 0) >= minimum_evidence
        ]

    stmts = indra_stmts_from_relations(flattened_rels, deduplicate=True)

    # Enrich statements with complete evidence (no limit)
    stmts = enrich_statements(
        stmts,
        client=client
    )

    # Create evidence count mapping keyed by statement hash rather than by
    # list position, so the result does not depend on deduplication and
    # enrichment preserving the order and length of the relation list.
    # NOTE(review): assumes rel.data exposes the relation's "stmt_hash"
    # property (the one the Cypher query groups by) - confirm.
    count_by_hash = {
        rel.data["stmt_hash"]: rel.data.get("evidence_count", 0)
        for rel in flattened_rels
        if rel.data.get("stmt_hash") is not None
    }
    evidence_counts = {}
    for stmt in stmts:
        stmt_hash = stmt.get_hash()
        evidence_counts[stmt_hash] = count_by_hash.get(stmt_hash, 0)

    return stmts, evidence_counts


@search_blueprint.route("/ora_statements/", methods=['GET'])
@jwt_required(optional=True)
def search_ora_statements():
    """Endpoint to get INDRA statements connecting input genes to a target entity.

    Query parameters
    ----------------
    target_id : str
        Required. Identifier of the target entity.
    genes : str (repeatable)
        Required. One or more gene identifiers.
    is_downstream : str
        Interpreted as True only when equal to "true" (case-insensitive).
    minimum_evidence : number
        Optional, defaults to 2. Integer values written in float form
        (e.g. "2.0") are accepted and truncated to an integer.
    minimum_belief : float
        Optional, defaults to 0.0.

    Returns
    -------
    :
        The rendered statements on success, a JSON error with status 400 for
        malformed or missing parameters, or a JSON error with status 500 if
        retrieving or rendering the statements fails.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    logger = logging.getLogger(__name__)

    target_id = request.args.get("target_id")
    genes = request.args.getlist("genes")
    is_downstream = request.args.get("is_downstream", "").lower() == "true"

    try:
        # Parse through float first so values such as "2.0" do not get
        # rejected; OverflowError covers int(float("inf")).
        minimum_evidence = int(float(request.args.get("minimum_evidence") or 2))
        minimum_belief = float(request.args.get("minimum_belief") or 0.0)
    except (ValueError, TypeError, OverflowError):
        return jsonify({"error": "Invalid parameter values"}), 400

    if not target_id or not genes:
        return jsonify({"error": "target_id and genes are required"}), 400

    try:
        statements, evidence_counts = get_ora_statements(
            target_id=target_id,
            genes=genes,
            minimum_belief=minimum_belief,
            minimum_evidence=minimum_evidence,
            is_downstream=is_downstream
        )

        return render_statements(
            stmts=statements,
            evidence_count=evidence_counts
        )

    except Exception as e:
        # Request boundary: log with traceback and report the failure to the
        # client instead of letting the exception propagate.
        logger.exception("Error in get_ora_statements: %s", e)
        return jsonify({"error": str(e)}), 500
47 changes: 43 additions & 4 deletions src/indra_cogex/apps/templates/gene_analysis/discrete_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
$("#table-wikipathways").DataTable(datatablesConf);
$("#table-hpo").DataTable(datatablesConf);
{% if indra_downstream_results is not none %}
$("#table-downstream").DataTable(datatablesConf);
$("#table-upstream").DataTable(datatablesConf);
$("#indra-table-downstream").DataTable(datatablesConf);
$("#indra-table-upstream").DataTable(datatablesConf);
{% endif %}
});
</script>
Expand Down Expand Up @@ -82,6 +82,45 @@
</table>
{% endmacro %}

{% macro render_indra_table(df, table_id) -%}
    {# Render an ORA result table whose rows link to the statements behind
       each result. `df` rows must unpack as (curie, name, p, mlp, q, mlq);
       `table_id` is prefixed with "indra-" to form the element id, and the
       direction flag is derived from whether it contains "downstream".
       Reads `genes`, `minimum_evidence` and `minimum_belief` from the
       template context. #}
    {% set is_downstream = "downstream" in table_id %}
    {# The gene list does not depend on the row, so build it once here
       rather than once per table row. #}
    {% set gene_list = [] %}
    {% for gene_id in genes.keys() %}
        {% do gene_list.append("HGNC:" + gene_id) %}
    {% endfor %}
    <table class="table table-hover table-striped table-ora" id="indra-{{ table_id }}" style="width: 100%;">
        <thead>
        <tr>
            <th scope="col">CURIE</th>
            <th scope="col">Name</th>
            <th scope="col"><i>p</i>-value</th>
            <th scope="col"><i>q</i>-value</th>
            <th scope="col">Statements</th>
        </tr>
        </thead>
        <tbody>
        {% for curie, name, p, mlp, q, mlq in df.values %}
            <tr>
                <td><a href="https://bioregistry.io/{{ curie }}" target="_blank">{{ curie }}</a></td>
                <td>{{ name }}</td>
                <td>{{ "{:.2e}".format(p) }}</td>
                <td>{{ "{:.2e}".format(q) }}</td>
                <td>
                    <a href="{{ url_for('search.search_ora_statements',
                                        target_id=curie,
                                        genes=gene_list,
                                        minimum_evidence=minimum_evidence,
                                        minimum_belief=minimum_belief,
                                        is_downstream=is_downstream) }}"
                       target="_blank"
                       class="btn btn-primary btn-sm">View Statements</a>
                </td>
            </tr>
        {% endfor %}
        </tbody>
    </table>
{% endmacro %}

{% block container %}
<div class="card card-body bg-light">
<h1 class="display-3">Gene List Analysis</h1>
Expand Down Expand Up @@ -195,7 +234,7 @@ <h3>Over-Representation Analysis</h3>
</p>
<p>
</p>
{{ render_table(indra_downstream_results, "table-downstream") }}
{{ render_indra_table(indra_downstream_results, "table-downstream") }}
</div>
<div class="tab-pane" id="upstream" role="tabpanel" aria-labelledby="upstream-tab">
<p>
Expand All @@ -206,7 +245,7 @@ <h3>Over-Representation Analysis</h3>
and correcting using {{ method }} and α={{ alpha }} on genes causally upstream in
one step from all entities in the INDRA Database.
</p>
{{ render_table(indra_upstream_results, "table-upstream") }}
{{ render_indra_table(indra_upstream_results, "table-upstream") }}
</div>
{% endif %}
</div>
Expand Down
Loading