Skip to content

Commit

Permalink
Merge pull request #195 from prasham21/browse-statements
Browse files Browse the repository at this point in the history
Add ability to browse statements underlying gene set analysis results
  • Loading branch information
bgyori authored Jan 24, 2025
2 parents f54a466 + 4cbb65e commit 16bb5bc
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 16 deletions.
2 changes: 2 additions & 0 deletions src/indra_cogex/apps/gla/gene_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def discretize_analysis():
errors=errors,
method=form.correction.data,
alpha=form.alpha.data,
minimum_evidence=form.minimum_evidence.data,
minimum_belief=form.minimum_belief.data,
go_results=results["go"],
wikipathways_results=results["wikipathways"],
reactome_results=results["reactome"],
Expand Down
25 changes: 17 additions & 8 deletions src/indra_cogex/apps/queries_web/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,29 @@
gene_expression_ns = Namespace("Gene Expression Queries", CATEGORY_DESCRIPTIONS['gene_expression'], path="/api")
go_terms_ns = Namespace("GO Terms Queries", CATEGORY_DESCRIPTIONS['go_terms'], path="/api")
clinical_trials_ns = Namespace("Clinical Trials Queries", CATEGORY_DESCRIPTIONS['clinical_trials'], path="/api")
biological_pathways_ns = Namespace("Biological Pathways Queries", CATEGORY_DESCRIPTIONS['biological_pathways'], path="/api")
biological_pathways_ns = Namespace("Biological Pathways Queries", CATEGORY_DESCRIPTIONS['biological_pathways'],
path="/api")
drug_side_effects_ns = Namespace("Drug Side Effects Queries", CATEGORY_DESCRIPTIONS['drug_side_effects'], path="/api")
ontology_ns = Namespace("Ontology Queries", CATEGORY_DESCRIPTIONS['ontology'], path="/api")
literature_metadata_ns = Namespace("Literature Metadata Queries", CATEGORY_DESCRIPTIONS['literature_metadata'], path="/api")
literature_metadata_ns = Namespace("Literature Metadata Queries", CATEGORY_DESCRIPTIONS['literature_metadata'],
path="/api")
statements_ns = Namespace("Statements Queries", CATEGORY_DESCRIPTIONS['statements'], path="/api")
drug_targets_ns = Namespace("Drug Targets Queries", CATEGORY_DESCRIPTIONS['drug_targets'], path="/api")
cell_markers_ns = Namespace("Cell Markers Queries", CATEGORY_DESCRIPTIONS['cell_markers'], path="/api")
disease_phenotypes_ns = Namespace("Disease-Phenotypes Association Queries", CATEGORY_DESCRIPTIONS['disease_phenotypes'], path="/api")
gene_disease_variant_ns = Namespace("Gene-Disease-Variant Association Queries", CATEGORY_DESCRIPTIONS['gene_disease_variant'], path="/api")
research_project_output_ns = Namespace("Project-Research Queries", CATEGORY_DESCRIPTIONS['project_research_outputs'], path="/api")
disease_phenotypes_ns = Namespace("Disease-Phenotypes Association Queries", CATEGORY_DESCRIPTIONS['disease_phenotypes'],
path="/api")
gene_disease_variant_ns = Namespace("Gene-Disease-Variant Association Queries",
CATEGORY_DESCRIPTIONS['gene_disease_variant'], path="/api")
research_project_output_ns = Namespace("Project-Research Queries", CATEGORY_DESCRIPTIONS['project_research_outputs'],
path="/api")
gene_domains_ns = Namespace("Gene Domain Queries", CATEGORY_DESCRIPTIONS['gene_domains'], path="/api")
phenotype_variant_ns = Namespace("Phenotype-Variant Association Queries", CATEGORY_DESCRIPTIONS['phenotype_variant'], path="/api")
phenotype_variant_ns = Namespace("Phenotype-Variant Association Queries", CATEGORY_DESCRIPTIONS['phenotype_variant'],
path="/api")
drug_indications_ns = Namespace("Drug Indication Queries", CATEGORY_DESCRIPTIONS['drug_indications'], path="/api")
gene_codependence_ns = Namespace("Gene Codependence Queries", CATEGORY_DESCRIPTIONS['gene_codependence'], path="/api")
enzyme_activity_ns = Namespace("Enzyme Activity Queries", CATEGORY_DESCRIPTIONS['enzyme_activity'], path="/api")
cell_line_properties_ns = Namespace("Cell Line Property Queries", CATEGORY_DESCRIPTIONS['cell_line_properties'], path="/api")
cell_line_properties_ns = Namespace("Cell Line Property Queries", CATEGORY_DESCRIPTIONS['cell_line_properties'],
path="/api")
analysis_ns = Namespace("Analysis Queries", CATEGORY_DESCRIPTIONS['analysis'], path="/api")
subnetwork_ns = Namespace("Subnetwork Queries", CATEGORY_DESCRIPTIONS['subnetwork'], path="/api")

Expand Down Expand Up @@ -320,7 +327,6 @@ def get_example_data():
}
}


examples_dict = {
"tissue": fields.List(fields.String, example=["UBERON", "UBERON:0001162"]),
"gene": {
Expand Down Expand Up @@ -402,6 +408,9 @@ def get_example_data():
"positive_genes": fields.List(fields.String, example=EXAMPLE_POSITIVE_HGNC_IDS),
"negative_genes": fields.List(fields.String, example=EXAMPLE_NEGATIVE_HGNC_IDS),
"gene_names": fields.List(fields.String, example=continuous_analysis_example_names),
"target_id": fields.String(example="hgnc:646"),
"is_downstream": fields.Boolean(example=False),
"minimum_evidence": fields.Float(example=2),
"log_fold_change": fields.List(fields.Float, example=continuous_analysis_example_data),
"species": fields.String(example="human"),
"permutations": fields.Integer(example=100),
Expand Down
165 changes: 161 additions & 4 deletions src/indra_cogex/apps/search/search.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from typing import List, Optional, Mapping, Tuple

from flask import Blueprint, render_template, request, jsonify, redirect, url_for
from flask_jwt_extended import jwt_required
Expand All @@ -8,12 +9,20 @@
from wtforms.fields.simple import BooleanField
from wtforms.validators import DataRequired

from indra_cogex.apps.utils import render_statements, format_stmts
from indra_cogex.apps.utils import render_statements
from indra_cogex.client import Neo4jClient, autoclient
from indra_cogex.client.queries import *
from indra_cogex.representation import norm_id

__all__ = ["search_blueprint"]

from indra_cogex.client.queries import enrich_statements

from indra_cogex.representation import indra_stmts_from_relations

search_blueprint = Blueprint("search", __name__, url_prefix="/search")


class SearchForm(FlaskForm):
agent_name = StringField("Agent Name", validators=[DataRequired()])
agent_role = StringField("Agent Role")
Expand All @@ -29,7 +38,7 @@ class SearchForm(FlaskForm):
submit = SubmitField("Search")


@search_blueprint.route("/", methods=['GET','POST'])
@search_blueprint.route("/", methods=['GET', 'POST'])
@jwt_required(optional=True)
def search():
stmt_types = {c.__name__ for c in get_all_descendants(Statement)}
Expand Down Expand Up @@ -105,9 +114,11 @@ def search():
stmt_types_json=stmt_types_json,
)


from flask import current_app

@search_blueprint.route("/gilda_ground", methods=["GET","POST"])

@search_blueprint.route("/gilda_ground", methods=["GET", "POST"])
@jwt_required(optional=True)
def gilda_ground_endpoint():
data = request.get_json()
Expand All @@ -122,6 +133,7 @@ def gilda_ground_endpoint():
except Exception as e:
return {"error": str(e)}, 500


def gilda_ground(agent_text):
try:
from gilda.api import ground
Expand All @@ -131,4 +143,149 @@ def gilda_ground(agent_text):
res = requests.post('http://grounding.indra.bio/ground', json={'text': agent_text})
return res.json()
except Exception as e:
return {"error": f"Grounding failed: {str(e)}"}
return {"error": f"Grounding failed: {str(e)}"}


@autoclient()
def get_ora_statements(
    target_id: str,
    genes: List[str],
    minimum_belief: float = 0.0,
    minimum_evidence: Optional[int] = None,
    is_downstream: bool = False,
    *,
    client: Neo4jClient,
) -> Tuple[List[Statement], Mapping[int, int]]:
    """Get statements connecting input genes to a target entity for ORA.

    Parameters
    ----------
    target_id : str
        The CURIE of the target entity (e.g., 'GO:0006955', 'MESH:D007239').
        Must be of the form ``<namespace>:<identifier>``.
    genes : List[str]
        List of gene IDs (e.g., ['HGNC:6019', 'HGNC:11876']). A bare
        identifier without a prefix (e.g., '6019') is also accepted and is
        normalized as an HGNC identifier.
    minimum_belief : float
        Only ``indra_rel`` relations whose belief is strictly greater than
        this value are returned; other relation types are not filtered by
        belief.
    minimum_evidence : Optional[int]
        Minimum evidence count a relation must have to be included. If None
        (or 0), no evidence filtering is applied.
    is_downstream : bool
        If True, match relations pointing from the genes to the target;
        otherwise match relations pointing from the target to the genes.
    client : Neo4jClient
        The Neo4j client to use for querying.

    Returns
    -------
    :
        A tuple containing:
        - List of INDRA statements representing the relationships
        - Dictionary mapping statement hashes to their evidence counts

    Raises
    ------
    ValueError
        If ``target_id`` is not of the form ``<namespace>:<identifier>``.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Normalize gene IDs; tolerate IDs given without a namespace prefix
    normalized_genes = []
    for gene in genes:
        _, sep, local_id = gene.partition(":")
        normalized_genes.append(norm_id("HGNC", local_id if sep else gene))
    logger.debug("Normalized genes: %s...", normalized_genes[:5])

    # Split the target CURIE, failing with a clear message instead of an
    # opaque IndexError when the namespace separator is missing
    prefix, sep, id_part = target_id.partition(":")
    if not sep:
        raise ValueError(
            f"target_id must be of the form '<namespace>:<identifier>', "
            f"got {target_id!r}"
        )
    namespace = prefix.lower()

    # The relation types that are followed depend on the target's namespace
    if namespace == "mesh":
        normalized_target = f"mesh:{id_part}"
        rel_types = ["indra_rel", "has_indication"]
    elif namespace == "fplx":
        normalized_target = f"fplx:{id_part}"
        rel_types = ["indra_rel", "isa"]
    else:
        normalized_target = target_id.lower()
        rel_types = ["indra_rel"]

    # Only the direction of the matched path differs between the upstream
    # and downstream variants; the filters and the return clause are shared.
    if is_downstream:
        match_clause = (
            "MATCH p = (u:BioEntity)-[r]->(d:BioEntity {id: $target_id})"
        )
    else:
        match_clause = (
            "MATCH p = (d:BioEntity {id: $target_id})-[r]->(u:BioEntity)"
        )
    query = match_clause + """
        WHERE type(r) IN $rel_types
        AND u.id STARTS WITH "hgnc"
        AND NOT u.obsolete
        AND u.id IN $genes
        AND (type(r) <> 'indra_rel' OR r.belief > $minimum_belief)
        WITH distinct r.stmt_hash AS hash, collect(p) as pp
        RETURN pp
    """

    params = {
        "target_id": normalized_target,
        "genes": normalized_genes,
        "rel_types": rel_types,
        "minimum_belief": minimum_belief,
    }
    results = client.query_tx(query, **params)
    # Each row holds the collected paths for one statement hash; the first
    # path of each collection is converted into a relation.
    flattened_rels = [
        client.neo4j_to_relation(paths[0]) for row in results for paths in row
    ]

    # Filter relations based on minimum_evidence
    if minimum_evidence:
        flattened_rels = [
            rel
            for rel in flattened_rels
            if rel.data.get("evidence_count", 0) >= minimum_evidence
        ]

    stmts = indra_stmts_from_relations(flattened_rels, deduplicate=True)

    # Enrich statements with evidence using enrich_statements' defaults
    stmts = enrich_statements(stmts, client=client)

    # Key evidence counts by statement hash rather than by list position:
    # deduplication/enrichment may leave ``stmts`` with a different length
    # or order than ``flattened_rels``, which would mispair a positional zip.
    count_by_hash = {
        rel.data.get("stmt_hash"): rel.data.get("evidence_count", 0)
        for rel in flattened_rels
    }
    evidence_counts = {}
    for stmt in stmts:
        stmt_hash = stmt.get_hash()
        evidence_counts[stmt_hash] = count_by_hash.get(stmt_hash, 0)

    return stmts, evidence_counts


@search_blueprint.route("/ora_statements/", methods=['GET'])
@jwt_required(optional=True)
def search_ora_statements():
    """Endpoint to get INDRA statements connecting input genes to a target entity.

    Query parameters
    ----------------
    target_id : str
        Required. CURIE of the target entity, ``<namespace>:<identifier>``.
    genes : str (repeatable)
        Required. One or more gene IDs.
    is_downstream : str
        Treated as True only if the value is "true" (case-insensitive).
    minimum_evidence : int
        Defaults to 2 when missing or empty. Integral values written as
        floats (e.g. "2.0") are accepted.
    minimum_belief : float
        Defaults to 0.0 when missing or empty.

    Returns
    -------
    :
        The rendered statements on success, a JSON error with status 400
        for invalid or missing parameters, or a JSON error with status 500
        if retrieving or rendering the statements fails.
    """
    import logging

    logger = logging.getLogger(__name__)

    target_id = request.args.get("target_id")
    genes = request.args.getlist("genes")
    is_downstream = request.args.get("is_downstream", "").lower() == "true"

    try:
        # Parse through float so that integral values serialized as floats
        # (e.g. "2.0") are accepted, while still rejecting non-integral
        # values such as "2.5", "nan" or "inf".
        evidence_value = float(request.args.get("minimum_evidence") or 2)
        if not evidence_value.is_integer():
            raise ValueError("minimum_evidence must be an integer")
        minimum_evidence = int(evidence_value)
        minimum_belief = float(request.args.get("minimum_belief") or 0.0)
    except (ValueError, TypeError):
        return jsonify({"error": "Invalid parameter values"}), 400

    if not target_id or not genes:
        return jsonify({"error": "target_id and genes are required"}), 400

    # A target without a namespace separator is a client error; reject it
    # here instead of letting it surface as an internal server error.
    if ":" not in target_id:
        return jsonify(
            {"error": "target_id must be of the form <namespace>:<identifier>"}
        ), 400

    try:
        statements, evidence_counts = get_ora_statements(
            target_id=target_id,
            genes=genes,
            minimum_belief=minimum_belief,
            minimum_evidence=minimum_evidence,
            is_downstream=is_downstream,
        )

        return render_statements(
            stmts=statements,
            evidence_count=evidence_counts,
        )

    except Exception as e:
        # Top-level request boundary: record the full traceback and report
        # the failure to the client as a JSON error.
        logger.exception("Error in get_ora_statements")
        return jsonify({"error": str(e)}), 500
47 changes: 43 additions & 4 deletions src/indra_cogex/apps/templates/gene_analysis/discrete_results.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
$("#table-wikipathways").DataTable(datatablesConf);
$("#table-hpo").DataTable(datatablesConf);
{% if indra_downstream_results is not none %}
$("#table-downstream").DataTable(datatablesConf);
$("#table-upstream").DataTable(datatablesConf);
$("#indra-table-downstream").DataTable(datatablesConf);
$("#indra-table-upstream").DataTable(datatablesConf);
{% endif %}
});
</script>
Expand Down Expand Up @@ -82,6 +82,45 @@
</table>
{% endmacro %}

{% macro render_indra_table(df, table_id) -%}
    {#
      Render an ORA result table whose rows each link to the statements
      endpoint (search.search_ora_statements) for that row's CURIE.

      df       -- results table; each row of df.values is unpacked as
                  (curie, name, p, mlp, q, mlq)
      table_id -- id suffix; the rendered table gets id "indra-<table_id>",
                  and a table_id containing "downstream" sets is_downstream

      Reads genes, minimum_evidence and minimum_belief from the template
      context.
    #}
    {% set is_downstream = "downstream" in table_id %}
    {# The gene list is identical for every row, so build it once up front
       instead of rebuilding it inside the row loop. #}
    {% set gene_list = [] %}
    {% for gene_id in genes.keys() %}
        {% do gene_list.append("HGNC:" + gene_id) %}
    {% endfor %}
    <table class="table table-hover table-striped table-ora" id="indra-{{ table_id }}" style="width: 100%;">
        <thead>
        <tr>
            <th scope="col">CURIE</th>
            <th scope="col">Name</th>
            <th scope="col"><i>p</i>-value</th>
            <th scope="col"><i>q</i>-value</th>
            <th scope="col">Statements</th>
        </tr>
        </thead>
        <tbody>
        {% for curie, name, p, mlp, q, mlq in df.values %}
            <tr>
                <td><a href="https://bioregistry.io/{{ curie }}" target="_blank">{{ curie }}</a></td>
                <td>{{ name }}</td>
                <td>{{ "{:.2e}".format(p) }}</td>
                <td>{{ "{:.2e}".format(q) }}</td>
                <td>
                    <a href="{{ url_for('search.search_ora_statements',
                                    target_id=curie,
                                    genes=gene_list,
                                    minimum_evidence=minimum_evidence,
                                    minimum_belief=minimum_belief,
                                    is_downstream=is_downstream) }}"
                       target="_blank"
                       class="btn btn-primary btn-sm">View Statements</a>
                </td>
            </tr>
        {% endfor %}
        </tbody>
    </table>
{% endmacro %}

{% block container %}
<div class="card card-body bg-light">
<h1 class="display-3">Gene List Analysis</h1>
Expand Down Expand Up @@ -195,7 +234,7 @@ <h3>Over-Representation Analysis</h3>
</p>
<p>
</p>
{{ render_table(indra_downstream_results, "table-downstream") }}
{{ render_indra_table(indra_downstream_results, "table-downstream") }}
</div>
<div class="tab-pane" id="upstream" role="tabpanel" aria-labelledby="upstream-tab">
<p>
Expand All @@ -206,7 +245,7 @@ <h3>Over-Representation Analysis</h3>
and correcting using {{ method }} and α={{ alpha }} on genes causally upstream in
one step from all entities in the INDRA Database.
</p>
{{ render_table(indra_upstream_results, "table-upstream") }}
{{ render_indra_table(indra_upstream_results, "table-upstream") }}
</div>
{% endif %}
</div>
Expand Down

0 comments on commit 16bb5bc

Please sign in to comment.