diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java index dc2521a7e..74c6dc10b 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/SolrFieldMapper.java @@ -76,8 +76,8 @@ public static List mapFieldsList(Collection ols3FieldNames) { continue; } - if (legacyFieldName.equals("_json")) { - newFields.add("_json" + suffix); + if (legacyFieldName.equals("annotations_trimmed")) { + newFields.add(prefix + "searchableAnnotationValues" + suffix); continue; } } diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java index 1d641b071..ca6d2fdaa 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/controller/api/v1/V1SearchController.java @@ -20,6 +20,7 @@ import uk.ac.ebi.spot.ols.repository.transforms.RemoveLiteralDatatypesTransform; import uk.ac.ebi.spot.ols.repository.v1.JsonHelper; import uk.ac.ebi.spot.ols.repository.v1.V1OntologyRepository; +import uk.ac.ebi.spot.ols.repository.v1.mappers.AnnotationExtractor; import javax.servlet.http.HttpServletResponse; import java.io.IOException; @@ -27,6 +28,7 @@ import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * @author Simon Jupp @@ -73,7 +75,7 @@ public void search( if (queryFields == null) { // if exact just search the supplied fields for exact matches if (exact) { - String[] fields = {"label_s", "synonym_s", "short_form_s", "obo_id_s", "iri_s", "_json"}; + String[] fields = {"label_s", "synonym_s", "short_form_s", "obo_id_s", "iri_s", "annotations_trimmed"}; solrQuery.setQuery( "((" + createUnionQuery(query.toLowerCase(), SolrFieldMapper.mapFieldsList(List.of(fields)).toArray(new String[0]), true) @@ -85,7 +87,7 @@ public void search( solrQuery.set("defType", "edismax"); solrQuery.setQuery(query); - String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "_json"}; + String[] fields = {"label^5", "synonym^3", "definition", "short_form^2", "obo_id^2", "iri", "annotations_trimmed"}; solrQuery.set("qf", String.join(" ", SolrFieldMapper.mapFieldsList(List.of(fields)))); @@ -106,7 +108,10 @@ public void search( } } - solrQuery.setFields("_json"); + if (fieldList.contains("score")) + solrQuery.setFields("_json","score"); + else + solrQuery.setFields("_json"); if (ontologies != null && !ontologies.isEmpty()) { @@ -231,11 +236,26 @@ public void search( if (fieldList.contains("synonym")) outDoc.put("synonym", JsonHelper.getStrings(json, "synonym")); if (fieldList.contains("ontology_prefix")) outDoc.put("ontology_prefix", JsonHelper.getString(json, "ontologyPreferredPrefix")); if (fieldList.contains("subset")) outDoc.put("subset", JsonHelper.getStrings(json, "http://www.geneontology.org/formats/oboInOwl#inSubset")); + if (fieldList.contains("ontology_iri")) outDoc.put("ontology_iri", JsonHelper.getStrings(json, "ontologyIri").get(0)); + if (fieldList.contains("score")) outDoc.put("score", res.get("score")); + + // Include annotations that were specified with _annotation + boolean anyAnnotations = fieldList.stream() + .anyMatch(s -> s.endsWith("_annotation")); + if (anyAnnotations) { + Stream annotationFields = fieldList.stream().filter(s -> s.endsWith("_annotation")); + Map termAnnotations = AnnotationExtractor.extractAnnotations(json); + + annotationFields.forEach(annotationName -> { + // Remove _annotation suffix to get plain annotation name + String fieldName = annotationName.replaceFirst("_annotation$", ""); + outDoc.put(annotationName, termAnnotations.get(fieldName)); + }); + } docs.add(outDoc); } - Map responseHeader = new HashMap<>(); responseHeader.put("status", 0); responseHeader.put("QTime", qr.getQTime()); diff --git a/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java b/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java index 6a2cff813..55a150bf3 100644 --- a/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java +++ b/backend/src/main/java/uk/ac/ebi/spot/ols/repository/v2/helpers/V2SearchFieldsParser.java @@ -20,6 +20,7 @@ public static void addSearchFieldsToQuery(OlsSolrQuery query, String searchField query.addSearchField("id", 1, SearchType.WHITESPACE_EDGES); query.addSearchField("oboId", 1, SearchType.WHITESPACE_EDGES); query.addSearchField("synonym", 1, SearchType.WHITESPACE_EDGES); + query.addSearchField("searchableAnnotationValues", 1, SearchType.WHITESPACE_EDGES); } else { for (ParsedField field : parseFieldsString(searchFields)) { query.addSearchField(field.property, field.weight, SearchType.CASE_INSENSITIVE_TOKENS); diff --git a/dataload/json2neo/src/main/java/OntologyWriter.java b/dataload/json2neo/src/main/java/OntologyWriter.java index bb6d649aa..30c1a4fd5 100644 --- a/dataload/json2neo/src/main/java/OntologyWriter.java +++ b/dataload/json2neo/src/main/java/OntologyWriter.java @@ -23,7 +23,9 @@ public class OntologyWriter { public static final Set PROPERTY_BLACKLIST = Set.of( // large and doesn't get queried - "appearsIn" + "appearsIn", + // all property values together, this is for solr and not useful in neo4j + "searchableAnnotationValues" ); public static final Set EDGE_BLACKLIST = Set.of( diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java index 052849e5e..275e6a511 100644 --- a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/OntologyGraph.java @@ -247,6 +247,7 @@ private String urlToFilename(String url) { long endTime = System.nanoTime(); System.out.println("load ontology: " + ((endTime - startTime) / 1000 / 1000 / 1000)); + SearchableAnnotationValuesAnnotator.annotateSearchableAnnotationValues(this); InverseOfAnnotator.annotateInverseOf(this); NegativePropertyAssertionAnnotator.annotateNegativePropertyAssertions(this); OboSynonymTypeNameAnnotator.annotateOboSynonymTypeNames(this); // n.b. this one labels axioms so must run before the ReifiedPropertyAnnotator diff --git a/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java new file mode 100644 index 000000000..6e8d83276 --- /dev/null +++ b/dataload/rdf2json/src/main/java/uk/ac/ebi/rdf2json/annotators/SearchableAnnotationValuesAnnotator.java @@ -0,0 +1,61 @@ +package uk.ac.ebi.rdf2json.annotators; + +import uk.ac.ebi.rdf2json.OntologyGraph; +import uk.ac.ebi.rdf2json.OntologyNode; +import uk.ac.ebi.rdf2json.properties.PropertyValue; + +import java.util.ArrayList; +import java.util.List; + +import static uk.ac.ebi.rdf2json.properties.PropertyValue.Type.LITERAL; + +public class SearchableAnnotationValuesAnnotator { + + // Roughly equivalent to "annotations_trimmed" in OLS3. + // + // A field that contains a list of just the values (no predicates) of all of the "annotations" (which is not a well + // defined term, so we have to make it up) of an entity. + // + // This field is used for solr searching, so that you can search for the value of any property (regardless of how + // important OLS thinks it is), and still expect a result. + // + public static void annotateSearchableAnnotationValues(OntologyGraph graph) { + + long startTime3 = System.nanoTime(); + for(String id : graph.nodes.keySet()) { + OntologyNode c = graph.nodes.get(id); + if(c.types.contains(OntologyNode.NodeType.CLASS) || + c.types.contains(OntologyNode.NodeType.PROPERTY) || + c.types.contains(OntologyNode.NodeType.INDIVIDUAL) || + c.types.contains(OntologyNode.NodeType.ONTOLOGY)) { + + List values = new ArrayList<>(); + + for(var predicate : c.properties.getPropertyPredicates()) { + + // namespaces that are NOT considered annotations for this exercise... + // + if(predicate.startsWith("http://www.w3.org/1999/02/22-rdf-syntax-ns#") + || predicate.startsWith("http://www.w3.org/2000/01/rdf-schema#") + || predicate.startsWith("http://www.w3.org/2002/07/owl#")) { + + continue; + } + + for(var value : c.properties.getPropertyValues(predicate)) { + if(value.getType().equals(LITERAL)) { + values.add(value); + } + } + } + + for(var value : values) { + c.properties.addProperty("searchableAnnotationValues", value); + } + } + } + + long endTime3 = System.nanoTime(); + System.out.println("annotate searchable annotation values: " + ((endTime3 - startTime3) / 1000 / 1000 / 1000)); + } +} diff --git a/dataload/solr_config/ols4_entities/conf/managed-schema.xml b/dataload/solr_config/ols4_entities/conf/managed-schema.xml index f538162e8..bed3f4ab2 100644 --- a/dataload/solr_config/ols4_entities/conf/managed-schema.xml +++ b/dataload/solr_config/ols4_entities/conf/managed-schema.xml @@ -135,7 +135,7 @@ - + diff --git a/frontend/index.html.in b/frontend/index.html.in index fd9be5325..3ae03bdc0 100644 --- a/frontend/index.html.in +++ b/frontend/index.html.in @@ -15,7 +15,7 @@