Skip to content

Commit 6872632

Browse files
authored
Merge pull request #5016 from freelawproject/4977-if-a-citation-cannot-be-disambiguated-thats-ok-we-can-send-it-to-a-c-page-which-will-list-multiple-options
Link citations that cannot be disambiguated
2 parents 82b75c3 + 8d01774 commit 6872632

File tree

5 files changed

+161
-10
lines changed

5 files changed

+161
-10
lines changed

cl/citations/annotate_citations.py

+28-5
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
import re
33
from typing import Dict, List
44

5+
from django.urls import reverse
56
from eyecite import annotate_citations, clean_text
67

7-
from cl.citations.match_citations import NO_MATCH_RESOURCE
8+
from cl.citations.match_citations import (
9+
MULTIPLE_MATCHES_RESOURCE,
10+
NO_MATCH_RESOURCE,
11+
)
812
from cl.citations.types import MatchedResourceType, SupportedCitationType
913
from cl.custom_filters.templatetags.text_filters import best_case_name
1014
from cl.lib.string_utils import trunc
@@ -55,25 +59,44 @@ def generate_annotations(
5559
:param citation_resolutions: A map of lists of citations in the opinion
5660
:return: A list of annotations, each holding a citation's span followed by the opening and closing HTML to wrap it in
5761
"""
62+
from cl.opinion_page.views import make_citation_url_dict
63+
5864
annotations: List[List] = []
5965
for opinion, citations in citation_resolutions.items():
6066
if opinion is NO_MATCH_RESOURCE: # If unsuccessfully matched...
6167
annotation = [
6268
'<span class="citation no-link">',
6369
"</span>",
6470
]
65-
else: # If successfully matched...
71+
# Annotate all unmatched citations
72+
annotations.extend([[c.span()] + annotation for c in citations])
73+
elif opinion is MULTIPLE_MATCHES_RESOURCE:
74+
# Multiple matches, can't disambiguate
75+
for c in citations:
76+
# Link each citation that can't be disambiguated to the citation
77+
# lookup page
78+
kwargs = make_citation_url_dict(**c.groups)
79+
citation_url = reverse("citation_redirector", kwargs=kwargs)
80+
annotation = [
81+
'<span class="citation multiple-matches">'
82+
f'<a href="{html.escape(citation_url)}">',
83+
"</a></span>",
84+
]
85+
annotations.append([c.span()] + annotation)
86+
else:
87+
# Successfully matched citation
6688
case_name = trunc(best_case_name(opinion.cluster), 60, "...")
6789
safe_case_name = html.escape(case_name)
90+
opinion_url = html.escape(opinion.cluster.get_absolute_url())
6891
annotation = [
6992
f'<span class="citation" data-id="{opinion.pk}">'
70-
f'<a href="{opinion.cluster.get_absolute_url()}"'
93+
f'<a href="{opinion_url}"'
7194
f' aria-description="Citation for case: {safe_case_name}"'
7295
">",
7396
"</a></span>",
7497
]
75-
for c in citations:
76-
annotations.append([c.span()] + annotation)
98+
# Annotate all matched citations for the resource
99+
annotations.extend([[c.span()] + annotation for c in citations])
77100
return annotations
78101

79102

cl/citations/match_citations.py

+10
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828

2929

3030
NO_MATCH_RESOURCE = Resource(case_citation(source_text="UNMATCHED_CITATION"))
31+
MULTIPLE_MATCHES_RESOURCE = Resource(
32+
case_citation(
33+
source_text="MULTIPLE_MATCHES", page="999999", volume="999999"
34+
)
35+
)
3136

3237

3338
def filter_by_matching_antecedent(
@@ -56,6 +61,11 @@ def resolve_fullcase_citation(
5661
if type(full_citation) is FullCaseCitation:
5762
db_search_results: list[Hit]
5863
db_search_results, _ = es_search_db_for_full_citation(full_citation)
64+
# If there is more than one result, return the placeholder for a
65+
# citation with multiple results
66+
if len(db_search_results) > 1:
67+
return MULTIPLE_MATCHES_RESOURCE
68+
5969
# If there is one search result, try to return it
6070
if len(db_search_results) == 1:
6171
result_id = db_search_results[0]["id"]

cl/citations/match_citations_queries.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,9 @@ def es_search_db_for_full_citation(
214214
full_citation,
215215
full_citation.citing_opinion,
216216
)
217-
return results, citation_found
218-
# Give up.
219-
return [], citation_found
217+
218+
# Return all possible results
219+
return results, citation_found
220220

221221

222222
def es_get_query_citation(
@@ -244,6 +244,7 @@ def es_get_query_citation(
244244
missing_citations.append(citation)
245245

246246
if len(citations) == 1 and matches and len(matches) == 1:
247-
# If more than one match, don't show the tip
247+
# If only one match, show the tip
248248
return matches[0], missing_citations
249-
return matches, missing_citations
249+
# No exact match, don't show the tip
250+
return None, missing_citations

cl/citations/tasks.py

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
is_parenthetical_descriptive,
1919
)
2020
from cl.citations.match_citations import (
21+
MULTIPLE_MATCHES_RESOURCE,
2122
NO_MATCH_RESOURCE,
2223
do_resolve_citations,
2324
)
@@ -165,6 +166,9 @@ def store_opinion_citations_and_update_parentheticals(
165166
# Set aside the unmatched citations
166167
unmatched_citations = citation_resolutions.pop(NO_MATCH_RESOURCE, [])
167168

169+
# Delete citations with multiple matches
170+
citation_resolutions.pop(MULTIPLE_MATCHES_RESOURCE, None)
171+
168172
# Increase the citation count for the cluster of each matched opinion
169173
# if that cluster has not already been cited by this opinion. First,
170174
# calculate a list of the IDs of every opinion whose cluster will need

cl/citations/tests.py

+113
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
get_representative_parenthetical,
4343
)
4444
from cl.citations.match_citations import (
45+
MULTIPLE_MATCHES_RESOURCE,
4546
NO_MATCH_RESOURCE,
4647
do_resolve_citations,
4748
resolve_fullcase_citation,
@@ -512,6 +513,7 @@ def setUpTestData(cls) -> None:
512513
# Courts
513514
cls.court_scotus = CourtFactory(id="scotus")
514515
court_ca1 = CourtFactory(id="ca1")
516+
cls.court_ca5 = CourtFactory(id="ca5")
515517

516518
# Citation 1
517519
cls.citation1 = CitationWithParentsFactory.create(
@@ -588,6 +590,83 @@ def setUpTestData(cls) -> None:
588590
),
589591
),
590592
)
593+
594+
cls.citation6 = CitationWithParentsFactory.create(
595+
volume="114",
596+
reporter="F.3d",
597+
page="1182",
598+
cluster=OpinionClusterFactoryWithChildrenAndParents(
599+
docket=DocketFactory(court=cls.court_ca5),
600+
case_name="Foo v. Bar",
601+
date_filed=date(1997, 4, 10),
602+
),
603+
)
604+
605+
cls.citation7 = CitationWithParentsFactory.create(
606+
volume="114",
607+
reporter="F.3d",
608+
page="1182",
609+
cluster=OpinionClusterFactoryWithChildrenAndParents(
610+
docket=DocketFactory(court=cls.court_ca5),
611+
case_name="Lorem v. Ipsum",
612+
date_filed=date(1997, 4, 8),
613+
),
614+
)
615+
616+
cls.citation8 = CitationWithParentsFactory.create(
617+
volume="1",
618+
reporter="U.S.",
619+
page="1",
620+
cluster=OpinionClusterFactoryWithChildrenAndParents(
621+
docket=DocketFactory(court=cls.court_ca5),
622+
case_name="John v. Doe",
623+
date_filed=date(1997, 4, 9),
624+
sub_opinions=RelatedFactory(
625+
OpinionWithChildrenFactory,
626+
factory_related_name="cluster",
627+
plain_text="""Lorem ipsum, 114 F.3d 1182""",
628+
),
629+
),
630+
)
631+
632+
cls.citation9 = CitationWithParentsFactory.create(
633+
volume="114",
634+
reporter="F.3d",
635+
page="1181",
636+
cluster=OpinionClusterFactoryWithChildrenAndParents(
637+
docket=DocketFactory(court=cls.court_ca5),
638+
case_name="Lorem v. Ipsum",
639+
date_filed=date(1997, 4, 8),
640+
),
641+
)
642+
643+
cls.citation10 = CitationWithParentsFactory.create(
644+
volume="114",
645+
reporter="F.3d",
646+
page="1181",
647+
cluster=OpinionClusterFactoryWithChildrenAndParents(
648+
docket=DocketFactory(court=cls.court_ca5),
649+
case_name="Lorem v. Ipsum",
650+
date_filed=date(1997, 4, 8),
651+
),
652+
)
653+
654+
cls.citation11 = CitationWithParentsFactory.create(
655+
volume="1",
656+
reporter="U.S.",
657+
page="1",
658+
cluster=OpinionClusterFactoryWithChildrenAndParents(
659+
docket=DocketFactory(court=cls.court_ca5),
660+
case_name="Foo v. Bar",
661+
date_filed=date(1997, 4, 9),
662+
sub_opinions=RelatedFactory(
663+
OpinionWithChildrenFactory,
664+
factory_related_name="cluster",
665+
plain_text="""Lorem ipsum, 114 F.3d 1182, consectetur adipiscing elit, 114 F.3d 1181""",
666+
),
667+
),
668+
)
669+
591670
call_command(
592671
"cl_index_parent_and_child_docs",
593672
search_type=SEARCH_TYPES.OPINION,
@@ -906,6 +985,40 @@ def test_citation_matching_issue621(self) -> None:
906985
results = resolve_fullcase_citation(citation)
907986
self.assertEqual(NO_MATCH_RESOURCE, results)
908987

988+
def test_citation_multiple_matches(self) -> None:
989+
"""Make sure that we can identify multiple matches for a single citation"""
990+
citation_str = "114 F.3d 1182"
991+
citation = get_citations(citation_str, tokenizer=HYPERSCAN_TOKENIZER)[
992+
0
993+
]
994+
results = resolve_fullcase_citation(citation)
995+
self.assertEqual(MULTIPLE_MATCHES_RESOURCE, results)
996+
997+
# Verify that the annotated citation is correct
998+
opinion = self.citation8.cluster.sub_opinions.all().first()
999+
get_and_clean_opinion_text(opinion)
1000+
citations = get_citations(
1001+
opinion.cleaned_text, tokenizer=HYPERSCAN_TOKENIZER
1002+
)
1003+
citation_resolutions = do_resolve_citations(citations, opinion)
1004+
new_html = create_cited_html(opinion, citation_resolutions)
1005+
1006+
expected_citation_annotation = '<pre class="inline">Lorem ipsum, </pre><span class="citation multiple-matches"><a href="/c/F.3d/114/1182/">114 F.3d 1182</a></span><pre class="inline"></pre>'
1007+
self.assertIn(expected_citation_annotation, new_html, msg="Failed!!")
1008+
1009+
# Verify that we can annotate multiple citations that can't be
1010+
# disambiguated
1011+
opinion = self.citation11.cluster.sub_opinions.all().first()
1012+
get_and_clean_opinion_text(opinion)
1013+
citations = get_citations(
1014+
opinion.cleaned_text, tokenizer=HYPERSCAN_TOKENIZER
1015+
)
1016+
self.assertEqual(len(citations), 2)
1017+
citation_resolutions = do_resolve_citations(citations, opinion)
1018+
new_html = create_cited_html(opinion, citation_resolutions)
1019+
expected_citation_annotation = '<pre class="inline">Lorem ipsum, </pre><span class="citation multiple-matches"><a href="/c/F.3d/114/1182/">114 F.3d 1182</a></span><pre class="inline">, consectetur adipiscing elit, </pre><span class="citation multiple-matches"><a href="/c/F.3d/114/1181/">114 F.3d 1181</a></span><pre class="inline"></pre>'
1020+
self.assertIn(expected_citation_annotation, new_html)
1021+
9091022
def test_citation_increment(self) -> None:
9101023
"""Make sure that found citations update the increment on the cited
9111024
opinion's citation count"""

0 commit comments

Comments
 (0)