Skip to content

Commit 91422a2

Browse files
Support for reading Schema 4.6 values from crossref
1 parent 06cc3ac commit 91422a2

File tree

4 files changed

+260
-3
lines changed

4 files changed

+260
-3
lines changed

lib/bolognese/readers/crossref_reader.rb

+37-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ module Bolognese
44
module Readers
55
module CrossrefReader
66
# CrossRef types from https://api.crossref.org/types
7+
8+
# Maps a Crossref `contributor_role` value to the matching DataCite
# `contributorType`. A role without an entry here looks up as nil.
# Frozen so the shared lookup table cannot be mutated at runtime.
CR_TO_DC_CONTRIBUTOR_TYPES = {
  "editor" => "Editor",
  "translator" => "Translator",
}.freeze
12+
713
def get_crossref(id: nil, **options)
814
return { "string" => nil, "state" => "not_found" } unless id.present?
915

@@ -138,7 +144,7 @@ def read_crossref(string: nil, **options)
138144

139145
state = meta.present? || read_options.present? ? "findable" : "not_found"
140146

141-
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata))
147+
related_identifiers = Array.wrap(crossref_is_part_of(journal_metadata)) + Array.wrap(crossref_references(bibliographic_metadata)) + Array.wrap(crossref_has_translation(program_metadata)) + Array.wrap(crossref_is_translation_of(program_metadata))
142148

143149
container = if journal_metadata.present?
144150
issn = normalize_issn(journal_metadata.to_h.fetch("issn", nil))
@@ -187,7 +193,7 @@ def read_crossref(string: nil, **options)
187193
"titles" => titles,
188194
"identifiers" => identifiers,
189195
"creators" => crossref_people(bibliographic_metadata, "author"),
190-
"contributors" => crossref_people(bibliographic_metadata, "editor"),
196+
"contributors" => crossref_people(bibliographic_metadata, "editor") + crossref_people(bibliographic_metadata, "translator"),
191197
"funding_references" => crossref_funding_reference(program_metadata),
192198
"publisher" => publisher,
193199
"container" => container,
@@ -276,13 +282,15 @@ def crossref_people(bibliographic_metadata, contributor_role)
276282
end
277283
end.compact
278284

285+
contributor_type = CR_TO_DC_CONTRIBUTOR_TYPES[a["contributor_role"]]
286+
279287
{ "nameType" => "Personal",
280288
"nameIdentifiers" => name_identifiers,
281289
"name" => [family_name, given_name].compact.join(", "),
282290
"givenName" => given_name,
283291
"familyName" => family_name,
284292
"affiliation" => affiliation.presence,
285-
"contributorType" => contributor_role == "editor" ? "Editor" : nil }.compact
293+
"contributorType" => contributor_type }.compact
286294
else
287295
{ "nameType" => "Organizational",
288296
"name" => a["name"] || a["__content__"] }
@@ -362,6 +370,32 @@ def crossref_references(bibliographic_metadata)
362370
end
363371
end.compact.unwrap
364372
end
373+
374+
# Extracts DataCite "HasTranslation" related identifiers from the Crossref
# relations program.
#
# @param program_metadata [Object] parsed `program` element; only a Hash is
#   inspected, any other value yields no relations
# @return [Object] the matching relation hashes after `compact.unwrap`
#   (callers in this file wrap the result again with `Array.wrap`)
def crossref_has_translation(program_metadata)
  crossref_intra_work_relations(program_metadata, "hasTranslation", "HasTranslation")
end

# Extracts DataCite "IsTranslationOf" related identifiers from the Crossref
# relations program.
#
# @param program_metadata [Object] parsed `program` element; only a Hash is
#   inspected, any other value yields no relations
# @return [Object] the matching relation hashes after `compact.unwrap`
#   (callers in this file wrap the result again with `Array.wrap`)
def crossref_is_translation_of(program_metadata)
  crossref_intra_work_relations(program_metadata, "isTranslationOf", "IsTranslationOf")
end

# Shared implementation for the translation relation readers.
#
# Walks the `related_item` entries of the program and keeps those whose
# `intra_work_relation` has the requested `relationship_type` and a "doi"
# `identifier_type`.
#
# @param program_metadata [Object] parsed `program` element
# @param relationship_type [String] Crossref relationship-type to select
# @param relation_type [String] DataCite relationType to emit
# @return [Object] the matching relation hashes after `compact.unwrap`
def crossref_intra_work_relations(program_metadata, relationship_type, relation_type)
  refs = program_metadata.dig("related_item") if program_metadata.is_a?(Hash)

  Array.wrap(refs).map do |item|
    relation = item["intra_work_relation"] if item.is_a?(Hash)
    # A related_item may carry an inter_work_relation instead, in which case
    # there is nothing to inspect here; skip it rather than index into nil.
    next unless relation.is_a?(Hash)
    next unless relation["relationship_type"] == relationship_type
    next unless relation["identifier_type"] == "doi"

    identifier = parse_attributes(relation)
    # Skip relations without text content instead of calling downcase on nil.
    next unless identifier.is_a?(String)

    { "relatedIdentifier" => identifier.downcase,
      "relationType" => relation_type,
      "relatedIdentifierType" => "DOI" }
  end.compact.unwrap
end
365399
end
366400
end
367401
end

spec/fixtures/crossref.xml

+18
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@
6464
<surname>Hardtke</surname>
6565
<affiliation>Department of Plant Molecular Biology, University of Lausanne, Lausanne, Switzerland</affiliation>
6666
</person_name>
67+
<person_name contributor_role="editor" sequence="additional">
68+
<given_name>Ashwini</given_name>
69+
<surname>Sukale</surname>
70+
<affiliation>DataCite</affiliation>
71+
</person_name>
72+
<person_name contributor_role="translator" sequence="additional">
73+
<given_name>Cody</given_name>
74+
<surname>Ross</surname>
75+
<affiliation>DataCite</affiliation>
76+
</person_name>
6777
</contributors>
6878
<jats:abstract xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1">
6979
<jats:p>Among various advantages, their small size makes model organisms preferred subjects of investigation. Yet, even in model systems detailed analysis of numerous developmental processes at cellular level is severely hampered by their scale. For instance, secondary growth of Arabidopsis hypocotyls creates a radial pattern of highly specialized tissues that comprises several thousand cells starting from a few dozen. This dynamic process is difficult to follow because of its scale and because it can only be investigated invasively, precluding comprehensive understanding of the cell proliferation, differentiation, and patterning events involved. To overcome such limitation, we established an automated quantitative histology approach. We acquired hypocotyl cross-sections from tiled high-resolution images and extracted their information content using custom high-throughput image processing and segmentation. Coupled with automated cell type recognition through machine learning, we could establish a cellular resolution atlas that reveals vascular morphodynamics during secondary growth, for example equidistant phloem pole formation.</jats:p>
@@ -153,6 +163,14 @@
153163
<rel:description>Data from: Automated quantitative histology reveals vascular morphodynamics during Arabidopsis hypocotyl secondary growth</rel:description>
154164
<rel:inter_work_relation identifier-type="doi" relationship-type="isSupplementedBy">10.5061/dryad.b835k</rel:inter_work_relation>
155165
</rel:related_item>
166+
<rel:related_item>
167+
<rel:description>Portuguese translation of an article</rel:description>
168+
<rel:intra_work_relation relationship-type="isTranslationOf" identifier-type="doi">10.5555/original_language</rel:intra_work_relation>
169+
</rel:related_item>
170+
<rel:related_item>
171+
<rel:description>Spanish translation of an article</rel:description>
172+
<rel:intra_work_relation relationship-type="hasTranslation" identifier-type="doi">10.5555/other_language</rel:intra_work_relation>
173+
</rel:related_item>
156174
</rel:program>
157175
<archive_locations>
158176
<archive name="CLOCKSS" />
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<crossref_result xmlns="http://www.crossref.org/qrschema/3.0" version="3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.crossref.org/qrschema/3.0 http://www.crossref.org/schemas/crossref_query_output3.0.xsd">
3+
<query_result>
4+
<head>
5+
<doi_batch_id>none</doi_batch_id>
6+
</head>
7+
<body>
8+
<query status="resolved">
9+
<doi type="journal_article">10.7554/eLife.01567</doi>
10+
<crm-item name="publisher-name" type="string">eLife Sciences Publications, Ltd</crm-item>
11+
<crm-item name="prefix-name" type="string">eLife Sciences Publications, Ltd.</crm-item>
12+
<crm-item name="member-id" type="number">4374</crm-item>
13+
<crm-item name="citation-id" type="number">67124617</crm-item>
14+
<crm-item name="journal-id" type="number">189365</crm-item>
15+
<crm-item name="deposit-timestamp" type="number">20180823133646</crm-item>
16+
<crm-item name="owner-prefix" type="string">10.7554</crm-item>
17+
<crm-item name="last-update" type="date">2018-08-23T13:41:49Z</crm-item>
18+
<crm-item name="created" type="date">2014-02-11T16:29:04Z</crm-item>
19+
<crm-item name="citedby-count" type="number">13</crm-item>
20+
<doi_record>
21+
<crossref xmlns="http://www.crossref.org/xschema/1.1" xsi:schemaLocation="http://www.crossref.org/xschema/1.1 http://doi.crossref.org/schemas/unixref1.1.xsd">
22+
<journal>
23+
<journal_metadata language="en">
24+
<full_title>Journal of Metadata Perfection</full_title>
25+
<abbrev_title>JOMPer</abbrev_title>
26+
<doi_data>
27+
<doi>10.32013/487529</doi>
28+
<resource>https://www.crossref.org/jomper</resource>
29+
</doi_data>
30+
</journal_metadata>
31+
<journal_issue>
32+
<publication_date media_type="online">
33+
<month>08</month>
34+
<day>13</day>
35+
<year>2008</year>
36+
</publication_date>
37+
<publication_date media_type="print">
38+
<month>08</month>
39+
<day>14</day>
40+
<year>2008</year>
41+
</publication_date>
42+
<journal_volume>
43+
<volume>5</volume>
44+
</journal_volume>
45+
<issue>11</issue>
46+
</journal_issue>
47+
<journal_article publication_type="full_text">
48+
<titles>
49+
<title>Quand vos meilleures métadonnées ne suffisent pas: travailler avec une spécification imparfaite</title>
50+
<original_language_title language="en">When your best metadata isn't good enough: working with an imperfect specification </original_language_title>
51+
</titles>
52+
<contributors>
53+
<person_name sequence="first" contributor_role="author">
54+
<given_name>Minerva</given_name>
55+
<surname>Housecat</surname>
56+
<affiliation>Crossref University</affiliation>
57+
<ORCID authenticated="true">https://orcid.org/0000-0002-4011-3590</ORCID>
58+
</person_name>
59+
<person_name sequence="additional" contributor_role="author">
60+
<given_name>Josiah</given_name>
61+
<surname>Carberry</surname>
62+
<affiliation>Brown University</affiliation>
63+
<ORCID authenticated="true">https://orcid.org/0000-0002-1825-0097</ORCID>
64+
</person_name>
65+
</contributors>
66+
67+
<publication_date media_type="online">
68+
<month>08</month>
69+
<day>13</day>
70+
<year>2018</year>
71+
</publication_date>
72+
<publication_date media_type="print">
73+
<month>08</month>
74+
<day>14</day>
75+
<year>2018</year>
76+
</publication_date>
77+
<acceptance_date>
78+
<month>05</month>
79+
<day>21</day>
80+
<year>2018</year>
81+
</acceptance_date>
82+
<pages>
83+
<first_page>1</first_page>
84+
<last_page>3</last_page>
85+
</pages>
86+
<publisher_item>
87+
<item_number item_number_type="article_number">2347854-0</item_number>
88+
</publisher_item>
89+
<program xmlns="http://www.crossref.org/relations.xsd">
90+
<related_item>
91+
<description>English language version</description>
92+
<intra_work_relation relationship-type="hasTranslation" identifier-type="doi"
93+
>10.32013/zLl10OQ</intra_work_relation>
94+
</related_item>
95+
<related_item>
96+
<description>Portuguese translation of an article</description>
97+
<intra_work_relation relationship-type="isTranslationOf" identifier-type="doi">10.5555/original_language</intra_work_relation>
98+
</related_item>
99+
</program>
100+
<archive_locations>
101+
<archive name="CLOCKSS"/>
102+
<archive name="Internet Archive"/>
103+
<archive name="Portico"/>
104+
<archive name="KB"/>
105+
</archive_locations>
106+
<doi_data>
107+
<doi>10.32013/4859104</doi>
108+
<resource>https://www.crossref.org/xml-samples/</resource>
109+
<collection property="crawler-based">
110+
<item crawler="iParadigms">
111+
<resource>https://www.crossref.org/faqs.html</resource>
112+
</item>
113+
</collection>
114+
<collection property="text-mining">
115+
<item>
116+
<resource content_version="vor" mime_type="text/xml"
117+
>https://www.crossref.org/example.xml</resource>
118+
</item>
119+
</collection>
120+
<collection property="link-header">
121+
<item link_header_relationship="dul">
122+
<resource>http://www.crossref.org/exampleDULendpoint</resource>
123+
</item>
124+
</collection>
125+
</doi_data>
126+
<citation_list>
127+
<citation key="ref1">
128+
<doi>10.1101/144147</doi>
129+
<unstructured_citation>BioSharing: Harnessing Metadata Standards For The
130+
Data Commons. Susanna-Assunta Sansone, Alejandra Gonzalez-Beltran,
131+
Philippe Rocca-Serra, PeterMcQuilton, Massimiliano Izzo, Allyson Lister,
132+
Milo Thurston. bioRxiv 144147</unstructured_citation>
133+
</citation>
134+
<citation key="ref2">
135+
<doi>10.1515/jdis-2017-0012</doi>
136+
<unstructured_citation>Greenberg, J. (2017). Big Metadata, Smart Metadata,
137+
and Metadata Capital: Toward Greater Synergy Between Data Science and
138+
Metadata, Journal of Data and Information Science, 2(3), 19-36.
139+
</unstructured_citation>
140+
</citation>
141+
<citation key="ref3">
142+
<doi>10.32013/s4947892</doi>
143+
</citation>
144+
<citation key="ref4">
145+
<doi>10.1108/LHTN-12-2016-0059</doi>
146+
<unstructured_citation>Jung ran Park, Yuji Tosaka, (2017) “Emerging
147+
information standards and technologies: cataloging and metadata
148+
professionals’ perspectives", Library Hi Tech News, Vol. 34 Issue: 4,
149+
pp.22-26</unstructured_citation>
150+
</citation>
151+
<citation key="ref5">
152+
<journal_title>Information Technology and Libraries</journal_title>
153+
<author>Park</author>
154+
<volume>29</volume>
155+
<issue>3</issue>
156+
<first_page>104</first_page>
157+
<cYear>2010</cYear>
158+
<doi>10.6017/ital.v29i3.3136</doi>
159+
<article_title>Metadata creation practices in digital repositories and collections: Schemata, selection criteria, and interoperability</article_title>
160+
</citation>
161+
<citation key="ref6">
162+
<unstructured_citation>Riley, J. (2017). Understanding metadata.
163+
Bethesda,MD: NISO Press.</unstructured_citation>
164+
</citation>
165+
</citation_list>
166+
</journal_article>
167+
</journal>
168+
169+
</crossref>
170+
</doi_record>
171+
</query>
172+
</body>
173+
</query_result>
174+
</crossref_result>

spec/readers/crossref_reader_spec.rb

+31
Original file line numberDiff line numberDiff line change
@@ -1098,5 +1098,36 @@
10981098
expect(subject.agency).to eq("crossref")
10991099
expect(subject.state).to eq("not_found")
11001100
end
1101+
1102+
it "with Schema 4.6 Translator contributor" do
  # Editors and translators are both expected in contributors, each with
  # its own contributorType.
  metadata = Bolognese::Metadata.new(input: fixture_path + 'crossref.xml')

  editor = {
    "nameType"=>"Personal",
    "name"=>"Sukale, Ashwini",
    "givenName"=>"Ashwini",
    "familyName"=>"Sukale",
    "affiliation"=>[{"name"=>"DataCite"}],
    "contributorType"=>"Editor",
  }
  translator = {
    "nameType"=>"Personal",
    "name"=>"Ross, Cody",
    "givenName"=>"Cody",
    "familyName"=>"Ross",
    "affiliation"=>[{"name"=>"DataCite"}],
    "contributorType"=>"Translator",
  }

  expect(metadata.contributors).to eq([editor, translator])
end
1111+
1112+
it "with Schema 4.6 HasTranslation and IsTranslationOf" do
  # The fixture carries one hasTranslation and one isTranslationOf relation;
  # both must show up among the related identifiers.
  metadata = Bolognese::Metadata.new(input: fixture_path + 'crossref_schema_4.6_values.xml')

  has_translation = {
    "relationType"=>"HasTranslation",
    "relatedIdentifierType"=>"DOI",
    "relatedIdentifier"=>"10.32013/zll10oq",
  }
  is_translation_of = {
    "relationType"=>"IsTranslationOf",
    "relatedIdentifierType"=>"DOI",
    "relatedIdentifier"=>"10.5555/original_language",
  }

  expect(metadata.related_identifiers.count).to eq(7)
  expect(metadata.related_identifiers).to include(has_translation)
  expect(metadata.related_identifiers).to include(is_translation_of)
end
11011132
end
11021133
end

0 commit comments

Comments
 (0)