-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Outcomment describedBy transformation RPB-225
This needs to be adjusted with regard to the strapi indexing.
- Loading branch information
Showing
15 changed files
with
118 additions
and
732 deletions.
There are no files selected for viewing
234 changes: 118 additions & 116 deletions
234
conf/hebisMarc2lobid-transformation/fix/describedBy.fix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,116 +1,118 @@ | ||
|
||
copy_field("almaMmsId", "describedBy.id") | ||
prepend("describedBy.id", "http://lobid.org/resources/") | ||
|
||
copy_field("almaMmsId", "describedBy.label") | ||
prepend("describedBy.label", "Webseite der hbz-Ressource ") | ||
|
||
add_array("describedBy.type[]", "BibliographicDescription") | ||
|
||
|
||
add_field("describedBy.inDataset.id","http://lobid.org/resources/dataset#!") | ||
|
||
add_field("describedBy.inDataset.label","lobid-resources – Der hbz-Verbundkatalog als Linked Open Data") | ||
|
||
add_array("describedBy.resultOf.type[]", "CreateAction") | ||
|
||
add_field("@createTime","$[createEndTime]") | ||
if all_match("@createTime","0") | ||
add_field("describedBy.resultOf.endTime","0000-00-00T00:00:00") | ||
else | ||
timestamp("describedBy.resultOf.endTime",format:"yyyy-MM-dd'T'HH:mm:ss", timezone:"Europe/Berlin") | ||
end | ||
|
||
|
||
add_field("describedBy.resultOf.instrument.id","https://github.com/hbz/lobid-resources") | ||
|
||
add_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication") | ||
|
||
add_field("describedBy.resultOf.instrument.label","Software lobid-resources") | ||
|
||
copy_field("almaMmsId","describedBy.resultOf.object.id") | ||
prepend("describedBy.resultOf.object.id","https://lobid.org/marcxml/") | ||
|
||
# 008/00-05 has the initial cataloguing date. We test strictly if 008 only has 6 digits, sometimes records have 8 digits that are not valid. | ||
# We use MNG info as fallback. | ||
# MNG is a ALMA-specific element (MNG .b only states the indexing date into ALMA.) | ||
|
||
if any_match("008", "^\\d{6}\\D.*") # 008/00-05 is the correct form for the cataloguing date in MARC. | ||
copy_field("008","@initialCataloguingDate") | ||
substring("@initialCataloguingDate","0","6") | ||
end | ||
|
||
if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000 | ||
prepend("@initialCataloguingDate","20") | ||
elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000 | ||
prepend("@initialCataloguingDate","19") | ||
else | ||
copy_field("MNG .b","@initialCataloguingDate") | ||
end | ||
copy_field("@initialCataloguingDate","describedBy.resultOf.object.dateCreated") | ||
|
||
copy_field("MNG .d","describedBy.resultOf.object.dateModified") | ||
replace_all("describedBy.resultOf.object.dateCreated","-","") | ||
replace_all("describedBy.resultOf.object.dateCreated"," .*","") | ||
replace_all("describedBy.resultOf.object.dateCreated","c|©|\\s?|,|.|:|;|/|=","") | ||
replace_all("describedBy.resultOf.object.dateModified","-","") | ||
replace_all("describedBy.resultOf.object.dateModified"," .*","") | ||
replace_all("describedBy.resultOf.object.dateModified","c|©|\\s?|,|.|:|;|/|=","") | ||
#unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}") | ||
# remove_field("describedBy.resultOf.object.dateCreated") | ||
#end | ||
#unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}") | ||
# remove_field("describedBy.resultOf.object.dateModified") | ||
#end | ||
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3") | ||
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3") | ||
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01") | ||
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01") | ||
|
||
add_array("describedBy.resultOf.object.type[]", "DataFeedItem") | ||
|
||
copy_field("almaMmsId","describedBy.resultOf.object.label") | ||
prepend("describedBy.resultOf.object.label","hbz-Ressource ") | ||
append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML") | ||
|
||
add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1") | ||
|
||
add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog") | ||
|
||
add_array("describedBy.license[]") | ||
add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" ) | ||
add_field("describedBy.license[].$last.label","Creative Commons-Lizenz CC0 1.0 Universal" ) | ||
|
||
|
||
# TODO: It seems that there are a lot of organisations that are not in lobid, we should filter them out. | ||
|
||
# 040 - Cataloging Source (NR) - Subfield: $a (NR), $c (NR), $d (R) | ||
# ALMA has a lot of invalid repeated subfields $a | ||
|
||
do list(path: "040 ", "var":"$i") | ||
|
||
do list(path:"$i.a","var":"$j") | ||
unless exists("describedBy.resultOf.object.sourceOrganization.id") | ||
copy_field("$j", "describedBy.resultOf.object.sourceOrganization.id") | ||
end | ||
end | ||
do list(path:"$i.c","var":"$j") | ||
unless exists("describedBy.resultOf.object.provider.id") | ||
copy_field("$j", "describedBy.resultOf.object.provider.id") | ||
end | ||
end | ||
|
||
add_array("describedBy.resultOf.object.modifiedBy[]") | ||
do list(path:"$i.d", "var":"$j") | ||
copy_field("$j", "describedBy.resultOf.object.modifiedBy[].$append.id") | ||
end | ||
|
||
end | ||
|
||
call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id",label: "describedBy.resultOf.object.sourceOrganization.label") | ||
call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id",label: "describedBy.resultOf.object.provider.label") | ||
do list(path:"describedBy.resultOf.object.modifiedBy[]","var":"$i") | ||
call_macro("provenanceLinks",field: "$i.id",label:"$i.label") | ||
end | ||
|
||
uniq("describedBy.resultOf.object.modifiedBy[]") | ||
nothing() # currently no transformation for describedBy is needed. | ||
|
||
# copy_field("almaMmsId", "describedBy.id") | ||
# prepend("describedBy.id", "http://lobid.org/resources/") | ||
# | ||
# copy_field("almaMmsId", "describedBy.label") | ||
# prepend("describedBy.label", "Webseite der hbz-Ressource ") | ||
# | ||
# add_array("describedBy.type[]", "BibliographicDescription") | ||
# | ||
# | ||
# add_field("describedBy.inDataset.id","http://lobid.org/resources/dataset#!") | ||
# | ||
# add_field("describedBy.inDataset.label","lobid-resources – Der hbz-Verbundkatalog als Linked Open Data") | ||
# | ||
# add_array("describedBy.resultOf.type[]", "CreateAction") | ||
# | ||
# add_field("@createTime","$[createEndTime]") | ||
# if all_match("@createTime","0") | ||
# add_field("describedBy.resultOf.endTime","0000-00-00T00:00:00") | ||
# else | ||
# timestamp("describedBy.resultOf.endTime",format:"yyyy-MM-dd'T'HH:mm:ss", timezone:"Europe/Berlin") | ||
# end | ||
# | ||
# | ||
# add_field("describedBy.resultOf.instrument.id","https://github.com/hbz/lobid-resources") | ||
# | ||
# add_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication") | ||
# | ||
# add_field("describedBy.resultOf.instrument.label","Software lobid-resources") | ||
# | ||
# copy_field("almaMmsId","describedBy.resultOf.object.id") | ||
# prepend("describedBy.resultOf.object.id","https://lobid.org/marcxml/") | ||
# | ||
# # 008/00-05 has the initial cataloguing date. We test strictly if 008 only has 6 digits, sometimes records have 8 digits that are not valid. | ||
# # We use MNG info as fallback. | ||
# # MNG is a ALMA-specific element (MNG .b only states the indexing date into ALMA.) | ||
# | ||
# if any_match("008", "^\\d{6}\\D.*") # 008/00-05 is the correct form for the cataloguing date in MARC. | ||
# copy_field("008","@initialCataloguingDate") | ||
# substring("@initialCataloguingDate","0","6") | ||
# end | ||
# | ||
# if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000 | ||
# prepend("@initialCataloguingDate","20") | ||
# elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000 | ||
# prepend("@initialCataloguingDate","19") | ||
# else | ||
# copy_field("MNG .b","@initialCataloguingDate") | ||
# end | ||
# copy_field("@initialCataloguingDate","describedBy.resultOf.object.dateCreated") | ||
# | ||
# copy_field("MNG .d","describedBy.resultOf.object.dateModified") | ||
# replace_all("describedBy.resultOf.object.dateCreated","-","") | ||
# replace_all("describedBy.resultOf.object.dateCreated"," .*","") | ||
# replace_all("describedBy.resultOf.object.dateCreated","c|©|\\s?|,|.|:|;|/|=","") | ||
# replace_all("describedBy.resultOf.object.dateModified","-","") | ||
# replace_all("describedBy.resultOf.object.dateModified"," .*","") | ||
# replace_all("describedBy.resultOf.object.dateModified","c|©|\\s?|,|.|:|;|/|=","") | ||
# #unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}") | ||
# # remove_field("describedBy.resultOf.object.dateCreated") | ||
# #end | ||
# #unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}") | ||
# # remove_field("describedBy.resultOf.object.dateModified") | ||
# #end | ||
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3") | ||
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3") | ||
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01") | ||
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01") | ||
# | ||
# add_array("describedBy.resultOf.object.type[]", "DataFeedItem") | ||
# | ||
# copy_field("almaMmsId","describedBy.resultOf.object.label") | ||
# prepend("describedBy.resultOf.object.label","hbz-Ressource ") | ||
# append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML") | ||
# | ||
# add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1") | ||
# | ||
# add_field("describedBy.resultOf.object.inDataset.label", "Hebis SRU") | ||
# | ||
# add_array("describedBy.license[]") | ||
# add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" ) | ||
# add_field("describedBy.license[].$last.label","Creative Commons-Lizenz CC0 1.0 Universal" ) | ||
# | ||
# | ||
# # TODO: It seems that there are a lot of organisations that are not in lobid, we should filter them out. | ||
# | ||
# # 040 - Cataloging Source (NR) - Subfield: $a (NR), $c (NR), $d (R) | ||
# # ALMA has a lot of invalid repeated subfields $a | ||
# | ||
# do list(path: "040 ", "var":"$i") | ||
# | ||
# do list(path:"$i.a","var":"$j") | ||
# unless exists("describedBy.resultOf.object.sourceOrganization.id") | ||
# copy_field("$j", "describedBy.resultOf.object.sourceOrganization.id") | ||
# end | ||
# end | ||
# do list(path:"$i.c","var":"$j") | ||
# unless exists("describedBy.resultOf.object.provider.id") | ||
# copy_field("$j", "describedBy.resultOf.object.provider.id") | ||
# end | ||
# end | ||
# | ||
# add_array("describedBy.resultOf.object.modifiedBy[]") | ||
# do list(path:"$i.d", "var":"$j") | ||
# copy_field("$j", "describedBy.resultOf.object.modifiedBy[].$append.id") | ||
# end | ||
# | ||
# end | ||
# | ||
# call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id",label: "describedBy.resultOf.object.sourceOrganization.label") | ||
# call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id",label: "describedBy.resultOf.object.provider.label") | ||
# do list(path:"describedBy.resultOf.object.modifiedBy[]","var":"$i") | ||
# call_macro("provenanceLinks",field: "$i.id",label:"$i.label") | ||
# end | ||
# | ||
# uniq("describedBy.resultOf.object.modifiedBy[]") | ||
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.