Skip to content

Commit

Permalink
Comment out describedBy transformation (RPB-225)
Browse files Browse the repository at this point in the history
This needs to be adjusted with regard to the strapi indexing.
  • Loading branch information
TobiasNx committed Feb 26, 2025
1 parent 07fe5d8 commit deedcd5
Show file tree
Hide file tree
Showing 15 changed files with 118 additions and 732 deletions.
234 changes: 118 additions & 116 deletions conf/hebisMarc2lobid-transformation/fix/describedBy.fix
Original file line number Diff line number Diff line change
@@ -1,116 +1,118 @@

# Build the static part of the describedBy object (provenance metadata of the record).

# describedBy.id: the almaMmsId field prefixed with the lobid resources base URI.
copy_field("almaMmsId", "describedBy.id")
prepend("describedBy.id", "http://lobid.org/resources/")

# describedBy.label: human-readable label built from the same almaMmsId value.
copy_field("almaMmsId", "describedBy.label")
prepend("describedBy.label", "Webseite der hbz-Ressource ")

add_array("describedBy.type[]", "BibliographicDescription")


# Dataset this description belongs to.
add_field("describedBy.inDataset.id","http://lobid.org/resources/dataset#!")

add_field("describedBy.inDataset.label","lobid-resources – Der hbz-Verbundkatalog als Linked Open Data")

# describedBy.resultOf describes the action that produced this description.
add_array("describedBy.resultOf.type[]", "CreateAction")

# createEndTime is a Fix variable supplied from outside this file. If it is "0" a fixed
# placeholder end time is written; otherwise timestamp() fills the field.
# NOTE(review): presumably "0" is set by the test setup to keep the output stable, and
# timestamp() writes the current time - confirm against the caller and the Fix function docs.
add_field("@createTime","$[createEndTime]")
if all_match("@createTime","0")
add_field("describedBy.resultOf.endTime","0000-00-00T00:00:00")
else
timestamp("describedBy.resultOf.endTime",format:"yyyy-MM-dd'T'HH:mm:ss", timezone:"Europe/Berlin")
end


# The software that performed the transformation.
add_field("describedBy.resultOf.instrument.id","https://github.com/hbz/lobid-resources")

add_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication")

add_field("describedBy.resultOf.instrument.label","Software lobid-resources")

# describedBy.resultOf.object.id: the almaMmsId field prefixed with the MARC XML base URI.
copy_field("almaMmsId","describedBy.resultOf.object.id")
prepend("describedBy.resultOf.object.id","https://lobid.org/marcxml/")

# Derive describedBy.resultOf.object.dateCreated and describedBy.resultOf.object.dateModified.
#
# 008/00-05 has the initial cataloguing date. We test strictly if 008 starts with exactly 6 digits
# followed by a non-digit; sometimes records have 8 digits that are not valid.
# We use MNG info as fallback.
# MNG is an Alma-specific element (MNG .b only states the indexing date into Alma).

if any_match("008", "^\\d{6}\\D.*") # 008/00-05 is the correct form for the cataloguing date in MARC.
  copy_field("008","@initialCataloguingDate")
  substring("@initialCataloguingDate","0","6")
end

# Expand the two-digit year: a value starting with 0-4 gets the century prefix "20",
# any other value taken from 008 gets "19". Without a value from 008, MNG .b is used instead.
if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000
  prepend("@initialCataloguingDate","20")
elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000
  prepend("@initialCataloguingDate","19")
else
  copy_field("MNG .b","@initialCataloguingDate")
end
copy_field("@initialCataloguingDate","describedBy.resultOf.object.dateCreated")

copy_field("MNG .d","describedBy.resultOf.object.dateModified")

# Normalise both dates to bare digits: drop hyphens, cut everything after the first space
# (e.g. a time part), then strip stray copyright markers, whitespace and punctuation.
# The last step uses a character class on purpose. The former alternation
# "c|©|\\s?|,|.|:|;|/|=" never reached the alternatives after "\\s?", because "\\s?" already
# matches the empty string at every position, so punctuation was silently kept.
replace_all("describedBy.resultOf.object.dateCreated","-","")
replace_all("describedBy.resultOf.object.dateCreated"," .*","")
replace_all("describedBy.resultOf.object.dateCreated","[c©\\s,.:;/=]","")
replace_all("describedBy.resultOf.object.dateModified","-","")
replace_all("describedBy.resultOf.object.dateModified"," .*","")
replace_all("describedBy.resultOf.object.dateModified","[c©\\s,.:;/=]","")
#unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}")
# remove_field("describedBy.resultOf.object.dateCreated")
#end
#unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}")
# remove_field("describedBy.resultOf.object.dateModified")
#end

# Format as ISO dates: yyyymmdd becomes yyyy-mm-dd, a bare year yyyy becomes yyyy-01-01.
# Values that match neither shape are left unchanged.
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01")
replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01")

# Static description of the source record (describedBy.resultOf.object) and the licence.
add_array("describedBy.resultOf.object.type[]", "DataFeedItem")

# Label of the source record, built around the almaMmsId value.
copy_field("almaMmsId","describedBy.resultOf.object.label")
prepend("describedBy.resultOf.object.label","hbz-Ressource ")
append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML")

# Dataset the source record was taken from.
# NOTE(review): the id points to the hebis SRU endpoint while the label still says
# "hbz_unioncatalog" - looks like a leftover from the hbz transformation; confirm the intended label.
add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1")

add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog")

# Licence of the description: a single entry with id and label.
add_array("describedBy.license[]")
add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" )
add_field("describedBy.license[].$last.label","Creative Commons-Lizenz CC0 1.0 Universal" )


# TODO: It seems that there are a lot of organisations that are not in lobid, we should filter them out.

# 040 - Cataloging Source (NR) - Subfield: $a (NR), $c (NR), $d (R)
# ALMA has a lot of invalid repeated subfields $a

# Collect the organisations named in 040: $a -> sourceOrganization, $c -> provider,
# every $d -> one modifiedBy entry.
do list(path: "040 ", "var":"$i")

# Only the first $a is used; the exists() guard skips any repeated $a.
do list(path:"$i.a","var":"$j")
unless exists("describedBy.resultOf.object.sourceOrganization.id")
copy_field("$j", "describedBy.resultOf.object.sourceOrganization.id")
end
end
# Only the first $c is used, guarded the same way.
do list(path:"$i.c","var":"$j")
unless exists("describedBy.resultOf.object.provider.id")
copy_field("$j", "describedBy.resultOf.object.provider.id")
end
end

# NOTE(review): add_array runs once per 040 field; if a record has more than one 040,
# check whether this resets the entries collected so far.
add_array("describedBy.resultOf.object.modifiedBy[]")
do list(path:"$i.d", "var":"$j")
copy_field("$j", "describedBy.resultOf.object.modifiedBy[].$append.id")
end

end

# The provenanceLinks macro is defined outside this file; it is called with the id field and
# the label field to fill. Presumably it turns the raw 040 code into an organisation URI and
# adds the matching label - confirm against the macro definition.
call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id",label: "describedBy.resultOf.object.sourceOrganization.label")
call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id",label: "describedBy.resultOf.object.provider.label")
do list(path:"describedBy.resultOf.object.modifiedBy[]","var":"$i")
call_macro("provenanceLinks",field: "$i.id",label:"$i.label")
end

# Remove duplicate modifiedBy entries.
uniq("describedBy.resultOf.object.modifiedBy[]")
# The describedBy transformation below is commented out for now (RPB-225): it needs to be
# adjusted with regard to the Strapi indexing. nothing() keeps this Fix file a valid no-op.
nothing() # currently no transformation for describedBy is needed.

# copy_field("almaMmsId", "describedBy.id")
# prepend("describedBy.id", "http://lobid.org/resources/")
#
# copy_field("almaMmsId", "describedBy.label")
# prepend("describedBy.label", "Webseite der hbz-Ressource ")
#
# add_array("describedBy.type[]", "BibliographicDescription")
#
#
# add_field("describedBy.inDataset.id","http://lobid.org/resources/dataset#!")
#
# add_field("describedBy.inDataset.label","lobid-resources – Der hbz-Verbundkatalog als Linked Open Data")
#
# add_array("describedBy.resultOf.type[]", "CreateAction")
#
# add_field("@createTime","$[createEndTime]")
# if all_match("@createTime","0")
# add_field("describedBy.resultOf.endTime","0000-00-00T00:00:00")
# else
# timestamp("describedBy.resultOf.endTime",format:"yyyy-MM-dd'T'HH:mm:ss", timezone:"Europe/Berlin")
# end
#
#
# add_field("describedBy.resultOf.instrument.id","https://github.com/hbz/lobid-resources")
#
# add_array("describedBy.resultOf.instrument.type[]", "SoftwareApplication")
#
# add_field("describedBy.resultOf.instrument.label","Software lobid-resources")
#
# copy_field("almaMmsId","describedBy.resultOf.object.id")
# prepend("describedBy.resultOf.object.id","https://lobid.org/marcxml/")
#
# # 008/00-05 has the initial cataloguing date. We test strictly if 008 only has 6 digits, sometimes records have 8 digits that are not valid.
# # We use MNG info as fallback.
# # MNG is a ALMA-specific element (MNG .b only states the indexing date into ALMA.)
#
# if any_match("008", "^\\d{6}\\D.*") # 008/00-05 is the correct form for the cataloguing date in MARC.
# copy_field("008","@initialCataloguingDate")
# substring("@initialCataloguingDate","0","6")
# end
#
# if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000
# prepend("@initialCataloguingDate","20")
# elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000
# prepend("@initialCataloguingDate","19")
# else
# copy_field("MNG .b","@initialCataloguingDate")
# end
# copy_field("@initialCataloguingDate","describedBy.resultOf.object.dateCreated")
#
# copy_field("MNG .d","describedBy.resultOf.object.dateModified")
# replace_all("describedBy.resultOf.object.dateCreated","-","")
# replace_all("describedBy.resultOf.object.dateCreated"," .*","")
# replace_all("describedBy.resultOf.object.dateCreated","c|©|\\s?|,|.|:|;|/|=","")
# replace_all("describedBy.resultOf.object.dateModified","-","")
# replace_all("describedBy.resultOf.object.dateModified"," .*","")
# replace_all("describedBy.resultOf.object.dateModified","c|©|\\s?|,|.|:|;|/|=","")
# #unless any_match("describedBy.resultOf.object.dateCreated","\\d{8}|\\d{4}")
# # remove_field("describedBy.resultOf.object.dateCreated")
# #end
# #unless any_match("describedBy.resultOf.object.dateModified","\\d{8}|\\d{4}")
# # remove_field("describedBy.resultOf.object.dateModified")
# #end
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01")
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01")
#
# add_array("describedBy.resultOf.object.type[]", "DataFeedItem")
#
# copy_field("almaMmsId","describedBy.resultOf.object.label")
# prepend("describedBy.resultOf.object.label","hbz-Ressource ")
# append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML")
#
# add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1")
#
# add_field("describedBy.resultOf.object.inDataset.label", "Hebis SRU")
#
# add_array("describedBy.license[]")
# add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" )
# add_field("describedBy.license[].$last.label","Creative Commons-Lizenz CC0 1.0 Universal" )
#
#
# # TODO: It seems that there are a lot of organisations that are not in lobid, we should filter them out.
#
# # 040 - Cataloging Source (NR) - Subfield: $a (NR), $c (NR), $d (R)
# # ALMA has a lot of invalid repeated subfields $a
#
# do list(path: "040 ", "var":"$i")
#
# do list(path:"$i.a","var":"$j")
# unless exists("describedBy.resultOf.object.sourceOrganization.id")
# copy_field("$j", "describedBy.resultOf.object.sourceOrganization.id")
# end
# end
# do list(path:"$i.c","var":"$j")
# unless exists("describedBy.resultOf.object.provider.id")
# copy_field("$j", "describedBy.resultOf.object.provider.id")
# end
# end
#
# add_array("describedBy.resultOf.object.modifiedBy[]")
# do list(path:"$i.d", "var":"$j")
# copy_field("$j", "describedBy.resultOf.object.modifiedBy[].$append.id")
# end
#
# end
#
# call_macro("provenanceLinks",field: "describedBy.resultOf.object.sourceOrganization.id",label: "describedBy.resultOf.object.sourceOrganization.label")
# call_macro("provenanceLinks",field: "describedBy.resultOf.object.provider.id",label: "describedBy.resultOf.object.provider.label")
# do list(path:"describedBy.resultOf.object.modifiedBy[]","var":"$i")
# call_macro("provenanceLinks",field: "$i.id",label:"$i.label")
# end
#
# uniq("describedBy.resultOf.object.modifiedBy[]")
#
44 changes: 0 additions & 44 deletions conf/output/test-hebis-to-lobid-output-0.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,50 +15,6 @@
"location" : [ "Ingelheim am Rhein" ],
"publishedBy" : [ "Stadtverwaltung Ingelheim am Rhein" ]
} ],
"describedBy" : {
"id" : "http://lobid.org/resources/514226781",
"label" : "Webseite der hbz-Ressource 514226781",
"type" : [ "BibliographicDescription" ],
"inDataset" : {
"id" : "http://lobid.org/resources/dataset#!",
"label" : "lobid-resources – Der hbz-Verbundkatalog als Linked Open Data"
},
"resultOf" : {
"type" : [ "CreateAction" ],
"endTime" : "0000-00-00T00:00:00",
"instrument" : {
"id" : "https://github.com/hbz/lobid-resources",
"type" : [ "SoftwareApplication" ],
"label" : "Software lobid-resources"
},
"object" : {
"id" : "https://lobid.org/marcxml/514226781",
"dateCreated" : "2023-12-21",
"type" : [ "DataFeedItem" ],
"label" : "hbz-Ressource 514226781 im Exportformat MARC21 XML",
"inDataset" : {
"id" : "http://sru.hebis.de/sru/DB=2.1?version=1.1",
"label" : "hbz_unioncatalog"
},
"sourceOrganization" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"provider" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"modifiedBy" : [ {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
} ]
}
},
"license" : [ {
"id" : "http://creativecommons.org/publicdomain/zero/1.0",
"label" : "Creative Commons-Lizenz CC0 1.0 Universal"
} ]
},
"sameAs" : [ {
"id" : "http://worldcat.org/oclc/1415743560",
"label" : "OCLC Ressource"
Expand Down
44 changes: 0 additions & 44 deletions conf/output/test-hebis-to-lobid-output-1.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,50 +16,6 @@
"location" : [ "Mainz" ],
"publishedBy" : [ "Evangelisches Dekanat Mainz" ]
} ],
"describedBy" : {
"id" : "http://lobid.org/resources/512839662",
"label" : "Webseite der hbz-Ressource 512839662",
"type" : [ "BibliographicDescription" ],
"inDataset" : {
"id" : "http://lobid.org/resources/dataset#!",
"label" : "lobid-resources – Der hbz-Verbundkatalog als Linked Open Data"
},
"resultOf" : {
"type" : [ "CreateAction" ],
"endTime" : "0000-00-00T00:00:00",
"instrument" : {
"id" : "https://github.com/hbz/lobid-resources",
"type" : [ "SoftwareApplication" ],
"label" : "Software lobid-resources"
},
"object" : {
"id" : "https://lobid.org/marcxml/512839662",
"dateCreated" : "2023-11-01",
"type" : [ "DataFeedItem" ],
"label" : "hbz-Ressource 512839662 im Exportformat MARC21 XML",
"inDataset" : {
"id" : "http://sru.hebis.de/sru/DB=2.1?version=1.1",
"label" : "hbz_unioncatalog"
},
"sourceOrganization" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"provider" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"modifiedBy" : [ {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
} ]
}
},
"license" : [ {
"id" : "http://creativecommons.org/publicdomain/zero/1.0",
"label" : "Creative Commons-Lizenz CC0 1.0 Universal"
} ]
},
"sameAs" : [ {
"id" : "http://worldcat.org/oclc/1407066536",
"label" : "OCLC Ressource"
Expand Down
44 changes: 0 additions & 44 deletions conf/output/test-hebis-to-lobid-output-10.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,6 @@
"location" : [ "Mainz" ],
"publishedBy" : [ "Schmidt" ]
} ],
"describedBy" : {
"id" : "http://lobid.org/resources/090358163",
"label" : "Webseite der hbz-Ressource 090358163",
"type" : [ "BibliographicDescription" ],
"inDataset" : {
"id" : "http://lobid.org/resources/dataset#!",
"label" : "lobid-resources – Der hbz-Verbundkatalog als Linked Open Data"
},
"resultOf" : {
"type" : [ "CreateAction" ],
"endTime" : "0000-00-00T00:00:00",
"instrument" : {
"id" : "https://github.com/hbz/lobid-resources",
"type" : [ "SoftwareApplication" ],
"label" : "Software lobid-resources"
},
"object" : {
"id" : "https://lobid.org/marcxml/090358163",
"dateCreated" : "2000-03-14",
"type" : [ "DataFeedItem" ],
"label" : "hbz-Ressource 090358163 im Exportformat MARC21 XML",
"inDataset" : {
"id" : "http://sru.hebis.de/sru/DB=2.1?version=1.1",
"label" : "hbz_unioncatalog"
},
"sourceOrganization" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"provider" : {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
},
"modifiedBy" : [ {
"id" : "http://lobid.org/organisations/DE-603#!",
"label" : "Hessisches BibliotheksInformationsSystem hebis"
} ]
}
},
"license" : [ {
"id" : "http://creativecommons.org/publicdomain/zero/1.0",
"label" : "Creative Commons-Lizenz CC0 1.0 Universal"
} ]
},
"sameAs" : [ {
"id" : "http://worldcat.org/oclc/76115483",
"label" : "OCLC Ressource"
Expand Down
Loading

0 comments on commit deedcd5

Please sign in to comment.