Skip to content

Commit

Permalink
Update transformation to current lobid fix RPB-225
Browse files Browse the repository at this point in the history
  • Loading branch information
TobiasNx committed Feb 28, 2025
1 parent deedcd5 commit bb13c82
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 20 deletions.
11 changes: 6 additions & 5 deletions conf/hebisMarc2lobid-transformation/fix/describedBy.fix
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ nothing() # currently no transformation for describedBy is needed.
# substring("@initialCataloguingDate","0","6")
# end
#
# if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000
# if any_match("@initialCataloguingDate","^[0-4]\\d(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 2000-01-01 to 2049-12-31 ( e.g. matching 491231)
# prepend("@initialCataloguingDate","20")
# elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000
# elsif any_match("@initialCataloguingDate","\\d{2}(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 1900-01-01 to 1999-12-31 ( e.g. matching 991231)
# prepend("@initialCataloguingDate","19")
# else
# copy_field("MNG .b","@initialCataloguingDate")
Expand All @@ -67,16 +67,18 @@ nothing() # currently no transformation for describedBy is needed.
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3")
# replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01")
# replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01")
# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateCreated")
# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateModified")
#
# add_array("describedBy.resultOf.object.type[]", "DataFeedItem")
#
# copy_field("almaMmsId","describedBy.resultOf.object.label")
# prepend("describedBy.resultOf.object.label","hbz-Ressource ")
# append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML")
#
# add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1")
# add_field("describedBy.resultOf.object.inDataset.id", "https://datahub.io/dataset/hbz_unioncatalog")
#
# add_field("describedBy.resultOf.object.inDataset.label", "Hebis SRU")
# add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog")
#
# add_array("describedBy.license[]")
# add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" )
Expand Down Expand Up @@ -115,4 +117,3 @@ nothing() # currently no transformation for describedBy is needed.
# end
#
# uniq("describedBy.resultOf.object.modifiedBy[]")
#
19 changes: 19 additions & 0 deletions conf/hebisMarc2lobid-transformation/fix/macros.fix
Original file line number Diff line number Diff line change
Expand Up @@ -585,3 +585,22 @@ do put_macro("lobidResourcesFallbackLabel")
end
end

# Validate leap days: a "YYYY-02-29" date whose year is not a Gregorian leap
# year is corrected to "YYYY-02-28".
#
# Parameter:
#   date - path of the field that holds the date to check
#
# Leap-year rule encoded in the second regex:
#   * the last two digits are 04, 08, ..., 96 (divisible by 4, not a century), or
#   * the year is a century divisible by 400 (first two digits 00, 04, ..., 96
#     followed by "00").
# Any four-digit year is evaluated, so valid leap days outside 1800-2099
# (e.g. 1796-02-29 or 2400-02-29) are no longer rewritten to 02-28.
# Results for the years 1800-2099 are unchanged.
do put_macro("leapYearChecker")
  if any_match("$[date]","....-02-29")
    unless any_match("$[date]","(\\d{2}(0[48]|[2468][048]|[13579][26])|([02468][048]|[13579][26])00)-02-29")
      replace_all("$[date]","(....-02)-29","$1-28")
    end
  end
end


# DE-Sol1 holding tester: sets the "deSol1Bridge" flag to "true" for a
# matching holding. Records that have a "zdbId" field (ZDB records) are excluded.
#
# Parameter:
#   holdingId - path of the field that holds the holding identifier
#
# NOTE(review): the flag is written to "$i.deSol1Bridge", so the macro relies
# on the caller having bound the loop variable "$i" - confirm at the call sites.
do put_macro("deSol1BridgeTester")
  unless exists("zdbId")
    # Only identifiers ending in "7830" qualify.
    if any_match("$[holdingId]",".*7830$")
      add_field("$i.deSol1Bridge","true")
    end
  end
end
7 changes: 6 additions & 1 deletion conf/hebisMarc2lobid-transformation/fix/otherFields.fix
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ replace_all("extent", " ", " ")
# 500 - General Note (R) Subfield: $a (NR)
add_array("note[]")
do list(path:"500 ", "var": "$i")
copy_field("$i.a", "note[].$append")
unless any_contain("$i.a","In:")
copy_field("$i.a", "note[].$append")
end
end
uniq("note[]")

Expand All @@ -130,6 +132,9 @@ do list(path:"520[ 23] ", "var": "$i")
copy_field("$i.[ab]", "abstract[].$append")
end

replace_all("abstract[].*","^<!\\[CDATA\\[(.*)\\]\\]>$","$1")
replace_all("abstract[].*","<[\\/]?.{1,2}>","")

# 502 - Dissertation Note (R) Subfield: $a (R)
add_array("thesisInformation[]")
do list(path:"502 ", "var": "$i")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ unless any_match("leader", "^.{7}[ad].*")
do list(path: "773??", "var": "$i")
unless any_equal("$i.9","LOCAL")
do list(path: "$i.w", "var": "$j")
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
add_hash("isPartOf[].$append")
add_array("isPartOf[].$last.hasSuperordinate[]")
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$")
Expand All @@ -108,7 +108,7 @@ end
# 490 with first indicator 1 has an identical entry in 830, so only 490 with first indicator 0 is processed here.

do list(path: "4900?", "var": "$i")
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
add_hash("isPartOf[].$append")
add_array("isPartOf[].$last.hasSuperordinate[]")
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
add_array("isPartOf[].$last.hasSuperordinate[].$last.label")
Expand All @@ -130,18 +130,19 @@ end
# The element can be repeated; local entries have subfield $M.

do list(path: "830??", "var": "$i")
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
add_hash("isPartOf[].$append")
add_array("isPartOf[].$last.hasSuperordinate[]")
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
if all_match("$i.w", "^\\((?:DE-600|DE-605)\\)(.*)$")
copy_field("$i.w", "isPartOf[].$last.hasSuperordinate[].$last.id")
end
add_array("isPartOf[].$last.hasSuperordinate[].$last.label")
do list(path:"$i.a", "var":"$j")
copy_field("$j", "isPartOf[].$last.hasSuperordinate[].$last.label.$append")
end
join_field("isPartOf[].$last.hasSuperordinate[].$last.label", " / ")
copy_field("$i.v", "isPartOf[].$last.numbering")
unless is_empty("isPartOf[].$last.hasSuperordinate[].1")
copy_field("$i.v", "isPartOf[].$last.numbering")
end
end

do list(path: "4901?", "var": "$j")
Expand All @@ -164,7 +165,7 @@ if any_match("leader", "^.{7}[ad].*")
do list(path: "773??", "var": "$i")
unless any_equal("$i.9","LOCAL")
do list(path: "$i.w", "var": "$j")
add_array("isPartOf[].$append.type[]", "IsPartOfRelation")
add_hash("isPartOf[].$append")
add_array("isPartOf[].$last.hasSuperordinate[]")
add_hash( "isPartOf[].$last.hasSuperordinate[].$append")
if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$")
Expand Down Expand Up @@ -192,21 +193,26 @@ if any_match("leader", "^.{7}[ad].*")
end
end

do list(path: "isPartOf[].*.hasSuperordinate[]", "var": "$i")
unless exists("$i.label")
copy_field("@title", "$i.label")
do list(path: "isPartOf[]","var":"$i")
unless is_empty("$i.hasSuperordinate[].1")
do list(path:"$i.hasSuperordinate[]", "var": "$j") ## This is the fallback for isPartOf[].*.hasSuperordinate[].*.label
unless exists("$j.label")
copy_field("@title", "$j.label")
end
end
add_array("$i.type[]", "IsPartOfRelation")
end
end

replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-605\\)(.*)$", "http://lobid.org/resources/$1#!")
replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!")

replace_all("isPartOf[].*.numbering", "^[©]|\\s?[,.:;/=]?$", "")
do list(path:"isPartOf[]","var":"$i")
call_macro("lobidResourcesFallbackLabel",field:"$i.hasSuperordinate[]")
end


uniq("isPartOf[]")


replace_all("containedIn[].*.id", "^\\(DE-605\\)(.*)$", "http://lobid.org/resources/$1#!")
replace_all("containedIn[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!")
replace_all("containedIn[].*.label","<<|>>","")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,8 @@ do list(path: "publication[]", "var": "$i")
replace_all("$i.location[].*", "^\\[(.*)\\]$", "$1")
replace_all("$i.location[].*", "\\s?[,:;]$", "")
replace_all("$i.publishedBy[].*", "^[©]|\\s?[,:;/=]?$", "")
call_macro("leapYearChecker",date:"$i.startDate")
call_macro("leapYearChecker",date:"$i.endDate")
uniq("$i.location[]")
end

Expand Down
4 changes: 2 additions & 2 deletions conf/output/test-hebis-to-lobid-output-9.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
"label" : "OCLC Ressource"
} ],
"isPartOf" : [ {
"type" : [ "IsPartOfRelation" ],
"hasSuperordinate" : [ {
"label" : "Beiträge zur Geschichte des Gau-Algesheimer Raumes"
} ],
"numbering" : "42"
"numbering" : "42",
"type" : [ "IsPartOfRelation" ]
} ],
"language" : [ {
"id" : "http://id.loc.gov/vocabulary/iso639-2/ger",
Expand Down

0 comments on commit bb13c82

Please sign in to comment.