From bb13c82f67b60452e88808b5dbfb75cc2019a76c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Fri, 28 Feb 2025 14:04:06 +0100 Subject: [PATCH] Update transformation to current lobid fix RPB-225 --- .../fix/describedBy.fix | 11 +++---- .../fix/macros.fix | 19 ++++++++++++ .../fix/otherFields.fix | 7 ++++- .../fix/relatedRessourcesAndLinks.fix | 30 +++++++++++-------- .../fix/titleRelatedFields.fix | 2 ++ conf/output/test-hebis-to-lobid-output-9.json | 4 +-- 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/conf/hebisMarc2lobid-transformation/fix/describedBy.fix b/conf/hebisMarc2lobid-transformation/fix/describedBy.fix index 2d7a64f..734d7d9 100644 --- a/conf/hebisMarc2lobid-transformation/fix/describedBy.fix +++ b/conf/hebisMarc2lobid-transformation/fix/describedBy.fix @@ -41,9 +41,9 @@ nothing() # currently no transformation for describedBy is needed. # substring("@initialCataloguingDate","0","6") # end # -# if any_match("@initialCataloguingDate","^[0-4]\\d*") # Complete dates after 2000 +# if any_match("@initialCataloguingDate","^[0-4]\\d(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 2000-01-01 to 2049-12-31 ( e.g. matching 491231) # prepend("@initialCataloguingDate","20") -# elsif any_match("@initialCataloguingDate","\\d*") # Complete dates before 2000 +# elsif any_match("@initialCataloguingDate","\\d{2}(0[1-9]|1[012])(0[1-9]|[12][0-9]|3[01])") # Assume dates from 1900-01-01 to 1999-12-31 ( e.g. matching 991231) # prepend("@initialCataloguingDate","19") # else # copy_field("MNG .b","@initialCataloguingDate") @@ -67,6 +67,8 @@ nothing() # currently no transformation for describedBy is needed. # replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})(\\d{2})(\\d{2})$","$1-$2-$3") # replace_all("describedBy.resultOf.object.dateCreated","^(\\d{4})$","$1-01-01") # replace_all("describedBy.resultOf.object.dateModified","^(\\d{4})$","$1-01-01") +# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateCreated") +# call_macro("leapYearChecker",date:"describedBy.resultOf.object.dateModified") # # add_array("describedBy.resultOf.object.type[]", "DataFeedItem") # @@ -74,9 +76,9 @@ nothing() # currently no transformation for describedBy is needed. # prepend("describedBy.resultOf.object.label","hbz-Ressource ") # append("describedBy.resultOf.object.label"," im Exportformat MARC21 XML") # -# add_field("describedBy.resultOf.object.inDataset.id", "http://sru.hebis.de/sru/DB=2.1?version=1.1") +# add_field("describedBy.resultOf.object.inDataset.id", "https://datahub.io/dataset/hbz_unioncatalog") # -# add_field("describedBy.resultOf.object.inDataset.label", "Hebis SRU") +# add_field("describedBy.resultOf.object.inDataset.label", "hbz_unioncatalog") # # add_array("describedBy.license[]") # add_field("describedBy.license[].$append.id","http://creativecommons.org/publicdomain/zero/1.0" ) @@ -115,4 +117,3 @@ nothing() # currently no transformation for describedBy is needed. # end # # uniq("describedBy.resultOf.object.modifiedBy[]") -# diff --git a/conf/hebisMarc2lobid-transformation/fix/macros.fix b/conf/hebisMarc2lobid-transformation/fix/macros.fix index 7a57a62..0d489cb 100644 --- a/conf/hebisMarc2lobid-transformation/fix/macros.fix +++ b/conf/hebisMarc2lobid-transformation/fix/macros.fix @@ -585,3 +585,22 @@ do put_macro("lobidResourcesFallbackLabel") end end +# validate leap years +do put_macro("leapYearChecker") + if any_match("$[date]","....-02-29") + unless any_match("$[date]","(((18|19|20)(04|08|[2468][048]|[13579][26]))|2000)-02-29") + replace_all("$[date]","(....-02)-29","$1-28") + end + end +end + + +# DE Sol1 Holding Tester excludes ZDB Records + +do put_macro("deSol1BridgeTester") + if any_match("$[holdingId]",".*7830$") + unless exists("zdbId") + add_field("$i.deSol1Bridge","true") + end + end +end diff --git a/conf/hebisMarc2lobid-transformation/fix/otherFields.fix b/conf/hebisMarc2lobid-transformation/fix/otherFields.fix index de90ad9..eb3e7d8 100644 --- a/conf/hebisMarc2lobid-transformation/fix/otherFields.fix +++ b/conf/hebisMarc2lobid-transformation/fix/otherFields.fix @@ -112,7 +112,9 @@ replace_all("extent", " ", " ") # 500 - General Note (R) Subfield: $a (NR) add_array("note[]") do list(path:"500 ", "var": "$i") - copy_field("$i.a", "note[].$append") + unless any_contain("$i.a","In:") + copy_field("$i.a", "note[].$append") + end end uniq("note[]") @@ -130,6 +132,9 @@ do list(path:"520[ 23] ", "var": "$i") copy_field("$i.[ab]", "abstract[].$append") end +replace_all("abstract[].*","^$","$1") +replace_all("abstract[].*","<[\\/]?.{1,2}>","") + # 502 - Dissertation Note (R) Subfield: $a (R) add_array("thesisInformation[]") do list(path:"502 ", "var": "$i") diff --git a/conf/hebisMarc2lobid-transformation/fix/relatedRessourcesAndLinks.fix b/conf/hebisMarc2lobid-transformation/fix/relatedRessourcesAndLinks.fix index 1ee1625..ee819fd 100644 --- a/conf/hebisMarc2lobid-transformation/fix/relatedRessourcesAndLinks.fix +++ b/conf/hebisMarc2lobid-transformation/fix/relatedRessourcesAndLinks.fix @@ -86,7 +86,7 @@ unless any_match("leader", "^.{7}[ad].*") do list(path: "773??", "var": "$i") unless any_equal("$i.9","LOCAL") do list(path: "$i.w", "var": "$j") - add_array("isPartOf[].$append.type[]", "IsPartOfRelation") + add_hash("isPartOf[].$append") add_array("isPartOf[].$last.hasSuperordinate[]") add_hash( "isPartOf[].$last.hasSuperordinate[].$append") if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$") @@ -108,7 +108,7 @@ end # 490 with 1. Indicator 1 has an identical entry in 830. So only 490 with 1. Indicator 0 do list(path: "4900?", "var": "$i") - add_array("isPartOf[].$append.type[]", "IsPartOfRelation") + add_hash("isPartOf[].$append") add_array("isPartOf[].$last.hasSuperordinate[]") add_hash( "isPartOf[].$last.hasSuperordinate[].$append") add_array("isPartOf[].$last.hasSuperordinate[].$last.label") @@ -130,18 +130,19 @@ end # Element can be repeatable with local entries they have subfield $M. do list(path: "830??", "var": "$i") - add_array("isPartOf[].$append.type[]", "IsPartOfRelation") + add_hash("isPartOf[].$append") add_array("isPartOf[].$last.hasSuperordinate[]") add_hash( "isPartOf[].$last.hasSuperordinate[].$append") if all_match("$i.w", "^\\((?:DE-600|DE-605)\\)(.*)$") copy_field("$i.w", "isPartOf[].$last.hasSuperordinate[].$last.id") end - add_array("isPartOf[].$last.hasSuperordinate[].$last.label") do list(path:"$i.a", "var":"$j") copy_field("$j", "isPartOf[].$last.hasSuperordinate[].$last.label.$append") end join_field("isPartOf[].$last.hasSuperordinate[].$last.label", " / ") - copy_field("$i.v", "isPartOf[].$last.numbering") + unless is_empty("isPartOf[].$last.hasSuperordinate[].1") + copy_field("$i.v", "isPartOf[].$last.numbering") + end end do list(path: "4901?", "var": "$j") @@ -164,7 +165,7 @@ if any_match("leader", "^.{7}[ad].*") do list(path: "773??", "var": "$i") unless any_equal("$i.9","LOCAL") do list(path: "$i.w", "var": "$j") - add_array("isPartOf[].$append.type[]", "IsPartOfRelation") + add_hash("isPartOf[].$append") add_array("isPartOf[].$last.hasSuperordinate[]") add_hash( "isPartOf[].$last.hasSuperordinate[].$append") if all_match("$j", "^\\((?:DE-600|DE-605)\\)(.*)$") @@ -192,9 +193,14 @@ if any_match("leader", "^.{7}[ad].*") end end -do list(path: "isPartOf[].*.hasSuperordinate[]", "var": "$i") - unless exists("$i.label") - copy_field("@title", "$i.label") +do list(path: "isPartOf[]","var":"$i") + unless is_empty("$i.hasSuperordinate[].1") + do list(path:"$i.hasSuperordinate[]", "var": "$j") ## This is the fallback for isPartOf[].*.hasSuperordinate[].*.label + unless exists("$j.label") + copy_field("@title", "$j.label") + end + end + add_array("$i.type[]", "IsPartOfRelation") end end @@ -202,11 +208,11 @@ replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-605\\)(.*)$", "http: replace_all("isPartOf[].*.hasSuperordinate[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!") replace_all("isPartOf[].*.numbering", "^[©]|\\s?[,.:;/=]?$", "") -do list(path:"isPartOf[]","var":"$i") - call_macro("lobidResourcesFallbackLabel",field:"$i.hasSuperordinate[]") -end + uniq("isPartOf[]") + + replace_all("containedIn[].*.id", "^\\(DE-605\\)(.*)$", "http://lobid.org/resources/$1#!") replace_all("containedIn[].*.id", "^\\(DE-600\\)(.*)$", "http://lobid.org/resources/ZDB-$1#!") replace_all("containedIn[].*.label","<<|>>","") diff --git a/conf/hebisMarc2lobid-transformation/fix/titleRelatedFields.fix b/conf/hebisMarc2lobid-transformation/fix/titleRelatedFields.fix index 8fa1c5f..8fe1858 100644 --- a/conf/hebisMarc2lobid-transformation/fix/titleRelatedFields.fix +++ b/conf/hebisMarc2lobid-transformation/fix/titleRelatedFields.fix @@ -326,6 +326,8 @@ do list(path: "publication[]", "var": "$i") replace_all("$i.location[].*", "^\\[(.*)\\]$", "$1") replace_all("$i.location[].*", "\\s?[,:;]$", "") replace_all("$i.publishedBy[].*", "^[©]|\\s?[,:;/=]?$", "") + call_macro("leapYearChecker",date:"$i.startDate") + call_macro("leapYearChecker",date:"$i.endDate") uniq("$i.location[]") end diff --git a/conf/output/test-hebis-to-lobid-output-9.json b/conf/output/test-hebis-to-lobid-output-9.json index dd123e0..0ebe1b0 100644 --- a/conf/output/test-hebis-to-lobid-output-9.json +++ b/conf/output/test-hebis-to-lobid-output-9.json @@ -21,11 +21,11 @@ "label" : "OCLC Ressource" } ], "isPartOf" : [ { - "type" : [ "IsPartOfRelation" ], "hasSuperordinate" : [ { "label" : "Beiträge zur Geschichte des Gau-Algesheimer Raumes" } ], - "numbering" : "42" + "numbering" : "42", + "type" : [ "IsPartOfRelation" ] } ], "language" : [ { "id" : "http://id.loc.gov/vocabulary/iso639-2/ger",