Skip to content

Commit

Permalink
Set up external records bulk transformation (RPB-248)
Browse files Browse the repository at this point in the history
  • Loading branch information
fsteeg committed Mar 4, 2025
1 parent 7dcaf83 commit 08c5fa4
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 2 deletions.
10 changes: 9 additions & 1 deletion app/controllers/nwbib/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,14 @@ public static Promise<Result> delete(String id, String secret) throws FileNotFou
}
}

/**
 * PUT variant that takes the record ID from the request body instead of the URL path.
 *
 * Reads the "rpbId" field of the current request's JSON body and delegates to
 * put(String, String) with that ID.
 *
 * NOTE(review): there is no null check here, so a body that is not parsed as JSON or that
 * has no "rpbId" field presumably ends in a NullPointerException before put is reached --
 * confirm whether a 400 response would be preferable.
 *
 * @param secret the secret passed through unchanged to put
 * @return the result promise returned by put
 */
public static Promise<Result> putIdFromData(String secret)
        throws FileNotFoundException, RecognitionException, IOException {
    JsonNode requestJson = request().body().asJson();
    String rpbId = requestJson.get("rpbId").textValue();
    return put(rpbId, secret);
}

/**
 * DELETE variant that takes the record ID from the request body instead of the URL path.
 *
 * Extracts the "rpbId" field from the current request's JSON body and hands it, together
 * with the given secret, to delete(String, String).
 *
 * NOTE(review): neither the parsed body nor the "rpbId" field is null-checked; a request
 * without a JSON body or without that field presumably fails with a NullPointerException
 * before delete is called -- confirm this is acceptable.
 *
 * @param secret the secret passed through unchanged to delete
 * @return the result promise returned by delete
 */
public static Promise<Result> deleteIdFromData(String secret)
        throws FileNotFoundException, RecognitionException, IOException {
    final JsonNode idField = request().body().asJson().get("rpbId");
    final String idToDelete = idField.textValue();
    return delete(idToDelete, secret);
}

private static Promise<Result> deleteFromIndex(String id) throws UnsupportedEncodingException {
Cache.remove(String.format("/%s", id));
WSRequest request = WS.url(elasticsearchUrl(id)).setHeader("Content-Type", "application/json");
Expand Down Expand Up @@ -1008,7 +1016,7 @@ private static JsonNode transform(JsonNode jsonBody)

private static Promise<JsonNode> addToLobidData(JsonNode transformedJson) {
String lobidUrl = transformedJson.get("hbzId").textValue();
WSRequest lobidRequest = WS.url(lobidUrl).setHeader("Content-Type", "application/json");
WSRequest lobidRequest = WS.url(lobidUrl).setQueryParameter("format", "json");
Promise<JsonNode> lobidPromise = lobidRequest.get().map(WSResponse::asJson);
Promise<JsonNode> merged = lobidPromise.map(lobidJson -> mergeRecords(transformedJson, lobidJson));
return merged;
Expand Down
4 changes: 3 additions & 1 deletion conf/nwbib.routes
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ GET /sw/:rpbId controllers.nwbib.Application.showSw(rpbId)
GET /o:id controllers.nwbib.Application.searchSpatial(id, from:Int?=0, size:Int?=25, format?="html")
GET /:id controllers.nwbib.Application.show(id, format ?= "")
PUT /:id controllers.nwbib.Application.put(id, secret ?= "")
DELETE /:id controllers.nwbib.Application.delete(id, secret ?= "")
DELETE /:id controllers.nwbib.Application.delete(id, secret ?= "")
PUT / controllers.nwbib.Application.putIdFromData(secret ?= "")
DELETE / controllers.nwbib.Application.deleteIdFromData(secret ?= "")
10 changes: 10 additions & 0 deletions conf/rpb-titel-to-lobid-external.flux
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Reads the file named by `input` line by line and sends each line in an HTTP PUT request
// to `url`, passing `secret` as the "secret" query parameter; the response lines are printed.
// `input` has no default in this file, so it must be supplied by the caller
// (e.g. as input=... on the command line).

// NOTE(review): `outfile` is not referenced anywhere in this flow -- presumably carried over
// from another flux file; confirm whether it can be removed.
default outfile = "conf/output/bulk/bulk-${i}.ndjson";
// Target endpoint for the PUT requests; the default points at a local instance.
default url = "http://localhost:9000/";
// Secret appended to the URL as a query parameter; empty by default.
default secret = "";
input
| open-file
| as-lines
// body="@-" presumably means "use the incoming line as the request body" -- TODO confirm
// against the open-http documentation.
| open-http(url=url+"?secret="+secret, method="PUT", body="@-", contentType="application/json")
| as-lines
| print
;
15 changes: 15 additions & 0 deletions transformAndIndex.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ TIME=$(date "+%Y%m%d-%H%M")
INDEX="resources-rpb-$TIME"
ALIAS="resources-rpb-test"

RPB_INPUT="conf/output/output-strapi-external-rpb.ndjson"
RPB_URL="http://test.rpb.lobid.org/"

VINO_INPUT="conf/output/output-strapi-external-vino.ndjson"
VINO_URL="http://test.wein.lobid.org/"

RPB_SECRET=""

# Transform the Strapi data

# Get rpb-authority data from Strapi export:
Expand All @@ -17,9 +25,16 @@ sbt "runMain rpb.ETL conf/rpb-sw.flux" # creates TSV lookup file for to-lobid tr
# Instead, we use the backup exports created in Strapi lifecycle afterCreate and afterUpdate hooks (copy from backup/ in Strapi instance):
cat conf/articles.ndjson | grep '"data"' | jq -c .data > conf/output/output-strapi.ndjson
cat conf/independent_works.ndjson | grep '"data"' | jq -c .data >> conf/output/output-strapi.ndjson
# External records: pull the "data" objects out of the Strapi backup export, then split them
# into one file for lines tagged "nur RPB" / "RPB und BiblioVino" and one for lines tagged
# "nur BiblioVino" / "RPB und BiblioVino".
# Each file is truncated (>) instead of appended to (>>): nothing else in this script clears
# these files, so appending would accumulate duplicate records on every re-run.
grep '"data"' conf/external_records.ndjson | jq -c .data > conf/output/output-strapi-external.ndjson
grep '"nur RPB"\|"RPB und BiblioVino"' conf/output/output-strapi-external.ndjson > conf/output/output-strapi-external-rpb.ndjson
grep '"nur BiblioVino"\|"RPB und BiblioVino"' conf/output/output-strapi-external.ndjson > conf/output/output-strapi-external-vino.ndjson
# Remove old index data:
rm conf/output/bulk/bulk-*.ndjson
# Transform:
sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux index=$INDEX"
sbt "runMain rpb.ETL conf/rpb-titel-to-lobid-external.flux input=$RPB_INPUT url=$RPB_URL secret=$RPB_SECRET"
sbt "runMain rpb.ETL conf/rpb-titel-to-lobid-external.flux input=$VINO_INPUT url=$VINO_URL secret=$RPB_SECRET"

# Index to Elasticsearch:
unset http_proxy # for posting to weywot3
Expand Down

0 comments on commit 08c5fa4

Please sign in to comment.