Index RPB titles into a timestamped Elasticsearch index behind an alias.

The Flux pipeline takes the target index name from an overridable `index`
variable (default "resources-rpb-test"). transformAndIndex.sh passes
"resources-rpb-<timestamp>" for it, creates that index, bulk-loads the
generated files, then moves the "resources-rpb-test" alias onto the new index.

NOTE(review): leading indentation of nested lines (the for-loop body and the
JSON request body) was reconstructed -- verify against the target file before
applying.

diff --git a/conf/rpb-titel-to-lobid.flux b/conf/rpb-titel-to-lobid.flux
index c0ca0400..1bdee378 100644
--- a/conf/rpb-titel-to-lobid.flux
+++ b/conf/rpb-titel-to-lobid.flux
@@ -1,4 +1,5 @@
 default outfile = "conf/output/bulk/bulk-${i}.ndjson";
+default index = "resources-rpb-test";
 "conf/output/output-strapi.ndjson"
 | open-file
 | as-lines
@@ -6,6 +7,6 @@ default outfile = "conf/output/bulk/bulk-${i}.ndjson";
 | fix(FLUX_DIR + "rpb-titel-to-lobid.fix")
 | batch-reset(batchsize="1000")
 | encode-json(prettyPrinting="false")
-| json-to-elasticsearch-bulk(idkey="id", type="resource", index="resources-rpb-test")
+| json-to-elasticsearch-bulk(idkey="id", type="resource", index=index)
 | write(outfile)
 ;
diff --git a/transformAndIndex.sh b/transformAndIndex.sh
index d6d86ac1..1c2d94ca 100644
--- a/transformAndIndex.sh
+++ b/transformAndIndex.sh
@@ -2,22 +2,35 @@
 set -eu
 IFS=$'\n\t'
 
+TIME=$(date "+%Y%m%d-%H%M")
+INDEX="resources-rpb-$TIME"
+ALIAS="resources-rpb-test"
+
 # Get the daily Allegro dump:
 cd conf
 wget http://www.rpb-rlp.de/rpb/rpb04/intern/RPBEXP.zip
 unzip -o RPBEXP.zip
-mv RPBEXP.zip RPBEXP/RPBEXP-$(date "+%Y%m%d-%H%M").zip
+mv RPBEXP.zip RPBEXP/RPBEXP-$TIME.zip
 cd ..
 
 # Transform the data:
 sbt "runMain rpb.ETL conf/rpb-sw.flux"
 sbt "runMain rpb.ETL conf/rpb-titel-to-strapi.flux"
-sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux"
+sbt "runMain rpb.ETL conf/rpb-titel-to-lobid.flux index=$INDEX"
 
 # Index to Elasticsearch:
 unset http_proxy # for posting to weywot3
+curl -XPUT -H "Content-Type: application/json" weywot3:9200/$INDEX?pretty -d @../lobid-resources-rpb/src/main/resources/alma/index-config.json
 for filename in conf/output/bulk/bulk-*.ndjson
 do
 	echo "$filename"
 	curl -XPOST --header 'Content-Type: application/x-ndjson' --data-binary @"$filename" 'weywot3:9200/_bulk'
 done
+curl -X POST "weywot3:9200/_aliases?pretty" -H 'Content-Type: application/json' -d'
+{
+  "actions" : [
+    { "remove" : { "index" : "*", "alias" : "'"$ALIAS"'" } },
+    { "add" : { "index" : "'"$INDEX"'", "alias" : "'"$ALIAS"'" } }
+  ]
+}
+'