-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_dark_blue.rb
235 lines (205 loc) · 7.44 KB
/
run_dark_blue.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# Entry-point script: deliver Archivematica ("Dark Blue") packages to APTrust.
require "optparse"
require_relative "services"
# Application-wide configuration resolved via the services container.
config = S.config
# Shared database handle; falsy when no database is configured, in which case
# the repository factories below receive use_db: nil/false.
# NOTE(review): what the factories do without a DB isn't visible here — confirm.
DB = config.database && S.dbconnect
require_relative "lib/archivematica"
require_relative "lib/bag_repository"
require_relative "lib/bag_validator"
require_relative "lib/data_transfer"
require_relative "lib/dispatcher"
require_relative "lib/metrics"
require_relative "lib/remote_client"
require_relative "lib/repository_package_repository"
require_relative "lib/status_event_repository"
# Raised when a Dark Blue run cannot proceed (e.g. an unknown package
# identifier or an unconfigured Archivematica instance).
DarkBlueError = Class.new(StandardError)
# Coordinates discovery and delivery of Archivematica packages.
#
# For each configured Archivematica instance the job finds packages stored
# since the last recorded run, records them via the repository package
# repository, and dispatches each one for bagging and delivery to the
# configured APTrust remote. Individual packages can also be re-delivered
# by identifier.
class DarkBlueJob
  include DarkBlueLogger

  # bag-info.txt keys for Dark Blue-specific metadata added to each bag.
  module ExtraBagInfoData
    CONTENT_TYPE_KEY = "Dark-Blue-Content-Type"
    LOCATION_UUID_KEY = "AIP-Storage-Location-UUID"
  end

  # @param config [Object] application configuration exposing settings,
  #   repository, aptrust, and dark_blue.archivematicas sections
  def initialize(config)
    @package_repo = RepositoryPackageRepository::RepositoryPackageRepositoryFactory.for(use_db: DB)
    @bag_repo = BagRepository::BagRepositoryFactory.for(use_db: DB)
    @status_event_repo = S.status_event_repo
    @settings_config = config.settings
    @repository_config = config.repository
    @aptrust_config = config.aptrust
    @arch_configs = config.dark_blue.archivematicas
    @object_size_limit = config.settings.object_size_limit
    @detect_hidden = config.settings.detect_hidden
  end

  # Builds a dispatcher that bags packages and sends them to the APTrust
  # remote configured for this job.
  #
  # @param context [String] Archivematica instance name used as the bag context
  # @param extra_bag_info_data [Hash] extra bag-info.txt entries
  # @return [Dispatcher::APTrustDispatcher]
  def create_dispatcher(context:, extra_bag_info_data:)
    Dispatcher::APTrustDispatcher.new(
      settings: @settings_config.workflow,
      repository: @repository_config,
      target_client: RemoteClient::RemoteClientFactory.from_config(
        type: @aptrust_config.remote.type,
        settings: @aptrust_config.remote.settings
      ),
      context: context,
      extra_bag_info_data: extra_bag_info_data,
      detect_hidden: @detect_hidden,
      status_event_repo: @status_event_repo,
      bag_repo: @bag_repo
    )
  end

  # Builds an Archivematica service client for one configured instance.
  def prepare_arch_service(name:, api_config:)
    Archivematica::ArchivematicaService.new(
      name: name,
      api: Archivematica::ArchivematicaAPI.from_config(
        base_url: api_config.base_url,
        api_key: api_config.api_key,
        username: api_config.username
      ),
      location_uuid: api_config.location_uuid
    )
  end
  private :prepare_arch_service

  # @return [Hash] extra bag-info.txt entries recording the content type
  #   (instance name) and the AIP storage location UUID
  def create_extra_bag_info_data(content_type:, location_uuid:)
    {
      ExtraBagInfoData::CONTENT_TYPE_KEY => content_type,
      ExtraBagInfoData::LOCATION_UUID_KEY => location_uuid
    }
  end

  # Transfers one package from its source and delivers the resulting bag,
  # logging the elapsed delivery time.
  def deliver_package(dispatcher:, package_data:, remote_client:)
    courier = dispatcher.dispatch(
      object_metadata: package_data.metadata,
      data_transfer: DataTransfer::RemoteClientDataTransfer.new(
        remote_client: remote_client,
        remote_path: package_data.remote_path
      ),
      validator: InnerBagValidator.new(
        inner_bag_name: package_data.dir_name, detect_hidden: @detect_hidden
      )
    )
    logger.measure_info("Delivered package #{package_data.metadata.id}.") do
      courier.deliver
    end
  end
  private :deliver_package

  # Re-delivers a previously recorded package by identifier.
  #
  # @raise [DarkBlueError] when the package, its configured Archivematica
  #   instance, or its package data cannot be found
  def redeliver_package(identifier)
    logger.info("Re-delivering Archivematica package #{identifier}")
    package = @package_repo.get_by_identifier(identifier)
    unless package
      message = "No repository package was found with identifier #{identifier}"
      raise DarkBlueError, message
    end
    arch_config = @arch_configs.find { |ac| ac.repository_name == package.repository_name }
    unless arch_config
      message = "No configured Archivematica instance was found " \
        "with name #{package.repository_name}."
      raise DarkBlueError, message
    end
    extra_bag_info_data = create_extra_bag_info_data(
      content_type: arch_config.name, location_uuid: arch_config.api.location_uuid
    )
    dispatcher = create_dispatcher(context: arch_config.name, extra_bag_info_data: extra_bag_info_data)
    arch_service = prepare_arch_service(name: arch_config.name, api_config: arch_config.api)
    package_data = arch_service.get_package_data_object(package.identifier)
    unless package_data
      message = "No package with identifier #{package.identifier} was found " \
        "in #{arch_config.name} Archivematica instance."
      raise DarkBlueError, message
    end
    source_remote_client = RemoteClient::RemoteClientFactory.from_config(
      type: arch_config.remote.type,
      settings: arch_config.remote.settings
    )
    deliver_package(
      dispatcher: dispatcher, package_data: package_data, remote_client: source_remote_client
    )
  end
  private :redeliver_package

  # Re-delivers each of the given package identifiers in turn.
  def redeliver_packages(package_identifiers)
    package_identifiers.each { |pi| redeliver_package(pi) }
  end

  # Finds, records, and delivers new packages from one Archivematica instance.
  def process_arch_instance(arch_config)
    logger.info(
      "Starting search and delivery process for new packages " \
      "in Archivematica instance #{arch_config.name}"
    )
    extra_bag_info_data = create_extra_bag_info_data(
      content_type: arch_config.name, location_uuid: arch_config.api.location_uuid
    )
    dispatcher = create_dispatcher(context: arch_config.name, extra_bag_info_data: extra_bag_info_data)
    arch_service = prepare_arch_service(name: arch_config.name, api_config: arch_config.api)
    source_remote_client = RemoteClient::RemoteClientFactory.from_config(
      type: arch_config.remote.type,
      settings: arch_config.remote.settings
    )
    # Only fetch packages stored after the most recent recorded delivery.
    max_updated_at = @package_repo.get_max_updated_at_for_repository(arch_config.repository_name)
    num_objects_per_repo = @settings_config.num_objects_per_repo
    # Use the @object_size_limit captured in #initialize (it was previously
    # assigned there but never read; the value is the same setting).
    package_data_objs = arch_service.get_package_data_objects(
      stored_date: max_updated_at,
      **(@object_size_limit ? {package_filter: Archivematica::SizePackageFilter.new(@object_size_limit)} : {})
    )
    # Cap the batch size per repository when configured.
    if num_objects_per_repo && package_data_objs.length > num_objects_per_repo
      package_data_objs = package_data_objs.take(num_objects_per_repo)
    end
    package_data_objs.each do |package_data|
      logger.debug(package_data)
      # Record the package; if it already exists, bump its updated_at so the
      # next run's stored_date window advances past it.
      created = @package_repo.create(
        identifier: package_data.metadata.id,
        repository_name: arch_config.repository_name,
        updated_at: package_data.stored_time
      )
      unless created
        @package_repo.update_updated_at(
          identifier: package_data.metadata.id,
          updated_at: package_data.stored_time
        )
      end
      deliver_package(
        dispatcher: dispatcher, package_data: package_data, remote_client: source_remote_client
      )
    end
  end

  # Runs the full search-and-delivery process for every configured instance.
  def process
    @arch_configs.each { |ac| process_arch_instance(ac) }
  end
end
# Value object holding parsed command-line options. :packages is an Array of
# package identifiers to re-deliver; empty when the job should instead
# discover and deliver new packages.
DarkBlueOptions = Struct.new(:packages)

# Parses command-line arguments for run_dark_blue.rb.
class DarkBlueParser
  # @param options [Array<String>] raw argv; recognized flags are consumed
  #   in place by parse!
  # @return [DarkBlueOptions] packages is [] unless -p/--packages was given
  def self.parse(options)
    # Seed with an empty array so stray positional arguments are never
    # mistaken for package identifiers. (Previously the raw argv array was
    # aliased into the struct, so any leftover positional argument silently
    # became a "package".)
    args = DarkBlueOptions.new([])
    opt_parser = OptionParser.new do |parser|
      parser.banner = "Usage: run_dark_blue.rb [options]"
      parser.on(
        "-pPACKAGES",
        "--packages=PACKAGES",
        Array,
        "List of comma-separated package identifiers"
      ) do |p|
        args.packages = p
      end
      parser.on("-h", "--help", "Prints this help") do
        puts parser
        exit
      end
    end
    opt_parser.parse!(options)
    args
  end
end
# Entry point: parse CLI arguments, run the job (timing the work), and push
# run metrics to the gateway when metrics are configured.
dark_blue_job = DarkBlueJob.new(config)
options = DarkBlueParser.parse ARGV
start_time, end_time = Metrics::Timer.time_processing do
  if options.packages.empty?
    dark_blue_job.process
  else
    dark_blue_job.redeliver_packages(options.packages)
  end
end

if config.metrics
  metrics = Metrics::MetricsProvider.new(
    start_time: start_time,
    end_time: end_time,
    status_event_repo: S.status_event_repo,
    push_gateway_url: config.metrics.push_gateway_url,
    cluster_namespace: config.metrics.cluster_namespace
  )
  metrics.collect
end