From 840f486bc29435aa16c9d1bfb6f032af9d1aab50 Mon Sep 17 00:00:00 2001 From: "Michael J. Giarlo" Date: Wed, 26 Oct 2022 15:11:10 -0700 Subject: [PATCH] Log when total zip part size is less than the moab size Fixes #1742 This commit adds a new audit result to the catalog-to-archive audit service such that, when the total zip part size for a zipped moab version is less than the size of the moab on disk, this condition is reported. Reporting this information will help us find and remediate replication errors. As part of this work, as a convenience, add a new `#total_part_size` method to the `ZippedMoabVersion` model. --- .rubocop_todo.yml | 23 ++++++------ app/models/audit_results.rb | 4 +++ app/models/druid_version_zip.rb | 8 ++--- app/models/preserved_object.rb | 6 ++++ app/models/zipped_moab_version.rb | 6 +++- app/services/audit/catalog_to_archive.rb | 9 +++++ spec/models/druid_version_zip_spec.rb | 10 +++++- spec/models/preserved_object_spec.rb | 35 +++++++++++++++++++ spec/models/zipped_moab_version_spec.rb | 25 +++++++++++++ .../services/audit/catalog_to_archive_spec.rb | 23 +++++++++++- 10 files changed, 129 insertions(+), 20 deletions(-) diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 1f6a8ad11..d966debed 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config --auto-gen-only-exclude` -# on 2022-10-21 20:17:00 UTC using RuboCop version 1.36.0. +# on 2022-10-26 21:57:32 UTC using RuboCop version 1.37.1. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -14,7 +14,7 @@ Layout/LineLength: Exclude: - 'app/models/druid_version_zip.rb' -# Offense count: 32 +# Offense count: 27 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods. Lint/AmbiguousBlockAssociation: Exclude: @@ -23,7 +23,6 @@ Lint/AmbiguousBlockAssociation: - 'spec/services/complete_moab_service/shared_examples.rb' - 'spec/services/complete_moab_service/update_version_after_validation_spec.rb' - 'spec/services/complete_moab_service/update_version_spec.rb' - - 'spec/services/shared_examples_complete_moab_handler.rb' # Offense count: 2 Lint/MissingSuper: @@ -34,7 +33,7 @@ Lint/MissingSuper: # Offense count: 23 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes. Metrics/AbcSize: - Max: 66 + Max: 43 # Offense count: 2 # Configuration parameters: CountBlocks, Max. @@ -49,25 +48,23 @@ Metrics/ClassLength: - 'app/controllers/objects_controller.rb' - 'app/models/druid_version_zip.rb' -# Offense count: 3 +# Offense count: 2 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, Max. Metrics/CyclomaticComplexity: Exclude: - - 'app/services/complete_moab_service/check_existence.rb' - - 'app/services/complete_moab_service/update_version.rb' + - 'app/services/audit/catalog_to_archive.rb' - 'app/services/complete_moab_service/update_version_after_validation.rb' -# Offense count: 40 +# Offense count: 38 # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods. Metrics/MethodLength: Max: 33 -# Offense count: 3 +# Offense count: 2 # Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, Max. Metrics/PerceivedComplexity: Exclude: - - 'app/services/complete_moab_service/check_existence.rb' - - 'app/services/complete_moab_service/update_version.rb' + - 'app/services/audit/catalog_to_archive.rb' - 'app/services/complete_moab_service/update_version_after_validation.rb' # Offense count: 3 @@ -89,7 +86,7 @@ RSpec/InstanceVariable: Exclude: - 'spec/models/moab_storage_directory_spec.rb' -# Offense count: 378 +# Offense count: 381 # Configuration parameters: AllowSubject. RSpec/MultipleMemoizedHelpers: Max: 21 @@ -116,7 +113,7 @@ RSpec/SubjectDeclaration: - 'spec/services/reporters/honeybadger_reporter_spec.rb' - 'spec/services/reporters/logger_reporter_spec.rb' -# Offense count: 17 +# Offense count: 18 # Configuration parameters: EnforcedStyle. # SupportedStyles: slashes, arguments Rails/FilePath: diff --git a/app/models/audit_results.rb b/app/models/audit_results.rb index c8340f201..2b877fa40 100644 --- a/app/models/audit_results.rb +++ b/app/models/audit_results.rb @@ -32,6 +32,7 @@ class AuditResults ZIP_PART_NOT_FOUND = :zip_part_not_found ZIP_PARTS_COUNT_DIFFERS_FROM_ACTUAL = :zip_parts_count_differs_from_actual ZIP_PARTS_COUNT_INCONSISTENCY = :zip_parts_count_inconsistency + ZIP_PARTS_SIZE_INCONSISTENCY = :zip_parts_size_inconsistency ZIP_PARTS_NOT_ALL_REPLICATED = :zip_parts_not_all_replicated ZIP_PARTS_NOT_CREATED = :zip_parts_not_created @@ -75,6 +76,9 @@ class AuditResults 'number of zip parts rows (%{actual_count})', ZIP_PARTS_COUNT_INCONSISTENCY => '%{version} on %{endpoint_name}: ' \ 'ZippedMoabVersion has variation in child parts_counts: %{child_parts_counts}', + ZIP_PARTS_SIZE_INCONSISTENCY => '%{version} on %{endpoint_name}: ' \ + 'Sum of ZippedMoabVersion child part sizes (%{total_part_size}) is less than what is in ' \ + 'the Moab: %{moab_version_size}', ZIP_PARTS_NOT_ALL_REPLICATED => '%{version} on %{endpoint_name}: not all ' \ 'ZippedMoabVersion parts are replicated yet: %{unreplicated_parts_list}', ZIP_PARTS_NOT_CREATED => '%{version} on %{endpoint_name}: no zip_parts exist yet for this ZippedMoabVersion' diff --git a/app/models/druid_version_zip.rb b/app/models/druid_version_zip.rb index 27669360a..4b6af653d 100644 --- a/app/models/druid_version_zip.rb +++ b/app/models/druid_version_zip.rb @@ -166,6 +166,10 @@ def zip_split_size '10g' end + def moab_version_size + moab_version_files.sum { |f| File.size(f) } + end + private # Throws an error if any of the files in the moab are not yet readable. For example due to @@ -192,10 +196,6 @@ def total_part_size part_paths.sum { |part_path| File.size(part_path) } end - def moab_version_size - moab_version_files.sum { |f| File.size(f) } - end - def moab_version_files raise "Moab version does not exist: #{moab_version_path}" unless File.exist?(moab_version_path) Dir diff --git a/app/models/preserved_object.rb b/app/models/preserved_object.rb index e00541697..4e35f5fbe 100644 --- a/app/models/preserved_object.rb +++ b/app/models/preserved_object.rb @@ -61,6 +61,12 @@ def audit_moab_version_replication! MoabReplicationAuditJob.perform_later(self) end + def total_size_of_moab_version(version) + return 0 unless complete_moab + + DruidVersionZip.new(druid, version, complete_moab.moab_storage_root.storage_location).moab_version_size + end + # Number of PreservedObjects to audit on a daily basis. def self.daily_check_count PreservedObject.count / (PreservationPolicy.default_policy.fixity_ttl / (60 * 60 * 24)) diff --git a/app/models/zipped_moab_version.rb b/app/models/zipped_moab_version.rb index d2b6f2320..64f77ea54 100644 --- a/app/models/zipped_moab_version.rb +++ b/app/models/zipped_moab_version.rb @@ -6,7 +6,7 @@ # ZippedMoabVersion objects should be: # preserved_object.current_version * number_of_zip_endpoints_for_preservation_policy # -# @note Does not have size independent of part(s) +# @note Does not have size independent of part(s), see `#total_part_size` class ZippedMoabVersion < ApplicationRecord belongs_to :preserved_object, inverse_of: :zipped_moab_versions belongs_to :zip_endpoint, inverse_of: :zipped_moab_versions @@ -37,4 +37,8 @@ def all_parts_replicated? # has been created and completely written to disk. see DruidVersionZip. zip_parts.count.positive? && zip_parts.all?(&:ok?) end + + def total_part_size + zip_parts.sum(&:size) + end end diff --git a/app/services/audit/catalog_to_archive.rb b/app/services/audit/catalog_to_archive.rb index 768d5960c..b50ce7ee0 100644 --- a/app/services/audit/catalog_to_archive.rb +++ b/app/services/audit/catalog_to_archive.rb @@ -15,6 +15,15 @@ def self.check_child_zip_part_attributes(zmv, results) return false # everything else relies on checking parts, nothing left to do end + total_part_size = zmv.total_part_size + moab_version_size = zmv.preserved_object.total_size_of_moab_version(zmv.version) + if total_part_size < moab_version_size + results.add_result( + AuditResults::ZIP_PARTS_SIZE_INCONSISTENCY, + base_hash.merge(total_part_size: total_part_size, moab_version_size: moab_version_size) + ) + end + child_parts_counts = zmv.child_parts_counts if child_parts_counts.length > 1 results.add_result( diff --git a/spec/models/druid_version_zip_spec.rb b/spec/models/druid_version_zip_spec.rb index 3367cfe97..f08fa015d 100644 --- a/spec/models/druid_version_zip_spec.rb +++ b/spec/models/druid_version_zip_spec.rb @@ -101,7 +101,7 @@ after { FileUtils.rm_rf('/tmp/bj') } # cleanup context 'when zip size is less than the moab size' do - let(:moab_version_size) { dvz.send(:moab_version_size) } + let(:moab_version_size) { dvz.moab_version_size } let(:total_part_size) { moab_version_size / 2 } before do @@ -266,6 +266,14 @@ end end + describe '#moab_version_size' do + let(:dvz) { described_class.new(druid, version, 'spec/fixtures/storage_root01/sdr2objects') } + + it 'returns the sum of all file sizes in a moab version' do + expect(dvz.moab_version_size).to eq(1_928_387) + end + end + describe '#part_keys' do let(:druid) { 'dc048cw1328' } diff --git a/spec/models/preserved_object_spec.rb b/spec/models/preserved_object_spec.rb index a89aec96f..063c21939 100644 --- a/spec/models/preserved_object_spec.rb +++ b/spec/models/preserved_object_spec.rb @@ -199,4 +199,39 @@ expect(described_class.daily_check_count).to eq 11 end end + + describe '#total_size_of_moab_version' do + let(:druid) { 'bz514sm9647' } + let(:preserved_object) { create(:preserved_object, druid: druid, current_version: current_version) } + let(:current_version) { 3 } + + context 'when complete moab is nil' do + it 'returns 0' do + expect(preserved_object.total_size_of_moab_version(current_version)).to eq(0) + end + end + + context 'when complete moab exists' do + let!(:cm1) { create(:complete_moab, preserved_object: preserved_object, version: current_version, moab_storage_root: msr) } # rubocop:disable RSpec/LetSetup + + context 'when moab version path does not exist' do + let(:msr) { create(:moab_storage_root) } + + it 'raises a runtime error' do + expect { preserved_object.total_size_of_moab_version(current_version) }.to raise_error( + RuntimeError, + /Moab version does not exist:/ + ) + end + end + + context 'when moab version exists' do + let(:msr) { MoabStorageRoot.find_by!(storage_location: 'spec/fixtures/storage_root01/sdr2objects') } + + it 'returns the sum of the file sizes' do + expect(preserved_object.total_size_of_moab_version(current_version)).to eq(37_989) + end + end + end + end end diff --git a/spec/models/zipped_moab_version_spec.rb b/spec/models/zipped_moab_version_spec.rb index d13844367..d77e64181 100644 --- a/spec/models/zipped_moab_version_spec.rb +++ b/spec/models/zipped_moab_version_spec.rb @@ -33,6 +33,31 @@ end end + describe '#total_part_size' do + context 'there are no parts' do + it 'returns 0' do + expect(zmv.total_part_size).to eq(0) + end + end + + context 'there are parts' do + before do + args = attributes_for(:zip_part) + zmv.zip_parts.create!( + [ + args.merge(status: 'ok', parts_count: 1, suffix: '.zip', size: 1234), + args.merge(status: 'ok', parts_count: 1, suffix: '.z01', size: 1234), + args.merge(status: 'ok', parts_count: 1, suffix: '.z02', size: 1234) + ] + ) + end + + it 'returns the sum of the part sizes' do + expect(zmv.total_part_size).to eq(3702) + end + end + end + describe '#child_parts_counts' do context 'there are no parts' do it 'returns an empty list' do diff --git a/spec/services/audit/catalog_to_archive_spec.rb b/spec/services/audit/catalog_to_archive_spec.rb index 8827429ce..7f3c9c5dc 100644 --- a/spec/services/audit/catalog_to_archive_spec.rb +++ b/spec/services/audit/catalog_to_archive_spec.rb @@ -3,7 +3,7 @@ require 'rails_helper' RSpec.describe Audit::CatalogToArchive do - let(:zmv) { create(:zipped_moab_version) } + let(:zmv) { create(:zipped_moab_version, preserved_object: create(:preserved_object_fixture, druid: 'bz514sm9647')) } let(:results) { AuditResults.new(druid: zmv.preserved_object.druid, moab_storage_root: zmv.zip_endpoint, check_name: 'CatalogToArchiveSpec') } let(:version) { zmv.version } let(:endpoint_name) { zmv.zip_endpoint.endpoint_name } @@ -90,6 +90,27 @@ end end + context 'when total part size is less than the moab size' do + before do + args = attributes_for(:zip_part) + zmv.zip_parts.create!( + [ + args.merge(status: 'ok', parts_count: 3, suffix: '.zip', size: 111), + args.merge(status: 'ok', parts_count: 3, suffix: '.z01', size: 222), + args.merge(status: 'ok', parts_count: 3, suffix: '.z02', size: 333) + ] + ) + end + + it 'logs the discrepancy' do + described_class.check_child_zip_part_attributes(zmv, results) + msg = "#{result_prefix}: Sum of ZippedMoabVersion child part sizes (666) is less than what is in the Moab: 202938" + expect(results.results).to include( + a_hash_including(AuditResults::ZIP_PARTS_SIZE_INCONSISTENCY => msg) + ) + end + end + context 'zip parts have been created' do context 'some parts are unreplicated' do before do