From a604518de849a83428bab5814d443da2221915a9 Mon Sep 17 00:00:00 2001 From: "Michael J. Giarlo" Date: Wed, 26 Aug 2020 15:20:25 -0700 Subject: [PATCH] Revert indexers work from #461 Because it breaks production and people have work to do. Re-opens #457 which will be discussed on Tuesday. --- Gemfile | 1 - Gemfile.lock | 9 - app/controllers/hydrus_solr_controller.rb | 3 +- ...inistrative_metadata_datastream_indexer.rb | 13 -- app/indexers/administrative_tag_indexer.rb | 50 ----- app/indexers/composite_indexer.rb | 26 --- .../content_metadata_datastream_indexer.rb | 60 ------ app/indexers/data_indexer.rb | 30 --- ...efault_object_rights_datastream_indexer.rb | 13 -- app/indexers/describable_indexer.rb | 54 ------ ...descriptive_metadata_datastream_indexer.rb | 13 -- .../embargo_metadata_datastream_indexer.rb | 28 --- app/indexers/identifiable_indexer.rb | 123 ------------ .../identity_metadata_datastream_indexer.rb | 46 ----- app/indexers/object_profile_indexer.rb | 18 -- app/indexers/process_indexer.rb | 56 ------ app/indexers/processable_indexer.rb | 66 ------- .../provenance_metadata_datastream_indexer.rb | 13 -- app/indexers/releasable_indexer.rb | 37 ---- .../rights_metadata_datastream_indexer.rb | 13 -- .../role_metadata_datastream_indexer.rb | 25 --- app/indexers/solr_doc_helper.rb | 11 -- .../version_metadata_datastream_indexer.rb | 13 -- app/indexers/workflow_indexer.rb | 45 ----- app/indexers/workflows_indexer.rb | 32 ---- app/models/workflow_solr_document.rb | 91 --------- app/services/indexer.rb | 84 -------- app/services/workflow_client_factory.rb | 9 - .../hydrus_solr_controller_spec.rb | 16 +- spec/features/hydrus_solr_controller_spec.rb | 6 - .../administrative_tag_indexer_spec.rb | 59 ------ spec/indexers/composite_indexer_spec.rb | 82 -------- ...ontent_metadata_datastream_indexer_spec.rb | 59 ------ spec/indexers/data_indexer_spec.rb | 26 --- ...t_object_rights_datastream_indexer_spec.rb | 33 ---- spec/indexers/describable_indexer_spec.rb | 122 ------------ ...mbargo_metadata_datastream_indexer_spec.rb | 50 ----- spec/indexers/identifiable_indexer_spec.rb | 169 ----------------- ...entity_metadata_datastream_indexer_spec.rb | 59 ------ spec/indexers/object_profile_indexer_spec.rb | 29 --- spec/indexers/processable_indexer_spec.rb | 144 -------------- spec/indexers/releasable_indexer_spec.rb | 29 --- ...rights_metadata_datastream_indexer_spec.rb | 123 ------------ .../role_metadata_datastream_indexer_spec.rb | 58 ------ spec/indexers/workflow_indexer_spec.rb | 179 ------------------ spec/indexers/workflows_indexer_spec.rb | 138 -------------- spec/services/indexer_service_spec.rb | 12 -- spec/spec_helper.rb | 2 - 48 files changed, 5 insertions(+), 2372 deletions(-) delete mode 100644 app/indexers/administrative_metadata_datastream_indexer.rb delete mode 100644 app/indexers/administrative_tag_indexer.rb delete mode 100644 app/indexers/composite_indexer.rb delete mode 100644 app/indexers/content_metadata_datastream_indexer.rb delete mode 100644 app/indexers/data_indexer.rb delete mode 100644 app/indexers/default_object_rights_datastream_indexer.rb delete mode 100644 app/indexers/describable_indexer.rb delete mode 100644 app/indexers/descriptive_metadata_datastream_indexer.rb delete mode 100644 app/indexers/embargo_metadata_datastream_indexer.rb delete mode 100644 app/indexers/identifiable_indexer.rb delete mode 100644 app/indexers/identity_metadata_datastream_indexer.rb delete mode 100644 app/indexers/object_profile_indexer.rb delete mode 100644 app/indexers/process_indexer.rb delete mode 100644 app/indexers/processable_indexer.rb delete mode 100644 app/indexers/provenance_metadata_datastream_indexer.rb delete mode 100644 app/indexers/releasable_indexer.rb delete mode 100644 app/indexers/rights_metadata_datastream_indexer.rb delete mode 100644 app/indexers/role_metadata_datastream_indexer.rb delete mode 100644 app/indexers/solr_doc_helper.rb delete mode 100644 app/indexers/version_metadata_datastream_indexer.rb delete mode 100644 app/indexers/workflow_indexer.rb delete mode 100644 app/indexers/workflows_indexer.rb delete mode 100644 app/models/workflow_solr_document.rb delete mode 100644 app/services/indexer.rb delete mode 100644 app/services/workflow_client_factory.rb delete mode 100644 spec/indexers/administrative_tag_indexer_spec.rb delete mode 100644 spec/indexers/composite_indexer_spec.rb delete mode 100644 spec/indexers/content_metadata_datastream_indexer_spec.rb delete mode 100644 spec/indexers/data_indexer_spec.rb delete mode 100644 spec/indexers/default_object_rights_datastream_indexer_spec.rb delete mode 100644 spec/indexers/describable_indexer_spec.rb delete mode 100644 spec/indexers/embargo_metadata_datastream_indexer_spec.rb delete mode 100644 spec/indexers/identifiable_indexer_spec.rb delete mode 100644 spec/indexers/identity_metadata_datastream_indexer_spec.rb delete mode 100644 spec/indexers/object_profile_indexer_spec.rb delete mode 100644 spec/indexers/processable_indexer_spec.rb delete mode 100644 spec/indexers/releasable_indexer_spec.rb delete mode 100644 spec/indexers/rights_metadata_datastream_indexer_spec.rb delete mode 100644 spec/indexers/role_metadata_datastream_indexer_spec.rb delete mode 100644 spec/indexers/workflow_indexer_spec.rb delete mode 100644 spec/indexers/workflows_indexer_spec.rb delete mode 100644 spec/services/indexer_service_spec.rb diff --git a/Gemfile b/Gemfile index fd373078a..8ba97f8ab 100644 --- a/Gemfile +++ b/Gemfile @@ -54,7 +54,6 @@ group :development, :test do gem 'rubocop', '~> 0.58.1' # gem 'rubocop-rspec', '~> 1.5' gem 'rails-controller-testing' - gem 'webmock' end group :development do diff --git a/Gemfile.lock b/Gemfile.lock index f3989443a..d58f999e1 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -154,8 +154,6 @@ GEM config (2.2.1) deep_merge (~> 1.2, >= 1.2.1) dry-validation (~> 1.0, >= 1.0.0) - crack (0.4.3) - safe_yaml (~> 1.0.0) crass (1.0.6) daemons (1.3.1) deep_merge (1.2.1) @@ -280,7 +278,6 @@ GEM haml (5.1.2) temple (>= 0.8.0) tilt - hashdiff (1.0.1) honeybadger (4.7.2) hooks (0.4.1) uber (~> 0.0.14) @@ -488,7 +485,6 @@ GEM mime-types nokogiri rest-client - safe_yaml (1.0.5) sass (3.7.4) sass-listen (~> 4.0.0) sass-listen (4.0.0) @@ -557,10 +553,6 @@ GEM i18n warden (1.2.8) rack (>= 2.0.6) - webmock (3.8.3) - addressable (>= 2.3.6) - crack (>= 0.3.2) - hashdiff (>= 0.4.0, < 2.0.0) websocket-driver (0.7.3) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) @@ -623,7 +615,6 @@ DEPENDENCIES sqlite3 (~> 1.3.13) uglifier (>= 1.0.3) validates_email_format_of - webmock whenever (~> 0.9) BUNDLED WITH diff --git a/app/controllers/hydrus_solr_controller.rb b/app/controllers/hydrus_solr_controller.rb index 04b1fa8fc..e89e92452 100644 --- a/app/controllers/hydrus_solr_controller.rb +++ b/app/controllers/hydrus_solr_controller.rb @@ -44,8 +44,7 @@ def reindex render(plain: msg) elsif is_hydrus_object(obj) # It's a Hydrus object: re-solrize it and render the SOLR document. - indexer = Indexer.for(obj) - solr_doc = indexer.to_solr + solr_doc = obj.to_solr solr.add(solr_doc, add_attributes: { commitWithin: 5000 }) msg = "#{msg}: updated SOLR index: class=#{obj.class}" index_logger.info(msg) diff --git a/app/indexers/administrative_metadata_datastream_indexer.rb b/app/indexers/administrative_metadata_datastream_indexer.rb deleted file mode 100644 index fbbd17ec8..000000000 --- a/app/indexers/administrative_metadata_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class AdministrativeMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for administrativeMetadata - def to_solr - resource.administrativeMetadata.to_solr - end -end diff --git a/app/indexers/administrative_tag_indexer.rb b/app/indexers/administrative_tag_indexer.rb deleted file mode 100644 index 974c2cc2b..000000000 --- a/app/indexers/administrative_tag_indexer.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -# Index administrative tags for an object. -# NOTE: Most of this code was extracted from the dor-services gem: -# https://github.com/sul-dlss/dor-services/blob/v9.0.0/lib/dor/datastreams/identity_metadata_ds.rb#L196-L218 -class AdministrativeTagIndexer - TAG_PART_DELIMITER = ' : ' - TAGS_TO_INDEX = ['Project', 'Registered By'].freeze - - attr_reader :resource - - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for administrative tags - def to_solr - solr_doc = { 'tag_ssim' => [], 'exploded_tag_ssim' => [] } - administrative_tags.each do |tag| - solr_doc['tag_ssim'] << tag - solr_doc['exploded_tag_ssim'] += exploded_tags_from(tag) - - tag_prefix, rest = tag.split(TAG_PART_DELIMITER, 2) - next if !TAGS_TO_INDEX.include?(tag_prefix) || rest.nil? - - prefix = tag_prefix.downcase.strip.gsub(/\s/, '_') - (solr_doc["#{prefix}_tag_ssim"] ||= []) << rest.strip - end - solr_doc - end - - private - - # solrize each possible prefix for the tag, inclusive of the full tag. - # e.g., for a tag such as "A : B : C", this will solrize to an _ssim field - # that contains ["A", "A : B", "A : B : C"]. - def exploded_tags_from(tag) - tag_parts = tag.split(TAG_PART_DELIMITER) - - 1.upto(tag_parts.count).map do |i| - tag_parts.take(i).join(TAG_PART_DELIMITER) - end - end - - def administrative_tags - Dor::Services::Client.object(resource.pid).administrative_tags.list - rescue Dor::Services::Client::NotFoundResponse - [] - end -end diff --git a/app/indexers/composite_indexer.rb b/app/indexers/composite_indexer.rb deleted file mode 100644 index cfc380c7a..000000000 --- a/app/indexers/composite_indexer.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -# Borrowed from https://github.com/samvera/valkyrie/blob/master/lib/valkyrie/persistence/solr/composite_indexer.rb -class CompositeIndexer - attr_reader :indexers - def initialize(*indexers) - @indexers = indexers - end - - def new(resource:) - Instance.new(indexers, resource: resource) - end - - class Instance - attr_reader :indexers, :resource - def initialize(indexers, resource:) - @resource = resource - @indexers = indexers.map { |i| i.new(resource: resource) } - end - - # @return [Hash] the merged solr document for all the sub-indexers - def to_solr - indexers.map(&:to_solr).inject({}, &:merge) - end - end -end diff --git a/app/indexers/content_metadata_datastream_indexer.rb b/app/indexers/content_metadata_datastream_indexer.rb deleted file mode 100644 index 7b05d74f4..000000000 --- a/app/indexers/content_metadata_datastream_indexer.rb +++ /dev/null @@ -1,60 +0,0 @@ -# frozen_string_literal: true - -class ContentMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for contentMetadata - def to_solr - return {} unless doc.root['type'] - - preserved_size = 0 - shelved_size = 0 - counts = Hash.new(0) # default count is zero - resource_type_counts = Hash.new(0) # default count is zero - file_roles = ::Set.new - mime_types = ::Set.new - first_shelved_image = nil - - doc.xpath('contentMetadata/resource').sort { |a, b| a['sequence'].to_i <=> b['sequence'].to_i }.each do |resource| - counts['resource'] += 1 - resource_type_counts[resource['type']] += 1 if resource['type'] - resource.xpath('file').each do |file| - counts['content_file'] += 1 - preserved_size += file['size'].to_i if file['preserve'] == 'yes' - shelved_size += file['size'].to_i if file['shelve'] == 'yes' - if file['shelve'] == 'yes' - counts['shelved_file'] += 1 - first_shelved_image ||= file['id'] if file['id'].end_with?('jp2') - end - mime_types << file['mimetype'] - file_roles << file['role'] if file['role'] - end - end - solr_doc = { - 'content_type_ssim' => doc.root['type'], - 'content_file_mimetypes_ssim' => mime_types.to_a, - 'content_file_count_itsi' => counts['content_file'], - 'shelved_content_file_count_itsi' => counts['shelved_file'], - 'resource_count_itsi' => counts['resource'], - 'preserved_size_dbtsi' => preserved_size, # double (trie) to support very large sizes - 'shelved_size_dbtsi' => shelved_size # double (trie) to support very large sizes - } - solr_doc['resource_types_ssim'] = resource_type_counts.keys unless resource_type_counts.empty? - solr_doc['content_file_roles_ssim'] = file_roles.to_a unless file_roles.empty? - resource_type_counts.each do |key, count| - solr_doc["#{key}_resource_count_itsi"] = count - end - # first_shelved_image is neither indexed nor multiple - solr_doc['first_shelved_image_ss'] = first_shelved_image unless first_shelved_image.nil? - solr_doc - end - - private - - def doc - @doc ||= resource.contentMetadata.ng_xml - end -end diff --git a/app/indexers/data_indexer.rb b/app/indexers/data_indexer.rb deleted file mode 100644 index 44645a3d1..000000000 --- a/app/indexers/data_indexer.rb +++ /dev/null @@ -1,30 +0,0 @@ -# frozen_string_literal: true - -# Indexing provided by ActiveFedora -class DataIndexer - include ActiveFedora::Indexing - - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # we need to override this until https://github.com/samvera/active_fedora/pull/1371 - # has been released - def to_solr(solr_doc = {}) - c_time = create_date - c_time = Time.parse(c_time) unless c_time.is_a?(Time) - m_time = modified_date - m_time = Time.parse(m_time) unless m_time.is_a?(Time) - Solrizer.set_field(solr_doc, 'system_create', c_time, :stored_sortable) - Solrizer.set_field(solr_doc, 'system_modified', m_time, :stored_sortable) - Solrizer.set_field(solr_doc, 'object_state', state, :stored_sortable) - Solrizer.set_field(solr_doc, 'active_fedora_model', has_model, :stored_sortable) - solr_doc[SOLR_DOCUMENT_ID.to_sym] = pid - solr_doc = solrize_relationships(solr_doc) - solr_doc - end - - delegate :create_date, :modified_date, :state, :pid, :inner_object, - :datastreams, :relationships, :has_model, to: :resource -end diff --git a/app/indexers/default_object_rights_datastream_indexer.rb b/app/indexers/default_object_rights_datastream_indexer.rb deleted file mode 100644 index 01df1adce..000000000 --- a/app/indexers/default_object_rights_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class DefaultObjectRightsDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for defaultObjectRights - def to_solr - resource.defaultObjectRights.to_solr - end -end diff --git a/app/indexers/describable_indexer.rb b/app/indexers/describable_indexer.rb deleted file mode 100644 index 29c6655fd..000000000 --- a/app/indexers/describable_indexer.rb +++ /dev/null @@ -1,54 +0,0 @@ -# frozen_string_literal: true - -class DescribableIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for describable concerns - def to_solr - add_metadata_format_to_solr_doc.merge(add_mods_to_solr_doc) - end - - def add_metadata_format_to_solr_doc - { 'metadata_format_ssim' => 'mods' } - end - - # rubocop:disable Style/SymbolArray - def add_mods_to_solr_doc - solr_doc = {} - mods_sources = { - sw_title_display: %w[sw_display_title_tesim], - main_author_w_date: %w[sw_author_ssim sw_author_tesim], - sw_language_facet: %w[sw_language_ssim], - sw_genre: %w[sw_genre_ssim], - format_main: %w[sw_format_ssim], - topic_facet: %w[sw_topic_ssim], - era_facet: %w[sw_subject_temporal_ssim], - geographic_facet: %w[sw_subject_geographic_ssim], - %i[term_values typeOfResource] => %w[mods_typeOfResource_ssim], - pub_year_sort_str: %w[sw_pub_date_sort_ssi], - pub_year_display_str: %w[sw_pub_date_facet_ssi] - } - - mods_sources.each_pair do |meth, solr_keys| - vals = meth.is_a?(Array) ? resource.stanford_mods.send(meth.shift, *meth) : resource.stanford_mods.send(meth) - - next if vals.nil? || (vals.respond_to?(:empty?) && vals.empty?) - - solr_keys.each do |key| - solr_doc[key] ||= [] - solr_doc[key].push(*vals) - end - # asterisk to avoid multi-dimensional array: push values, not the array - end - - # convert multivalued fields to single value - %w[sw_pub_date_sort_ssi sw_pub_date_facet_ssi].each do |key| - solr_doc[key] = solr_doc[key].first unless solr_doc[key].nil? - end - solr_doc - end - # rubocop:enable Style/SymbolArray -end diff --git a/app/indexers/descriptive_metadata_datastream_indexer.rb b/app/indexers/descriptive_metadata_datastream_indexer.rb deleted file mode 100644 index e96820e08..000000000 --- a/app/indexers/descriptive_metadata_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class DescriptiveMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for descMetadata - def to_solr - resource.descMetadata.to_solr - end -end diff --git a/app/indexers/embargo_metadata_datastream_indexer.rb b/app/indexers/embargo_metadata_datastream_indexer.rb deleted file mode 100644 index 1b27811c2..000000000 --- a/app/indexers/embargo_metadata_datastream_indexer.rb +++ /dev/null @@ -1,28 +0,0 @@ -# frozen_string_literal: true - -class EmbargoMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for embargoMetadata - def to_solr - { - 'embargo_status_ssim' => embargo_status, - 'twenty_pct_status_ssim' => Array(twenty_pct_status) - }.tap do |solr_doc| - rd20 = twenty_pct_release_date - solr_doc['embargo_release_dtsim'] = Array(release_date.utc.strftime('%FT%TZ')) if release_date.present? - solr_doc['twenty_pct_release_embargo_release_dtsim'] = Array(rd20.utc.strftime('%FT%TZ')) if rd20.present? - end - end - - # rubocop:disable Lint/UselessAccessModifier - private - - # rubocop:enable Lint/UselessAccessModifier - - delegate :embargoMetadata, to: :resource - delegate :embargo_status, :twenty_pct_status, :twenty_pct_release_date, :release_date, to: :embargoMetadata -end diff --git a/app/indexers/identifiable_indexer.rb b/app/indexers/identifiable_indexer.rb deleted file mode 100644 index ed7d1410d..000000000 --- a/app/indexers/identifiable_indexer.rb +++ /dev/null @@ -1,123 +0,0 @@ -# frozen_string_literal: true - -# rubocop:disable Style/ClassVars -class IdentifiableIndexer - include SolrDocHelper - - INDEX_VERSION_FIELD = 'dor_services_version_ssi' - NS_HASH = { 'hydra' => 'http://projecthydra.org/ns/relations#', - 'fedora' => 'info:fedora/fedora-system:def/relations-external#', - 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' }.freeze - - FIELDS = { - collection: { - hydrus: 'hydrus_collection_title', - non_hydrus: 'nonhydrus_collection_title', - union: 'collection_title' - }, - apo: { - hydrus: 'hydrus_apo_title', - non_hydrus: 'nonhydrus_apo_title', - union: 'apo_title' - } - }.freeze - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - ## Module-level variables, shared between ALL mixin includers (and ALL *their* includers/extenders)! - ## used for caching found values - @@collection_hash = {} - @@apo_hash = {} - - # @return [Hash] the partial solr document for identifiable concerns - def to_solr - solr_doc = {} - solr_doc[INDEX_VERSION_FIELD] = Dor::VERSION - solr_doc['indexer_host_ssi'] = Socket.gethostname - solr_doc['indexed_at_dtsi'] = Time.now.utc.xmlschema - - add_solr_value(solr_doc, 'title_sort', resource.label, :string, [:stored_sortable]) - - rels_doc = Nokogiri::XML(resource.datastreams['RELS-EXT'].content) - apos = rels_doc.search('//rdf:RDF/rdf:Description/hydra:isGovernedBy', NS_HASH) - collections = rels_doc.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', NS_HASH) - solrize_related_obj_titles(solr_doc, apos, @@apo_hash, :apo) - solrize_related_obj_titles(solr_doc, collections, @@collection_hash, :collection) - solr_doc['public_dc_relation_tesim'] ||= solr_doc['collection_title_tesim'] if solr_doc['collection_title_tesim'] - solr_doc['metadata_source_ssi'] = identity_metadata_source - # This used to be added to the index by https://github.com/sul-dlss/dor-services/blob/8.x-stable/lib/dor/datastreams/identity_metadata_ds.rb#L14 - solr_doc['objectId_ssim'] = [resource.pid, resource.pid.split(':').last] - solr_doc - end - - # @return [String] calculated value for Solr index - def identity_metadata_source - if resource.identityMetadata.otherId('catkey').first || - resource.identityMetadata.otherId('barcode').first - 'Symphony' - else - 'DOR' - end - end - - # Clears out the cache of items. Used primarily in testing. - def self.reset_cache! - @@collection_hash = {} - @@apo_hash = {} - end - - private - - def related_object_tags(object) - return [] unless object - - Dor::Services::Client.object(object.pid).administrative_tags.list - end - - # @param [Hash] solr_doc - # @param [Array] relationships - # @param [Hash] title_hash a cache for titles - # @param [Symbol] type either :apo or :collection - def solrize_related_obj_titles(solr_doc, relationships, title_hash, type) - # TODO: if you wanted to get a little fancier, you could also solrize a 2 level hierarchy and display using hierarchial facets, like - # ["SOURCE", "SOURCE : TITLE"] (e.g. ["Hydrus", "Hydrus : Special Collections"], see (exploded) tags in IdentityMetadataDS#to_solr). - title_type = :symbol # we'll get an _ssim because of the type - title_attrs = [:stored_searchable] # we'll also get a _tesim from this attr - relationships.each do |rel_node| - rel_druid = rel_node['rdf:resource'] - next unless rel_druid # TODO: warning here would also be useful - - rel_druid = rel_druid.gsub('info:fedora/', '') - - # populate cache if necessary - unless title_hash.key?(rel_druid) - begin - related_obj = Dor.find(rel_druid) - related_obj_title = related_obj_display_title(related_obj, rel_druid) - is_from_hydrus = related_object_tags(related_obj).include?('Project : Hydrus') - title_hash[rel_druid] = { 'related_obj_title' => related_obj_title, 'is_from_hydrus' => is_from_hydrus } - rescue ActiveFedora::ObjectNotFoundError - # This may happen if the given APO or Collection does not exist (bad data) - title_hash[rel_druid] = { 'related_obj_title' => rel_druid, 'is_from_hydrus' => false } - end - end - - # cache should definitely be populated, so just use that to write solr field - if title_hash[rel_druid]['is_from_hydrus'] - add_solr_value(solr_doc, FIELDS.dig(type, :hydrus), title_hash[rel_druid]['related_obj_title'], title_type, title_attrs) - else - add_solr_value(solr_doc, FIELDS.dig(type, :non_hydrus), title_hash[rel_druid]['related_obj_title'], title_type, title_attrs) - end - add_solr_value(solr_doc, FIELDS.dig(type, :union), title_hash[rel_druid]['related_obj_title'], title_type, title_attrs) - end - end - - def related_obj_display_title(related_obj, default_title) - return default_title unless related_obj - - related_obj.full_title || default_title - end -end -# rubocop:enable Style/ClassVars diff --git a/app/indexers/identity_metadata_datastream_indexer.rb b/app/indexers/identity_metadata_datastream_indexer.rb deleted file mode 100644 index 76b85fc1d..000000000 --- a/app/indexers/identity_metadata_datastream_indexer.rb +++ /dev/null @@ -1,46 +0,0 @@ -# frozen_string_literal: true - -class IdentityMetadataDatastreamIndexer - include SolrDocHelper - - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for identityMetadata - def to_solr - solr_doc = {} - solr_doc['objectType_ssim'] = resource.identityMetadata.objectType - - plain_identifiers = [] - ns_identifiers = [] - if source_id.present? - (name, id) = source_id.split(/:/, 2) - plain_identifiers << id - ns_identifiers << source_id - solr_doc['source_id_ssim'] = [source_id] - end - - resource.identityMetadata.otherId.compact.each do |qid| - # this section will solrize barcode and catkey, which live in otherId - (name, id) = qid.split(/:/, 2) - plain_identifiers << id - ns_identifiers << qid - next unless %w[barcode catkey].include?(name) - - solr_doc["#{name}_id_ssim"] = [id] - end - solr_doc['dor_id_tesim'] = plain_identifiers - solr_doc['identifier_tesim'] = ns_identifiers - solr_doc['identifier_ssim'] = ns_identifiers - - solr_doc - end - - private - - def source_id - @source_id ||= resource.identityMetadata.sourceId - end -end diff --git a/app/indexers/object_profile_indexer.rb b/app/indexers/object_profile_indexer.rb deleted file mode 100644 index ae76e8c55..000000000 --- a/app/indexers/object_profile_indexer.rb +++ /dev/null @@ -1,18 +0,0 @@ -# frozen_string_literal: true - -class ObjectProfileIndexer - include SolrDocHelper - - attr_reader :resource - - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for releasable concerns - def to_solr - {}.tap do |solr_doc| - add_solr_value(solr_doc, 'obj_label', resource.label, :symbol, [:stored_searchable]) - end - end -end diff --git a/app/indexers/process_indexer.rb b/app/indexers/process_indexer.rb deleted file mode 100644 index 96eb00c9d..000000000 --- a/app/indexers/process_indexer.rb +++ /dev/null @@ -1,56 +0,0 @@ -# frozen_string_literal: true - -# Indexes the process for a workflow -class ProcessIndexer - ERROR_OMISSION = '... (continued)' - private_constant :ERROR_OMISSION - - # see https://lucene.apache.org/core/7_3_1/core/org/apache/lucene/util/BytesRefHash.MaxBytesLengthExceededException.html - MAX_ERROR_LENGTH = 32_768 - 2 - ERROR_OMISSION.length - private_constant :MAX_ERROR_LENGTH - - # @param [WorkflowSolrDocument] solr_doc - # @param [String] workflow_name - # @param [Dor::Workflow::Response::Process] process - def initialize(solr_doc:, workflow_name:, process:) - @solr_doc = solr_doc - @workflow_name = workflow_name - @process = process - end - - # @return [Hash] the partial solr document for the workflow document - def to_solr - return unless status - - # add a record of the robot having operated on this item, so we can track robot activity - solr_doc.add_process_time(workflow_name, name, Time.parse(process.datetime)) if time? - - index_error_message - - # workflow name, process status then process name - solr_doc.add_wsp("#{workflow_name}:#{status}", "#{workflow_name}:#{status}:#{name}") - - # workflow name, process name then process status - solr_doc.add_wps("#{workflow_name}:#{name}", "#{workflow_name}:#{name}:#{status}") - - # process status, workflowname then process name - solr_doc.add_swp(process.status.to_s, "#{status}:#{workflow_name}", "#{status}:#{workflow_name}:#{name}") - end - - private - - attr_reader :process, :workflow_name, :solr_doc - delegate :status, :name, :state, :error_message, :datetime, to: :process - - def time? - datetime && (status == 'completed' || status == 'error') - end - - # index the error message without the druid so we hopefully get some overlap - # truncate to avoid org.apache.lucene.util.BytesRefHash$MaxBytesLengthExceededException - def index_error_message - return unless error_message - - solr_doc.error = "#{workflow_name}:#{name}:#{error_message}".truncate(MAX_ERROR_LENGTH, omission: ERROR_OMISSION) - end -end diff --git a/app/indexers/processable_indexer.rb b/app/indexers/processable_indexer.rb deleted file mode 100644 index a56501c0b..000000000 --- a/app/indexers/processable_indexer.rb +++ /dev/null @@ -1,66 +0,0 @@ -# frozen_string_literal: true - -class ProcessableIndexer - include SolrDocHelper - - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for processable concerns - def to_solr - {}.tap do |solr_doc| - solr_doc['current_version_isi'] = current_version.to_i # Argo Facet field "Version" - - add_sortable_milestones(solr_doc) - solr_doc['modified_latest_dttsi'] = resource.modified_date.to_datetime.utc.strftime('%FT%TZ') - add_solr_value(solr_doc, 'rights', resource.rights, :string, [:symbol]) if resource.respond_to? :rights - add_status(solr_doc) - end - end - - private - - def status_service - @status_service ||= WorkflowClientFactory.build.status(druid: resource.pid, version: resource.current_version) - end - - def current_version - @current_version ||= begin - resource.current_version - rescue StandardError - '1' - end - end - - def add_status(solr_doc) - solr_doc['status_ssi'] = status_service.display - return unless status_service.info[:status_code] - - # This is used for Argo's "Processing Status" facet - add_solr_value(solr_doc, 'processing_status_text', status_service.display_simplified, :string, [:stored_sortable]) - end - - def sortable_milestones - status_service.milestones.each_with_object({}) do |milestone, sortable| - sortable[milestone[:milestone]] ||= [] - sortable[milestone[:milestone]] << milestone[:at].utc.xmlschema - end - end - - def add_sortable_milestones(solr_doc) - sortable_milestones.each do |milestone, unordered_dates| - dates = unordered_dates.sort - # create the published_dttsi and published_day fields and the like - dates.each do |date| - solr_doc["#{milestone}_dttsim"] ||= [] - solr_doc["#{milestone}_dttsim"] << date unless solr_doc["#{milestone}_dttsim"].include?(date) - end - # fields for OAI havester to sort on: _dttsi is trie date +stored +indexed (single valued, i.e. sortable) - # TODO: we really only need accessioned_earliest and registered_earliest - solr_doc["#{milestone}_earliest_dttsi"] = dates.first - solr_doc["#{milestone}_latest_dttsi"] = dates.last - end - end -end diff --git a/app/indexers/provenance_metadata_datastream_indexer.rb b/app/indexers/provenance_metadata_datastream_indexer.rb deleted file mode 100644 index 163cc8b45..000000000 --- a/app/indexers/provenance_metadata_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class ProvenanceMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for provenanceMetadata - def to_solr - resource.provenanceMetadata.to_solr - end -end diff --git a/app/indexers/releasable_indexer.rb b/app/indexers/releasable_indexer.rb deleted file mode 100644 index 4c797bce3..000000000 --- a/app/indexers/releasable_indexer.rb +++ /dev/null @@ -1,37 +0,0 @@ -# frozen_string_literal: true - -class ReleasableIndexer - include SolrDocHelper - - attr_reader :resource - - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for releasable concerns - def to_solr - solr_doc = {} - - # TODO: sort of worried about the performance impact in bulk reindex - # situations, since released_for recurses all parent collections. jmartin 2015-07-14 - released_for.each do |release_target, release_info| - add_solr_value(solr_doc, 'released_to', release_target, :symbol, []) if release_info['release'] - end - - # TODO: need to solrize whether item is released to purl? does released_for return that? - # logic is: "True when there is a published lifecycle and Access Rights is anything but Dark" - - solr_doc - end - - private - - def released_for - object_client.release_tags.list - end - - def object_client - Dor::Services::Client.object(resource.pid) - end -end diff --git a/app/indexers/rights_metadata_datastream_indexer.rb b/app/indexers/rights_metadata_datastream_indexer.rb deleted file mode 100644 index 9f41ab42c..000000000 --- a/app/indexers/rights_metadata_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class RightsMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for rightsMetadata - def to_solr - resource.rightsMetadata.to_solr - end -end diff --git a/app/indexers/role_metadata_datastream_indexer.rb b/app/indexers/role_metadata_datastream_indexer.rb deleted file mode 100644 index 85feac1ac..000000000 --- a/app/indexers/role_metadata_datastream_indexer.rb +++ /dev/null @@ -1,25 +0,0 @@ -# frozen_string_literal: true - -class RoleMetadataDatastreamIndexer - include SolrDocHelper - - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for roleMetadata - def to_solr - {}.tap do |solr_doc| - # rubocop:disable Style/SymbolArray - resource.roleMetadata.find_by_xpath('/roleMetadata/role/*').each do |actor| - role_type = actor.parent['type'] - val = [actor.at_xpath('identifier/@type'), actor.at_xpath('identifier/text()')].join ':' - add_solr_value(solr_doc, "apo_role_#{actor.name}_#{role_type}", val, :string, [:symbol]) - add_solr_value(solr_doc, "apo_role_#{role_type}", val, :string, [:symbol]) - add_solr_value(solr_doc, 'apo_register_permissions', val, :string, %i[symbol stored_searchable]) if %w[dor-apo-manager dor-apo-depositor].include? role_type - end - # rubocop:enable Style/SymbolArray - end - end -end diff --git a/app/indexers/solr_doc_helper.rb b/app/indexers/solr_doc_helper.rb deleted file mode 100644 index 826fd3723..000000000 --- a/app/indexers/solr_doc_helper.rb +++ /dev/null @@ -1,11 +0,0 @@ -# frozen_string_literal: true - -module SolrDocHelper - def add_solr_value(solr_doc, field_name, value, field_type = :default, index_types = [:searchable]) - case field_type - when :symbol - index_types << field_type - end - ::Solrizer.insert_field(solr_doc, field_name, value, *index_types) - end -end diff --git a/app/indexers/version_metadata_datastream_indexer.rb b/app/indexers/version_metadata_datastream_indexer.rb deleted file mode 100644 index 362449f33..000000000 --- a/app/indexers/version_metadata_datastream_indexer.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -class VersionMetadataDatastreamIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for versionMetadata - def to_solr - resource.versionMetadata.to_solr - end -end diff --git a/app/indexers/workflow_indexer.rb b/app/indexers/workflow_indexer.rb deleted file mode 100644 index 35551bd98..000000000 --- a/app/indexers/workflow_indexer.rb +++ /dev/null @@ -1,45 +0,0 @@ -# frozen_string_literal: true - -# Indexes the objects position in workflows -class WorkflowIndexer - # @param [Workflow::Response::Workflow] workflow the workflow document to index - def initialize(workflow:) - @workflow = workflow - end - - # @return [Hash] the partial solr document for the workflow document - def to_solr - WorkflowSolrDocument.new do |solr_doc| - solr_doc.name = workflow_name - - errors = 0 # The error count is used by the Report class in Argo - processes.each do |process| - ProcessIndexer.new(solr_doc: solr_doc, workflow_name: workflow_name, process: process).to_solr - errors += 1 if process.status == 'error' - end - solr_doc.status = [workflow_name, workflow_status, errors].join('|') - end - end - - private - - attr_reader :workflow - delegate :workflow_name, to: :workflow - - def definition_process_names - @definition_process_names ||= begin - definition = WorkflowClientFactory.build.workflow_template(workflow_name) - definition['processes'].map { |p| p['name'] } - end - end - - def processes - @processes ||= definition_process_names.map do |process_name| - workflow.process_for_recent_version(name: process_name) - end - end - - def workflow_status - workflow.complete? ? 'completed' : 'active' - end -end diff --git a/app/indexers/workflows_indexer.rb b/app/indexers/workflows_indexer.rb deleted file mode 100644 index d2ddfc2eb..000000000 --- a/app/indexers/workflows_indexer.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -# Indexes the objects position in workflows -class WorkflowsIndexer - attr_reader :resource - def initialize(resource:) - @resource = resource - end - - # @return [Hash] the partial solr document for workflow concerns - def to_solr - WorkflowSolrDocument.new do |combined_doc| - workflows.each do |wf| - doc = WorkflowIndexer.new(workflow: wf).to_solr - combined_doc.merge!(doc) - end - end.to_h - end - - private - - # @return [Array] - def workflows - all_workflows.workflows - end - - # TODO: remove Dor::Workflow::Document - # @return [Workflow::Response::Workflows] - def all_workflows - @all_workflows ||= WorkflowClientFactory.build.workflow_routes.all_workflows pid: resource.pid - end -end diff --git a/app/models/workflow_solr_document.rb b/app/models/workflow_solr_document.rb deleted file mode 100644 index 940904602..000000000 --- a/app/models/workflow_solr_document.rb +++ /dev/null @@ -1,91 +0,0 @@ -# frozen_string_literal: true - -# Represents that part of the solr document that holds workflow data -class WorkflowSolrDocument - WORKFLOW_SOLR = 'wf_ssim' - # field that indexes workflow name, process status then process name - WORKFLOW_WPS_SOLR = 'wf_wps_ssim' - # field that indexes workflow name, process name then process status - WORKFLOW_WSP_SOLR = 'wf_wsp_ssim' - # field that indexes process status, workflowname then process name - WORKFLOW_SWP_SOLR = 'wf_swp_ssim' - WORKFLOW_ERROR_SOLR = 'wf_error_ssim' - WORKFLOW_STATUS_SOLR = 'workflow_status_ssim' - - KEYS_TO_MERGE = [ - WORKFLOW_SOLR, - WORKFLOW_WPS_SOLR, - WORKFLOW_WSP_SOLR, - WORKFLOW_SWP_SOLR, - WORKFLOW_STATUS_SOLR, - WORKFLOW_ERROR_SOLR - ].freeze - - def initialize - @data = empty_document - yield self if block_given? - end - - def name=(wf_name) - data[WORKFLOW_SOLR] += [wf_name] - data[WORKFLOW_WPS_SOLR] += [wf_name] - data[WORKFLOW_WSP_SOLR] += [wf_name] - end - - def status=(status) - data[WORKFLOW_STATUS_SOLR] += [status] - end - - def error=(message) - data[WORKFLOW_ERROR_SOLR] += [message] - end - - # Add to the field that indexes workflow name, process status then process name - def add_wps(*messages) - data[WORKFLOW_WPS_SOLR] += messages - end - - # Add to the field that indexes workflow name, process name then process status - def add_wsp(*messages) - data[WORKFLOW_WSP_SOLR] += messages - end - - # Add to the field that indexes process status, workflow name then process name - def add_swp(*messages) - data[WORKFLOW_SWP_SOLR] += messages - end - - # Add the processes data_time attribute to the solr document - # @param [String] wf_name - # @param [String] process_name - # @param [Time] time - def add_process_time(wf_name, process_name, time) - data["wf_#{wf_name}_#{process_name}_dttsi"] = time.utc.iso8601 - end - - def to_h - KEYS_TO_MERGE.each { |k| data[k].uniq! } - data - end - - delegate :except, :[], to: :data - - # @param [WorkflowSolrDocument] doc - def merge!(doc) - # This is going to get the date fields, e.g. `wf_assemblyWF_jp2-create_dttsi' - @data.merge!(doc.except(*KEYS_TO_MERGE)) - - # Combine the non-unique fields together - KEYS_TO_MERGE.each do |k| - data[k] += doc[k] - end - end - - private - - attr_reader :data - - def empty_document - KEYS_TO_MERGE.each_with_object({}) { |k, obj| obj[k] = [] } - end -end diff --git a/app/services/indexer.rb b/app/services/indexer.rb deleted file mode 100644 index 8d95d965d..000000000 --- a/app/services/indexer.rb +++ /dev/null @@ -1,84 +0,0 @@ -# frozen_string_literal: true - -class Indexer - ADMIN_POLICY_INDEXER = CompositeIndexer.new( - AdministrativeTagIndexer, - DataIndexer, - RoleMetadataDatastreamIndexer, - AdministrativeMetadataDatastreamIndexer, - DefaultObjectRightsDatastreamIndexer, - ProvenanceMetadataDatastreamIndexer, - RightsMetadataDatastreamIndexer, - VersionMetadataDatastreamIndexer, - ObjectProfileIndexer, - IdentityMetadataDatastreamIndexer, - DescriptiveMetadataDatastreamIndexer, - DescribableIndexer, - IdentifiableIndexer, - ProcessableIndexer, - WorkflowsIndexer - ) - - COLLECTION_INDEXER = CompositeIndexer.new( - AdministrativeTagIndexer, - DataIndexer, - ProvenanceMetadataDatastreamIndexer, - RightsMetadataDatastreamIndexer, - VersionMetadataDatastreamIndexer, - ObjectProfileIndexer, - IdentityMetadataDatastreamIndexer, - DescriptiveMetadataDatastreamIndexer, - DescribableIndexer, - IdentifiableIndexer, - ProcessableIndexer, - ReleasableIndexer, - WorkflowsIndexer - ) - - ITEM_INDEXER = CompositeIndexer.new( - AdministrativeTagIndexer, - DataIndexer, - ProvenanceMetadataDatastreamIndexer, - RightsMetadataDatastreamIndexer, - VersionMetadataDatastreamIndexer, - ObjectProfileIndexer, - IdentityMetadataDatastreamIndexer, - DescriptiveMetadataDatastreamIndexer, - EmbargoMetadataDatastreamIndexer, - ContentMetadataDatastreamIndexer, - DescribableIndexer, - IdentifiableIndexer, - ProcessableIndexer, - ReleasableIndexer, - WorkflowsIndexer - ) - - SET_INDEXER = CompositeIndexer.new( - AdministrativeTagIndexer, - DataIndexer, - ProvenanceMetadataDatastreamIndexer, - RightsMetadataDatastreamIndexer, - VersionMetadataDatastreamIndexer, - ObjectProfileIndexer, - IdentityMetadataDatastreamIndexer, - DescriptiveMetadataDatastreamIndexer, - DescribableIndexer, - IdentifiableIndexer, - ProcessableIndexer, - WorkflowsIndexer - ) - - INDEXERS = { - Dor::Agreement => ITEM_INDEXER, # Agreement uses same indexer as Dor::Item - Dor::AdminPolicyObject => ADMIN_POLICY_INDEXER, - Hydrus::Collection => COLLECTION_INDEXER, - Hydrus::Item => ITEM_INDEXER, - Hydrus::AdminPolicyObject => ADMIN_POLICY_INDEXER, - Dor::Item => ITEM_INDEXER, - Dor::Set => SET_INDEXER - }.freeze - - def self.for(obj) - INDEXERS.fetch(obj.class).new(resource: obj) - end -end diff --git a/app/services/workflow_client_factory.rb b/app/services/workflow_client_factory.rb deleted file mode 100644 index f222001c4..000000000 --- a/app/services/workflow_client_factory.rb +++ /dev/null @@ -1,9 +0,0 @@ -# frozen_string_literal: true - -# This initializes the workflow client with values from settings -class WorkflowClientFactory - def self.build - logger = Logger.new(Settings.workflow.logfile, Settings.workflow.shift_age) - Dor::Workflow::Client.new(url: Settings.workflow.url, logger: logger, timeout: Settings.workflow.timeout) - end -end diff --git a/spec/controllers/hydrus_solr_controller_spec.rb b/spec/controllers/hydrus_solr_controller_spec.rb index 58268a6bf..948d0786a 100644 --- a/spec/controllers/hydrus_solr_controller_spec.rb +++ b/spec/controllers/hydrus_solr_controller_spec.rb @@ -6,14 +6,6 @@ let(:pid) { 'druid:bc123df4567' } describe 'reindex' do - before do - allow(Indexer).to receive(:for).with(mock_hydrus_obj).and_return(mock_indexer) - end - - let(:mock_hydrus_obj) { instance_double(Hydrus::Item, to_solr: { id: 'x' }, pid: pid) } - let(:mock_indexer) { instance_double(CompositeIndexer::Instance, to_solr: mock_solr_doc) } - let(:mock_solr_doc) { { id: pid } } - context 'when an object is not found in Fedora' do it 'responds with 404' do allow(ActiveFedora::Base).to receive(:find).and_return(nil) @@ -51,8 +43,8 @@ it 'indexes the object' do allow(ActiveFedora::Base).to receive(:find) - .and_return(mock_hydrus_obj) - expect(ActiveFedora.solr.conn).to receive(:add).with({ id: pid }, add_attributes: { commitWithin: 5000 }).and_return(true) + .and_return(instance_double(Hydrus::Item, to_solr: { id: 'x' }, pid: pid)) + expect(ActiveFedora.solr.conn).to receive(:add).with({ id: 'x' }, add_attributes: { commitWithin: 5000 }).and_return(true) get :reindex, params: { id: 'druid:bc123df4567' } expect(response.status).to eq(200) end @@ -69,8 +61,8 @@ it 'indexes the object' do allow(ActiveFedora::Base).to receive(:find) - .and_return(mock_hydrus_obj) - expect(ActiveFedora.solr.conn).to receive(:add).with({ id: pid }, add_attributes: { commitWithin: 5000 }).and_return(true) + .and_return(instance_double(Hydrus::Item, to_solr: { id: 'x' }, pid: pid)) + expect(ActiveFedora.solr.conn).to receive(:add).with({ id: 'x' }, add_attributes: { commitWithin: 5000 }).and_return(true) get :reindex, params: { id: 'druid:bc123df4567' } expect(response.status).to eq(200) end diff --git a/spec/features/hydrus_solr_controller_spec.rb b/spec/features/hydrus_solr_controller_spec.rb index b2c7bd2f8..b05cf3ed2 100644 --- a/spec/features/hydrus_solr_controller_spec.rb +++ b/spec/features/hydrus_solr_controller_spec.rb @@ -14,17 +14,11 @@ let(:fake_tags_client) { instance_double(Dor::Services::Client::AdministrativeTags, list: tags) } let(:tags) { ['Project : Hydrus'] } - let(:mock_hydrus_obj) { instance_double(Hydrus::Item, to_solr: { id: 'x' }, pid: druid) } - let(:mock_indexer) { instance_double(CompositeIndexer::Instance, to_solr: mock_solr_doc) } - let(:mock_solr_doc) { { id: druid } } - before do allow(Dor::Services::Client).to receive(:object).with(druid).and_return(fake_object_client) - allow(Indexer).to receive(:for).with(mock_hydrus_obj).and_return(mock_indexer) end it 'indexes an item into solr' do - allow(ActiveFedora::Base).to receive(:find).and_return(mock_hydrus_obj) expect(ActiveFedora.solr.conn).to receive(:add).with(hash_including(id: druid), anything) visit "/hydrus_solr/reindex/#{druid}" end diff --git a/spec/indexers/administrative_tag_indexer_spec.rb b/spec/indexers/administrative_tag_indexer_spec.rb deleted file mode 100644 index 5faf23d35..000000000 --- a/spec/indexers/administrative_tag_indexer_spec.rb +++ /dev/null @@ -1,59 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe AdministrativeTagIndexer do - describe '#to_solr' do - subject(:document) { indexer.to_solr } - - let(:indexer) { described_class.new(resource: object) } - let(:object) { Dor::Abstract.new(pid: 'druid:rt923jk234') } - let(:tags) do - [ - 'Google Books : Phase 1', - 'Google Books : Scan source STANFORD', - 'Project : Beautiful Books', - 'Registered By : blalbrit', - 'DPG : Beautiful Books : Octavo : newpri', - 'Remediated By : 4.15.4' - ] - end - - before do - # Don't actually hit the dor-services-app API endpoint - allow(indexer).to receive(:administrative_tags).and_return(tags) - end - - it 'indexes all administrative tags' do - expect(document).to include('tag_ssim' => tags) - end - - it 'indexes exploded tags' do - expect(document['exploded_tag_ssim']).to match_array( - [ - 'Google Books', - 'Google Books : Phase 1', - 'Google Books', - 'Google Books : Scan source STANFORD', - 'Project', - 'Project : Beautiful Books', - 'Registered By', - 'Registered By : blalbrit', - 'DPG', - 'DPG : Beautiful Books', - 'DPG : Beautiful Books : Octavo', - 'DPG : Beautiful Books : Octavo : newpri', - 'Remediated By', - 'Remediated By : 4.15.4' - ] - ) - end - - it 'indexes prefixed tags' do - expect(document).to include( - 'project_tag_ssim' => ['Beautiful Books'], - 'registered_by_tag_ssim' => ['blalbrit'] - ) - end - end -end diff --git a/spec/indexers/composite_indexer_spec.rb b/spec/indexers/composite_indexer_spec.rb deleted file mode 100644 index b9b65a75b..000000000 --- a/spec/indexers/composite_indexer_spec.rb +++ /dev/null @@ -1,82 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe CompositeIndexer do - let(:model) { Dor::Abstract } - let(:mods) do - double('mods', sw_title_display: 'foo', sw_genre: ['test genre'], - main_author_w_date: '1999', - sw_sort_author: 'baz', - sw_language_facet: 'en', - format_main: 'foofmt', - topic_facet: 'topicbar', - era_facet: ['17th century', '18th century'], - geographic_facet: %w[Europe Europe], - term_values: 'huh?', - pub_year_sort_str: '1600', - pub_year_int: 1600, - pub_year_display_str: '1600') - end - let(:obj) do - instance_double(Dor::Item, - pid: 'druid:mx123ms3333', - stanford_mods: mods, - datastreams: datastreams, - label: 'obj label', - identityMetadata: identity_metadata, - versionMetadata: version_metadata, - current_version: '7', - modified_date: '1999-12-30') - end - let(:datastreams) do - { 'RELS-EXT' => double('datastream', datastream_spec_string: 'huh', new?: false, content: '') } - end - let(:identity_metadata) do - instance_double(Dor::IdentityMetadataDS, otherId: 'foo') - end - let(:version_metadata) do - instance_double(Dor::VersionMetadataDS, tag_for_version: 'tag7', description_for_version: 'desc7', current_version_id: '7') - end - - let(:indexer) do - described_class.new( - DescribableIndexer, - IdentifiableIndexer, - ProcessableIndexer - ) - end - - describe 'to_solr' do - let(:status) do - instance_double(Dor::Workflow::Client::Status, milestones: {}, info: {}, display: 'bad') - end - let(:workflow_client) { instance_double(Dor::Workflow::Client, status: status) } - let(:doc) { indexer.new(resource: obj).to_solr } - - before do - allow(Dor::Workflow::Client).to receive(:new).and_return(workflow_client) - end - - it 'searchworks date-fu: temporal periods and pub_dates' do - expect(doc).to match a_hash_including( - 'sw_subject_temporal_ssim' => a_collection_containing_exactly('18th century', '17th century'), - 'sw_pub_date_sort_ssi' => '1600', - 'sw_pub_date_facet_ssi' => '1600' - ) - end - - it 'subject geographic fields' do - expect(doc).to match a_hash_including( - 'sw_subject_geographic_ssim' => %w[Europe Europe] - ) - end - - it 'genre fields' do - genre_list = obj.stanford_mods.sw_genre - expect(doc).to match a_hash_including( - 'sw_genre_ssim' => genre_list - ) - end - end -end diff --git a/spec/indexers/content_metadata_datastream_indexer_spec.rb b/spec/indexers/content_metadata_datastream_indexer_spec.rb deleted file mode 100644 index 13dcba9d9..000000000 --- a/spec/indexers/content_metadata_datastream_indexer_spec.rb +++ /dev/null @@ -1,59 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe ContentMetadataDatastreamIndexer do - let(:xml) do - <<~XML - - - - - - 3d3ff46d98f3d517d0bf086571e05c18 - ca1eb0edd09a21f9dd9e3a89abc790daf4d04916 - - - - 406d5d80fdd9ecc0352d339badb4a8fb - 61940d4fad097cba98a3e9dd9f12a90dde0be1ac - - - - 81ccd17bccf349581b779615e82a0366 - 12586b624540031bfa3d153299160c4885c3508c - - - - XML - end - - let(:obj) { Dor::Item.new } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - obj.contentMetadata.content = xml - end - - describe '#to_solr' do - subject(:doc) { indexer.to_solr } - - it 'has the fields used by argo' do - expect(doc).to include( - 'content_type_ssim' => 'map', - 'content_file_mimetypes_ssim' => ['image/jp2', 'image/gif', 'image/tiff'], - 'content_file_roles_ssim' => ['derivative'], - 'shelved_content_file_count_itsi' => 1, - 'resource_count_itsi' => 1, - 'content_file_count_itsi' => 3, - 'image_resource_count_itsi' => 1, - 'first_shelved_image_ss' => 'gw177fc7976_05_0001.jp2', - 'preserved_size_dbtsi' => 86_774_303, - 'shelved_size_dbtsi' => 5_143_883 - ) - end - end -end diff --git a/spec/indexers/data_indexer_spec.rb b/spec/indexers/data_indexer_spec.rb deleted file mode 100644 index c97bb31d7..000000000 --- a/spec/indexers/data_indexer_spec.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe DataIndexer do - let(:obj) do - Dor::AdminPolicyObject.new(pid: 'druid:999') - end - - let(:indexer) do - described_class.new(resource: obj) - end - - describe '#to_solr' do - let(:indexer) do - CompositeIndexer.new( - described_class - ).new(resource: obj) - end - let(:doc) { indexer.to_solr } - - it 'makes a solr doc' do - expect(doc).to match a_hash_including(id: 'druid:999') - end - end -end diff --git a/spec/indexers/default_object_rights_datastream_indexer_spec.rb b/spec/indexers/default_object_rights_datastream_indexer_spec.rb deleted file mode 100644 index 65aa2c51d..000000000 --- a/spec/indexers/default_object_rights_datastream_indexer_spec.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe DefaultObjectRightsDatastreamIndexer do - let(:obj) do - Dor::AdminPolicyObject.new - end - - let(:indexer) do - described_class.new(resource: obj) - end - - describe '#to_solr' do - let(:indexer) do - CompositeIndexer.new( - described_class - ).new(resource: obj) - end - let(:doc) { indexer.to_solr } - - before do - obj.use_statement = 'Rights are owned by Stanford University Libraries.' - obj.copyright_statement = 'Additional copyright info' - end - - it 'makes a solr doc' do - expect(doc).to match a_hash_including('use_statement_ssim' => - ['Rights are owned by Stanford University Libraries.']) - expect(doc).to match a_hash_including('copyright_ssim' => ['Additional copyright info']) - end - end -end diff --git a/spec/indexers/describable_indexer_spec.rb b/spec/indexers/describable_indexer_spec.rb deleted file mode 100644 index 9736ba2f5..000000000 --- a/spec/indexers/describable_indexer_spec.rb +++ /dev/null @@ -1,122 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe DescribableIndexer do - let(:xml) do - <<~XML - - - - The - complete works of Henry George - - - George, Henry - - 1839-1897 - - creator - - - - George, Henry - - 1862-1916 - - text - - - xx - - - - Garden City, N. Y - - Doubleday, Page - 1911 - 1911 - [Library ed.] - - monographic - - - eng - - - - print - - 10 v. fronts (v. 1-9) ports. 21 cm. - - - YNG - 731210 - 19900625062034.0 - 68184 - - 757655 - - - - electronic - preservation - reformatted digital - - - I. Progress and poverty.--II. Social problems.--III. The land question. Property in land. The condition of labor.--IV. Protection or free trade.--V. A perplexed philosopher [Herbert Spencer]--VI. The science of political economy, books I and II.--VII. The science of political economy, books III to V. "Moses": a lecture.--VIII. Our land and land policy.--IX-X. The life of Henry George, by his son Henry George, jr. - On cover: Complete works of Henry George. Fels fund. Library edition. - - Economics - 1800-1900 - - - - DOR_MARC2MODS3-3.xsl Revision 1.1 - 2011-02-25T18:20:23.132-08:00 - 36105010700545 - - druid:pz263ny9658 - - Stanford University Libraries - - http://purl.stanford.edu/pz263ny9658 - - - XML - end - let(:obj) { Dor::Abstract.new } - - let(:indexer) do - described_class.new(resource: obj) - end - - describe '#to_solr' do - let(:doc) { indexer.to_solr } - - before do - obj.datastreams['descMetadata'].content = xml - end - - it 'includes values from stanford_mods' do - expect(doc).to match a_hash_including( - 'sw_language_ssim' => ['English'], - 'sw_format_ssim' => ['Book'], - 'sw_subject_temporal_ssim' => ['1800-1900'], - 'sw_pub_date_sort_ssi' => '1911', - 'sw_pub_date_facet_ssi' => '1911' - ) - end - - it 'does not include empty values' do - doc.keys.sort_by(&:to_s).each do |k| - expect(doc).to include(k) - expect(doc).to match hash_excluding(k => nil) - expect(doc).to match hash_excluding(k => []) - end - end - end -end diff --git a/spec/indexers/embargo_metadata_datastream_indexer_spec.rb b/spec/indexers/embargo_metadata_datastream_indexer_spec.rb deleted file mode 100644 index 5c7bd7d28..000000000 --- a/spec/indexers/embargo_metadata_datastream_indexer_spec.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe EmbargoMetadataDatastreamIndexer do - let(:xml) do - <<~XML - - - embargoed - 2011-10-12T15:47:52-07:00 - released - 2016-10-12T15:47:52-07:00 - - - - - - - - - - - - - - XML - end - - let(:obj) { Hydrus::Item.new } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - obj.embargoMetadata.content = xml - end - - describe '#to_solr' do - subject(:doc) { indexer.to_solr } - - it 'has the fields used by argo' do - expect(doc).to eq('embargo_release_dtsim' => ['2011-10-12T22:47:52Z'], - 'embargo_status_ssim' => ['embargoed'], - 'twenty_pct_status_ssim' => ['released'], - 'twenty_pct_release_embargo_release_dtsim' => ['2016-10-12T22:47:52Z']) - end - end -end diff --git a/spec/indexers/identifiable_indexer_spec.rb b/spec/indexers/identifiable_indexer_spec.rb deleted file mode 100644 index dc77bd96f..000000000 --- a/spec/indexers/identifiable_indexer_spec.rb +++ /dev/null @@ -1,169 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe IdentifiableIndexer do - let(:xml) do - <<~XML - - druid:rt923jk342 - item - google download barcode 36105049267078 - DOR - Squirrels of North America - Eder, Tamara, 1974- - STANFORD_342837261527 - 36105049267078 - 129483625 - 7f3da130-7b02-11de-8a39-0800200c9a66 - Google Books : Phase 1 - Google Books : Scan source STANFORD - Project : Beautiful Books - Registered By : blalbrit - DPG : Beautiful Books : Octavo : newpri - Remediated By : 4.15.4 - true - true - - XML - end - - let(:obj) { Dor::Abstract.new(pid: 'druid:rt923jk342') } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - obj.identityMetadata.content = xml - described_class.reset_cache! - end - - describe '#identity_metadata_source' do - it 'depends on remove_other_Id' do - obj.identityMetadata.remove_other_Id('catkey', '129483625') - obj.identityMetadata.remove_other_Id('barcode', '36105049267078') - obj.identityMetadata.add_other_Id('catkey', '129483625') - expect(indexer.identity_metadata_source).to eq 'Symphony' - obj.identityMetadata.remove_other_Id('catkey', '129483625') - obj.identityMetadata.add_other_Id('barcode', '36105049267078') - expect(indexer.identity_metadata_source).to eq 'Symphony' - obj.identityMetadata.remove_other_Id('barcode', '36105049267078') - expect(indexer.identity_metadata_source).to eq 'DOR' - obj.identityMetadata.remove_other_Id('foo', 'bar') - expect(indexer.identity_metadata_source).to eq 'DOR' - end - - it 'indexes metadata source' do - expect(indexer.identity_metadata_source).to eq 'Symphony' - end - end - - describe '#to_solr' do - let(:doc) { indexer.to_solr } - - context 'with related objects' do - let(:mock_rel_druid) { 'druid:does_not_exist' } - let(:mock_rels_ext_xml) do - %( - - - - - - ) - end - - before do - allow(obj.datastreams['RELS-EXT']).to receive(:content).and_return(mock_rels_ext_xml) - end - - context 'when related collection and APOs are not found' do - before do - allow(Dor).to receive(:find).with(mock_rel_druid).and_raise(ActiveFedora::ObjectNotFoundError) - end - - it 'generate collections and apo title fields' do - expect(doc[Solrizer.solr_name('collection_title', :symbol)].first).to eq mock_rel_druid - expect(doc[Solrizer.solr_name('collection_title', :stored_searchable)].first).to eq mock_rel_druid - expect(doc[Solrizer.solr_name('apo_title', :symbol)].first).to eq mock_rel_druid - expect(doc[Solrizer.solr_name('apo_title', :stored_searchable)].first).to eq mock_rel_druid - expect(doc[Solrizer.solr_name('nonhydrus_apo_title', :symbol)].first).to eq mock_rel_druid - expect(doc[Solrizer.solr_name('nonhydrus_apo_title', :stored_searchable)].first).to eq mock_rel_druid - end - end - - context 'when related collection and APOs are found' do - let(:mock_obj) { instance_double(Dor::Item, full_title: 'Test object') } - - before do - allow(Dor).to receive(:find).with(mock_rel_druid).and_return(mock_obj) - allow(indexer).to receive(:related_object_tags).and_return([]) - end - - it 'generate collections and apo title fields' do - expect(doc[Solrizer.solr_name('collection_title', :symbol)].first).to eq 'Test object' - expect(doc[Solrizer.solr_name('collection_title', :stored_searchable)].first).to eq 'Test object' - expect(doc[Solrizer.solr_name('apo_title', :symbol)].first).to eq 'Test object' - expect(doc[Solrizer.solr_name('apo_title', :stored_searchable)].first).to eq 'Test object' - expect(doc[Solrizer.solr_name('nonhydrus_apo_title', :symbol)].first).to eq 'Test object' - expect(doc[Solrizer.solr_name('nonhydrus_apo_title', :stored_searchable)].first).to eq 'Test object' - end - end - end - - it 'indexes metadata source' do - expect(doc).to match a_hash_including('metadata_source_ssi' => 'Symphony') - end - end - - describe '#related_object_tags' do - context 'with a nil' do - let(:object) { nil } - - it 'returns an empty array' do - expect(indexer.send(:related_object_tags, object)).to eq([]) - end - end - - context 'with an object that responds to #pid' do - before do - allow(Dor::Services::Client).to receive(:object).with(object.pid).and_return(fake_object_client) - end - - let(:fake_object_client) { instance_double(Dor::Services::Client::Object, administrative_tags: fake_tags_client) } - let(:fake_tags_client) { instance_double(Dor::Services::Client::AdministrativeTags, list: nil) } - let(:object) { obj } - - it 'makes a dor-services-client call' do - indexer.send(:related_object_tags, object) - expect(fake_tags_client).to have_received(:list).once - end - end - end - - describe '#related_obj_display_title' do - subject { indexer.send(:related_obj_display_title, mock_apo_obj, mock_default_title) } - - let(:mock_default_title) { 'druid:zy098xw7654' } - - context 'when the main title is available' do - let(:mock_apo_obj) { instance_double(Dor::AdminPolicyObject, full_title: 'apo title') } - - it { is_expected.to eq 'apo title' } - end - - context 'when the first descMetadata main title entry is empty string' do - let(:mock_apo_obj) { instance_double(Dor::AdminPolicyObject, full_title: nil) } - - it { is_expected.to eq mock_default_title } - end - - context 'when the related object is nil' do - let(:mock_apo_obj) { nil } - - it { is_expected.to eq mock_default_title } - end - end -end diff --git a/spec/indexers/identity_metadata_datastream_indexer_spec.rb b/spec/indexers/identity_metadata_datastream_indexer_spec.rb deleted file mode 100644 index 69d1647e2..000000000 --- a/spec/indexers/identity_metadata_datastream_indexer_spec.rb +++ /dev/null @@ -1,59 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe IdentityMetadataDatastreamIndexer do - let(:xml) do - <<~XML - - druid:rt923jk342 - item - google download barcode 36105049267078 - DOR - Squirrels of North America - Eder, Tamara, 1974- - STANFORD_342837261527 - 36105049267078 - 129483625 - 7f3da130-7b02-11de-8a39-0800200c9a66 - Google Books : Phase 1 - Google Books : Scan source STANFORD - Project : Beautiful Books - Registered By : blalbrit - DPG : Beautiful Books : Octavo : newpri - Remediated By : 4.15.4 - true - true - - XML - end - - let(:obj) { Dor::Item.new(pid: 'druid:rt923jk342') } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - obj.identityMetadata.content = xml - end - - describe '#to_solr' do - subject(:doc) { indexer.to_solr } - - it 'has the fields used by argo' do - expect(doc).to include( - 'barcode_id_ssim' => ['36105049267078'], - 'catkey_id_ssim' => ['129483625'], - 'dor_id_tesim' => %w[STANFORD_342837261527 36105049267078 129483625 - 7f3da130-7b02-11de-8a39-0800200c9a66], - 'identifier_ssim' => ['google:STANFORD_342837261527', 'barcode:36105049267078', - 'catkey:129483625', 'uuid:7f3da130-7b02-11de-8a39-0800200c9a66'], - 'identifier_tesim' => ['google:STANFORD_342837261527', 'barcode:36105049267078', - 'catkey:129483625', 'uuid:7f3da130-7b02-11de-8a39-0800200c9a66'], - 'objectType_ssim' => ['item'], - 'source_id_ssim' => ['google:STANFORD_342837261527'] - ) - end - end -end diff --git a/spec/indexers/object_profile_indexer_spec.rb b/spec/indexers/object_profile_indexer_spec.rb deleted file mode 100644 index 13bb31eb7..000000000 --- a/spec/indexers/object_profile_indexer_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe ObjectProfileIndexer do - let(:obj) do - Dor::Item.new(label: 'test label') - end - - let(:indexer) do - described_class.new(resource: obj) - end - - describe '#to_solr' do - let(:indexer) do - CompositeIndexer.new( - described_class - ).new(resource: obj) - end - let(:doc) { indexer.to_solr } - - it 'makes a solr doc' do - expect(doc).to match a_hash_including( - 'obj_label_tesim' => ['test label'], - 'obj_label_ssim' => ['test label'] - ) - end - end -end diff --git a/spec/indexers/processable_indexer_spec.rb b/spec/indexers/processable_indexer_spec.rb deleted file mode 100644 index 00feb7e90..000000000 --- a/spec/indexers/processable_indexer_spec.rb +++ /dev/null @@ -1,144 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe ProcessableIndexer do - let(:indexer) { described_class.new(resource: obj) } - - describe '#to_solr' do - let(:obj) do - instance_double(Dor::Item, - current_version: '4', - pid: '99', - modified_date: '1999-12-20') - end - - let(:solr_doc) { indexer.to_solr } - - context 'with rights set' do - let(:obj) do - instance_double(Dor::Item, - pid: '99', - rights: 'World', - modified_date: '1999-12-20', - current_version: '7') - end - - describe '#to_solr' do - let(:indexer) do - CompositeIndexer.new( - described_class - ).new(resource: obj) - end - - let(:status) do - instance_double(Dor::Workflow::Client::Status, - milestones: {}, - info: { status_code: 0 }, - display: 'v1 blah (parenthetical)', - display_simplified: 'blah') - end - - let(:workflow_client) { instance_double(Dor::Workflow::Client, status: status) } - - before do - allow(Dor::Workflow::Client).to receive(:new).and_return(workflow_client) - end - - it 'includes a rights facet' do - expect(solr_doc).to match a_hash_including('rights_ssim' => ['World']) - end - - it 'does not error if there is nothing in the datastream' do - allow(obj).to receive(:rightsMetadata).and_return(Dor::RightsMetadataDS.new) - expect { solr_doc }.not_to raise_error - end - end - end - - context 'with milestones' do - let(:dsxml) do - ' - - - Initial version - - - Replacing main PDF - - - Fixed title typo - - - Another typo - - - ' - end - - let(:milestones) do - [ - { milestone: 'published', at: Time.zone.parse('2012-01-26 21:06:54 -0800'), version: '2' }, - { milestone: 'opened', at: Time.zone.parse('2012-10-29 16:30:07 -0700'), version: '2' }, - { milestone: 'submitted', at: Time.zone.parse('2012-11-06 16:18:24 -0800'), version: '2' }, - { milestone: 'published', at: Time.zone.parse('2012-11-06 16:19:07 -0800'), version: '2' }, - { milestone: 'accessioned', at: Time.zone.parse('2012-11-06 16:19:10 -0800'), version: '2' }, - { milestone: 'described', at: Time.zone.parse('2012-11-06 16:19:15 -0800'), version: '2' }, - { milestone: 'opened', at: Time.zone.parse('2012-11-06 16:21:02 -0800'), version: nil }, - { milestone: 'submitted', at: Time.zone.parse('2012-11-06 16:30:03 -0800'), version: nil }, - { milestone: 'described', at: Time.zone.parse('2012-11-06 16:35:00 -0800'), version: nil }, - { milestone: 'published', at: Time.zone.parse('2012-11-06 16:59:39 -0800'), version: '3' }, - { milestone: 'published', at: Time.zone.parse('2012-11-06 16:59:39 -0800'), version: nil } - ] - end - let(:version_metadata) { Dor::VersionMetadataDS.from_xml(dsxml) } - - let(:status) do - instance_double(Dor::Workflow::Client::Status, - milestones: milestones, - info: { status_code: 4 }, - display: 'v4 In accessioning (described, published)', - display_simplified: 'In accessioning') - end - - let(:workflow_client) { instance_double(Dor::Workflow::Client, status: status) } - - before do - allow(Dor::Workflow::Client).to receive(:new).and_return(workflow_client) - allow(obj).to receive(:versionMetadata).and_return(version_metadata) - end - - it 'includes the semicolon delimited version, an earliest published date and a status' do - # published date should be the first published date - expect(solr_doc['status_ssi']).to eq 'v4 In accessioning (described, published)' - expect(solr_doc['processing_status_text_ssi']).to eq 'In accessioning' - expect(solr_doc).to match a_hash_including('opened_dttsim' => including('2012-11-07T00:21:02Z')) - expect(solr_doc['published_earliest_dttsi']).to eq('2012-01-27T05:06:54Z') - expect(solr_doc['published_latest_dttsi']).to eq('2012-11-07T00:59:39Z') - expect(solr_doc['published_dttsim'].first).to eq(solr_doc['published_earliest_dttsi']) - expect(solr_doc['published_dttsim'].last).to eq(solr_doc['published_latest_dttsi']) - expect(solr_doc['published_dttsim'].size).to eq(3) # not 4 because 1 deduplicated value removed! - expect(solr_doc['opened_earliest_dttsi']).to eq('2012-10-29T23:30:07Z') # 2012-10-29T16:30:07-0700 - expect(solr_doc['opened_latest_dttsi']).to eq('2012-11-07T00:21:02Z') # 2012-11-06T16:21:02-0800 - end - - context 'when a new version has not been opened' do - let(:milestones) do - [{ milestone: 'submitted', at: Time.zone.parse('2012-11-06 16:30:03 -0800'), version: nil }, - { milestone: 'described', at: Time.zone.parse('2012-11-06 16:35:00 -0800'), version: nil }, - { milestone: 'published', at: Time.zone.parse('2012-11-06 16:59:39 -0800'), version: '3' }, - { milestone: 'published', at: Time.zone.parse('2012-11-06 16:59:39 -0800'), version: nil }] - end - - it 'skips the versioning related steps if a new version has not been opened' do - expect(solr_doc['opened_dttsim']).to be_nil - end - end - - it 'creates a modified_latest date field' do - # the facet field should have a date in it. - expect(solr_doc['modified_latest_dttsi']).to match(/^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ$/) - end - end - end -end diff --git a/spec/indexers/releasable_indexer_spec.rb b/spec/indexers/releasable_indexer_spec.rb deleted file mode 100644 index 3d3681214..000000000 --- a/spec/indexers/releasable_indexer_spec.rb +++ /dev/null @@ -1,29 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe ReleasableIndexer do - let(:obj) { instance_double(Dor::Abstract, pid: 'druid:pz263ny9658') } - - describe 'to_solr' do - let(:doc) { described_class.new(resource: obj).to_solr } - let(:released_for_info) do - { - 'Project' => { 'release' => true }, - 'test_target' => { 'release' => true }, - 'test_nontarget' => { 'release' => false } - } - end - let(:released_to_field_name) { Solrizer.solr_name('released_to', :symbol) } - let(:object_client) { instance_double(Dor::Services::Client::Object, release_tags: tags_client) } - let(:tags_client) { instance_double(Dor::Services::Client::ReleaseTags, list: released_for_info) } - - before do - allow(Dor::Services::Client).to receive(:object).and_return(object_client) - end - - it 'indexes release tags' do - expect(doc).to eq(released_to_field_name => %w[Project test_target]) - end - end -end diff --git a/spec/indexers/rights_metadata_datastream_indexer_spec.rb b/spec/indexers/rights_metadata_datastream_indexer_spec.rb deleted file mode 100644 index a514ed8a2..000000000 --- a/spec/indexers/rights_metadata_datastream_indexer_spec.rb +++ /dev/null @@ -1,123 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe RightsMetadataDatastreamIndexer do - let(:xml) do - <<~XML - - - - - - - - - - - - - - Official WTO documents are free for public use. - - by-nc-nd - - - Copyright © World Trade Organization - - - XML - end - - let(:obj) { Dor::Item.new(pid: 'druid:rt923jk342') } - let(:rights_md_ds) { obj.rightsMetadata } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - rights_md_ds.content = xml - end - - describe '#to_solr' do - subject(:doc) { indexer.to_solr } - - it 'has the fields used by argo' do - expect(doc).to include( - 'copyright_ssim' => ['Copyright © World Trade Organization'], - 'use_statement_ssim' => ['Official WTO documents are free for public use.'], - 'use_license_machine_ssi' => 'by-nc-nd', - 'rights_descriptions_ssim' => ['world'] - ) - end - - describe 'legacy tests to_solr' do - let(:mock_dra_obj) { instance_double(Dor::RightsAuth, index_elements: index_elements) } - - before do - allow(rights_md_ds).to receive(:dra_object).and_return(mock_dra_obj) - end - - context 'when access is restricted' do - let(:index_elements) do - { - primary: 'access_restricted', - errors: [], - terms: [], - obj_locations_qualified: [{ location: 'someplace', rule: 'somerule' }], - file_groups_qualified: [{ group: 'somegroup', rule: 'someotherrule' }] - } - end - - it 'filters access_restricted from what gets aggregated into rights_descriptions_ssim' do - expect(doc).to match a_hash_including( - 'rights_primary_ssi' => 'access_restricted', - 'rights_descriptions_ssim' => ['location: someplace (somerule)', 'somegroup (file) (someotherrule)'] - ) - end - end - - context 'when it is world qualified' do - let(:index_elements) do - { - primary: 'world_qualified', - errors: [], - terms: [], - obj_world_qualified: [{ rule: 'somerule' }] - } - end - - it 'filters world_qualified from what gets aggregated into rights_descriptions_ssim' do - expect(doc).to match a_hash_including( - 'rights_primary_ssi' => 'world_qualified', - 'rights_descriptions_ssim' => ['world (somerule)'] - ) - end - end - - context 'with file_rights' do - let(:index_elements) do - { - primary: 'access_restricted', - errors: [], - terms: [], - obj_locations: ['location'], - file_locations: ['file_specific_location'], - obj_agents: ['agent'], - file_agents: ['file_specific_agent'] - } - end - - it 'includes the simple fields that are present' do - expect(doc).to match a_hash_including( - 'obj_rights_locations_ssim' => ['location'], - 'file_rights_locations_ssim' => ['file_specific_location'], - 'obj_rights_agents_ssim' => ['agent'], - 'file_rights_agents_ssim' => ['file_specific_agent'] - ) - end - end - end - end -end diff --git a/spec/indexers/role_metadata_datastream_indexer_spec.rb b/spec/indexers/role_metadata_datastream_indexer_spec.rb deleted file mode 100644 index dfab5148f..000000000 --- a/spec/indexers/role_metadata_datastream_indexer_spec.rb +++ /dev/null @@ -1,58 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe RoleMetadataDatastreamIndexer do - let(:obj) { Dor::AdminPolicyObject.new } - - let(:indexer) do - described_class.new(resource: obj) - end - - before do - obj.roleMetadata.content = xml - end - - describe '#to_solr' do - subject(:doc) { indexer.to_solr } - - context 'when there are non-Hydrus roles' do - let(:xml) do - <<~XML - - - - - dlss:dor-admin - - - - XML - end - - it 'has the fields used by argo' do - expect(doc['apo_register_permissions_ssim']).to eq ['workgroup:dlss:dor-admin'] - expect(doc['apo_register_permissions_tesim']).to eq ['workgroup:dlss:dor-admin'] - end - end - - context 'when there are hydrus roles' do - let(:xml) do - <<~XML - - - - dlss:dor-admin - - - - XML - end - - it 'does not index apo_register_permissions' do - expect(doc).not_to have_key('apo_register_permissions_ssim') - expect(doc).not_to have_key('apo_register_permissions_tesim') - end - end - end -end diff --git a/spec/indexers/workflow_indexer_spec.rb b/spec/indexers/workflow_indexer_spec.rb deleted file mode 100644 index 344d458b8..000000000 --- a/spec/indexers/workflow_indexer_spec.rb +++ /dev/null @@ -1,179 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' -# require 'webmock/rspec' - -RSpec.describe WorkflowIndexer do - before do - stub_request(:get, 'https://localhost/workflow_templates/accessionWF') - .to_return(status: 200, body: workflow_template_json) - end - - let(:document) { Dor::Workflow::Response::Workflow.new(xml: xml) } - let(:indexer) { described_class.new(workflow: document) } - - let(:workflow_template_json) do - '{"processes":[{"name":"hello"},{"name":"goodbye"},{"name":"technical-metadata"},{"name":"some-other-step"}]}' - end - - let(:step1) { 'hello' } - let(:step2) { 'goodbye' } - let(:step3) { 'technical-metadata' } - let(:step4) { 'some-other-step' } - - describe '#to_solr' do - subject(:solr_doc) { indexer.to_solr.to_h } - - context 'when not all of the steps are completed' do - let(:xml) do - <<-XML - - - - - - XML - end - - it 'creates the workflow_status field with the workflow repository included, and indicates that the workflow is still active' do - expect(solr_doc[Solrizer.solr_name('workflow_status', :symbol)].first).to eq('accessionWF|active|0') - end - end - - context 'when the template has been changed to have new steps, but the workflow service indicates all steps are completed' do - let(:workflow_template_json) do - '{"processes":[{"name":"hello"},{"name":"goodbye"},{"name":"technical-metadata"},{"name":"some-other-step"}]}' - end - - let(:xml) do - <<-XML - - - - - - XML - end - - it 'indicates that the workflow is complete' do - expect(solr_doc[Solrizer.solr_name('workflow_status', :symbol)].first).to eq('accessionWF|completed|0') - end - end - - context 'when all steps are completed or skipped' do - let(:xml) do - <<-XML - - - - - - - - XML - end - - it 'indexes the right workflow status (completed)' do - expect(solr_doc).to match a_hash_including('workflow_status_ssim' => ['accessionWF|completed|0']) - end - end - - context 'when the xml has dates for completed and errored steps' do - let(:xml) do - <<-XML - - - - - - - - XML - end - - it 'indexes the iso8601 UTC dates' do - expect(solr_doc).to match a_hash_including('wf_accessionWF_start-accession_dttsi' => '2012-11-07T00:18:24Z') - expect(solr_doc).to match a_hash_including('wf_accessionWF_technical-metadata_dttsi' => '2012-11-07T00:18:58Z') - end - end - - context 'when the xml does not have dates for completed and errored steps' do - let(:xml) do - <<-XML - - - - - - - - XML - end - - it 'only indexes the dates on steps that include a date' do - expect(solr_doc).to match a_hash_including('wf_accessionWF_technical-metadata_dttsi') - expect(solr_doc).not_to match a_hash_including('wf_accessionWF_start_dttsi') - expect(solr_doc).not_to match a_hash_including('wf_accessionWF_goodbye_dttsi') - end - end - - context 'when there are error messages' do - let(:xml) do - <<-XML - - - - - - XML - end - - let(:wf_error) { solr_doc[Solrizer.solr_name('wf_error', :symbol)] } - - it 'indexes the error messages' do - expect(wf_error).to eq ['accessionWF:technical-metadata:druid:gv054hp4128 - Item error; caused by 413 Request Entity Too Large:'] - end - end - - context 'when the error messages are crazy long' do - let(:error_length) { 40_000 } - let(:error) { (0...error_length).map { rand(65..90).chr }.join } - let(:xml) do - <<-XML - - - - - - XML - end - - let(:wf_error) { solr_doc[Solrizer.solr_name('wf_error', :symbol)] } - - it "truncates the error messages to below Solr's limit" do - # 31 is the leader - expect(wf_error.first.length).to be < 32_766 - end - end - end -end diff --git a/spec/indexers/workflows_indexer_spec.rb b/spec/indexers/workflows_indexer_spec.rb deleted file mode 100644 index f98333c77..000000000 --- a/spec/indexers/workflows_indexer_spec.rb +++ /dev/null @@ -1,138 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe WorkflowsIndexer do - let(:obj) { instance_double(Dor::Item, pid: 'druid:ab123cd4567') } - - let(:indexer) { described_class.new(resource: obj) } - - describe '#to_solr' do - let(:solr_doc) { indexer.to_solr } - let(:xml) do - <<~XML - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - XML - end - - let(:accession_json) do - { 'processes' => [ - { 'name' => 'start-accession' }, - { 'name' => 'descriptive-metadata' }, - { 'name' => 'rights-metadata' }, - { 'name' => 'content-metadata' }, - { 'name' => 'technical-metadata' }, - { 'name' => 'remediate-object' }, - { 'name' => 'shelve' }, - { 'name' => 'publish' }, - { 'name' => 'provenance-metadata' }, - { 'name' => 'sdr-ingest-transfer' }, - { 'name' => 'sdr-ingest-received' }, - { 'name' => 'reset-workspace' }, - { 'name' => 'end-accession' } - ] } - end - - let(:assembly_json) do - { 'processes' => [ - { 'name' => 'start-assembly' }, - { 'name' => 'content-metadata-create' }, - { 'name' => 'jp2-create' }, - { 'name' => 'checksum-compute' }, - { 'name' => 'exif-collect' }, - { 'name' => 'accessioning-initiate' } - ] } - end - - let(:dissemination_json) do - { - 'processes' => [ - { 'name' => 'cleanup' } - ] - } - end - - let(:hydrus_json) do - { 'processes' => [ - { 'name' => 'start-deposit' }, - { 'name' => 'submit' }, - { 'name' => 'approve' }, - { 'name' => 'start-assembly' } - ] } - end - - let(:versioning_json) do - { 'processes' => [ - { 'name' => 'start-version' }, - { 'name' => 'submit-version' }, - { 'name' => 'start-accession' } - ] } - end - let(:workflow_client) { instance_double(Dor::Workflow::Client, workflow_routes: workflow_routes) } - let(:workflow_routes) do - instance_double(Dor::Workflow::Client::WorkflowRoutes, all_workflows: Dor::Workflow::Response::Workflows.new(xml: xml)) - end - - before do - allow(Dor::Workflow::Client).to receive(:new).and_return(workflow_client) - - allow(workflow_client).to receive(:workflow_template).with('accessionWF').and_return(accession_json) - allow(workflow_client).to receive(:workflow_template).with('assemblyWF').and_return(assembly_json) - allow(workflow_client).to receive(:workflow_template).with('disseminationWF').and_return(dissemination_json) - allow(workflow_client).to receive(:workflow_template).with('hydrusAssemblyWF').and_return(hydrus_json) - allow(workflow_client).to receive(:workflow_template).with('versioningWF').and_return(versioning_json) - end - - describe 'workflow_status_ssim' do - subject { solr_doc['workflow_status_ssim'] } - - it { is_expected.to eq ['accessionWF|completed|0', 'assemblyWF|active|1', 'disseminationWF|completed|0', 'hydrusAssemblyWF|completed|0', 'versioningWF|completed|0'] } - end - end -end diff --git a/spec/services/indexer_service_spec.rb b/spec/services/indexer_service_spec.rb deleted file mode 100644 index e8d539d80..000000000 --- a/spec/services/indexer_service_spec.rb +++ /dev/null @@ -1,12 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Indexer do - subject { described_class.for(obj) } - - context 'for a collection' do - let(:obj) { Hydrus::Collection.new } - it { is_expected.not_to be_nil } - end -end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index ac1149f61..1ab37d282 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -15,8 +15,6 @@ require 'equivalent-xml/rspec_matchers' require 'factory_bot' require 'hydrus' -require 'webmock/rspec' -WebMock.disable_net_connect!(allow_localhost: true, allow: ['https://purl.stanford.edu']) # Requires supporting ruby files with custom matchers and macros, etc, in # spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are