Skip to content

Commit

Permalink
Refactor after PR review to improve reliability, readability and redu…
Browse files Browse the repository at this point in the history
…ce redundancy
  • Loading branch information
ellohez committed Jan 29, 2025
1 parent 39261eb commit 31dc945
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 145 deletions.
24 changes: 21 additions & 3 deletions lib/tasks/world_taxon_update.rake
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,33 @@ WORLD_ROOT_CONTENT_ID = "91b8ef20-74e7-4552-880c-50e6d73c2ff9".freeze
namespace :worldwide do
desc "Update worldwide taxon titles (external name) to include the name of the country they relate to"

task :add_country_name_to_title, %i[log_file_path] => :environment do |_, args|
task :create_title_adding_country_name, %i[log_file_path] => :environment do |_, args|
log_file = nil
log_file_path = args[:log_file_path]
WorldTaxonUpdateHelper.new.add_country_names(log_file_path)
if log_file_path
log_file = File.open(log_file_path, "w")
end

WorldTaxonUpdateHelper.new(log_file).add_country_names
rescue StandardError => e
warn e.full_message
ensure
log_file&.close
end

# Necessary for testing before release to revert all changed titles back to their original state
desc "Revert worldwide taxon titles (external name) to original text (remove the name of the country they relate to)"
# Delegates to WorldTaxonUpdateHelper#remove_country_names.
#
# log_file_path - optional rake argument. When given, the file is opened in
#                 "w" mode (existing content is truncated) and handed to the
#                 helper; when omitted the helper receives nil.
#
# Any StandardError is reported on stderr rather than re-raised, and the log
# file (if one was opened) is always closed.
task :remove_country_name_from_title, %i[log_file_path] => :environment do |_, args|
  log_file = nil
  log_file_path = args[:log_file_path]
  # Only open a log file when a path was actually supplied.
  log_file = File.open(log_file_path, "w") if log_file_path

  WorldTaxonUpdateHelper.new(log_file).remove_country_names
rescue StandardError => e
  warn e.full_message
ensure
  log_file&.close
end
end
168 changes: 96 additions & 72 deletions lib/world_taxon_update_helper.rb
Original file line number Diff line number Diff line change
@@ -1,109 +1,102 @@
# frozen_string_literal: true

class WorldTaxonUpdateHelper
def add_country_names(log_file_path)
log_file = nil
if log_file_path
log_file = File.open(log_file_path, "w")
log_rake_progress(log_file, "Updating each worldwide taxon to include country name in their title")
end
# Stores the log destination used by the private logging helpers.
#
# log_file - object that responds to #puts (the rake tasks pass an open File),
#            or nil. The logging helpers call it with safe navigation, so nil
#            simply disables file logging.
def initialize(log_file)
@log_file = log_file
end

def add_country_names
log_rake_progress("Updating each worldwide taxon to include country name in their title")
total_taxon_updates = 0

# Build a taxonomy tree with the grandparent
# (common ancestor e.g. /world/all - Help and services around the world) as the root
taxonomy = Taxonomy::ExpandedTaxonomy.new(WORLD_ROOT_CONTENT_ID).build.child_expansion
log_rake_progress(log_file, "Taxonomy has size #{taxonomy.tree.size}")
log_rake_progress("Taxonomy has size #{taxonomy.tree.size}")

taxonomy.tree.each do |linked_item|
# As this is a tree, we reach all grandchildren without another loop (not a nested array)
# example grandchild url /world/passports-and-emergency-travel-documents-cape-verde

next if skip_tree_item?(log_file, linked_item)
next if skip_tree_item?(linked_item)

message, new_title = create_new_taxon_title(linked_item.internal_name)
log_rake_progress(log_file, message)
new_title = create_title_adding_country_name(linked_item.internal_name)
next if new_title == linked_item.title

# Fetch the taxon and update accordingly
if new_title
new_taxon = Taxonomy::BuildTaxon.call(content_id: linked_item.content_id)
new_taxon.title = new_title

# Save the taxon with the new title
Taxonomy::UpdateTaxon.call(taxon: new_taxon)
message = "Updated taxon #{linked_item.title} to #{new_title}"
end
log_rake_progress(log_file, message)
new_taxon = Taxonomy::BuildTaxon.call(content_id: linked_item.content_id)
new_taxon.title = new_title

# Save the taxon with the new title
Taxonomy::UpdateTaxon.call(taxon: new_taxon)
message = "Updated taxon #{linked_item.title} to #{new_title}"

log_rake_progress(message)
total_taxon_updates += 1
rescue Taxonomy::UpdateTaxon::InvalidTaxonError => e
log_rake_error(log_file, "An error occurred while processing taxon #{linked_item.internal_name}: #{e.message}")
log_rake_error("An error occurred while processing taxon #{linked_item.internal_name}: #{e.message}")
end

# Need to publish all the drafts we have created above (if latest edition is published)
# - Draft editions are updated straight away.
log_rake_progress(log_file, "Publishing all updated taxons")
log_rake_progress("Publishing all updated taxons")
Taxonomy::BulkPublishTaxon.call(WORLD_ROOT_CONTENT_ID)
log_rake_progress(log_file, "Total number of taxons updated - #{total_taxon_updates}")
log_rake_progress("Total number of taxons updated - #{total_taxon_updates}")
rescue GdsApi::HTTPConflict, GdsApi::HTTPGatewayTimeout, GdsApi::TimedOutException => e
log_rake_error(log_file, "An error occurred while publishing taxons: #{e.full_message}")
log_rake_error("An error occurred while publishing taxons: #{e.full_message}")
rescue StandardError => e
log_rake_error(log_file, "An error occurred while publishing taxons: #{e.full_message}")
ensure
log_file&.close
log_rake_error("An error occurred while publishing taxons: #{e.full_message}")
end

def remove_country_names(log_file_path)
log_file = nil
if log_file_path
log_file = File.open(log_file_path, "w")
log_rake_progress(log_file, "Updating each worldwide taxon to include country name in their title")
end

def remove_country_names
log_rake_progress("Updating each worldwide taxon to remove the country name from their title")
total_taxon_updates = 0
taxonomy = Taxonomy::ExpandedTaxonomy.new(WORLD_ROOT_CONTENT_ID).build.child_expansion
log_rake_progress("Taxonomy has size #{taxonomy.tree.size}")

taxonomy.tree.each do |linked_item|
next if skip_tree_item?(log_file, linked_item)
next if skip_tree_item?(linked_item)

title = linked_item.title
suffix_index = nil
if title.start_with?("Coming to the UK from")
log_rake_progress(log_file, "removing - ...from COUNTRY_NAME from #{title}")
suffix_index = title.index(" from ")
elsif title.start_with?("Trade and invest:")
log_rake_progress(log_file, "removing - ...: COUNTRY_NAME from #{title}")
suffix_index = title.index(": ")
elsif title.include?(" in ")
log_rake_progress(log_file, "removing - ...in COUNTRY_NAME from #{title}")
suffix_index = title.index(" in ")
end

next unless suffix_index

new_title = title[0..(suffix_index - 1)]
log_rake_progress(log_file, "New title = #{new_title}")
new_title = create_title_removing_country_name(title)

next if new_title.empty? || title == new_title

new_taxon = Taxonomy::BuildTaxon.call(content_id: linked_item.content_id)
new_taxon.title = new_title

Taxonomy::UpdateTaxon.call(taxon: new_taxon)
message = "Updated taxon #{linked_item.title} to #{new_title}"

log_rake_progress(message)
total_taxon_updates += 1
rescue Taxonomy::UpdateTaxon::InvalidTaxonError => e
log_rake_error(log_file, "An error occurred while processing taxon #{linked_item.internal_name}: #{e.message}")
log_rake_error("An error occurred while processing taxon #{linked_item.internal_name}: #{e.message}")
end

log_rake_progress(log_file, "Publishing all updated taxons")
log_rake_progress("Publishing all updated taxons")
Taxonomy::BulkPublishTaxon.call(WORLD_ROOT_CONTENT_ID)
rescue GdsApi::HTTPConflict, GdsApi::HTTPGatewayTimeout, GdsApi::TimedOutException => e
log_rake_error(log_file, "An error occurred while publishing taxons: #{e.full_message}")
log_rake_error("An error occurred while publishing taxons: #{e.full_message}")
rescue StandardError => e
log_rake_error(log_file, "An error occurred while publishing taxons: #{e.full_message}")
ensure
log_file&.close
log_rake_error("An error occurred while publishing taxons: #{e.full_message}")
end

private

def create_new_taxon_title(internal_name)
# Reports a progress message: written to the log file when one was supplied,
# and always printed to standard output.
#
# message - text to report.
def log_rake_progress(message)
  @log_file.puts(message) unless @log_file.nil?
  $stdout.puts(message)
end

# Reports an error message: written to the log file when one was supplied,
# and always emitted as a warning (standard error).
#
# message - text to report.
def log_rake_error(message)
  @log_file.puts(message) unless @log_file.nil?
  warn message
end

def create_title_adding_country_name(internal_name)
# -------------
# Takes the country name from the internal_name and adds it to the title
# Adding the appropriate suffix as follows:
# Coming to the UK from COUNTRY_NAME
# Trade and invest: COUNTRY_NAME
Expand All @@ -115,8 +108,12 @@ def create_new_taxon_title(internal_name)
# Passports and emergency travel documents in COUNTRY_NAME
# Tax, benefits, pensions and working abroad in COUNTRY_NAME
# --------------
# Example:
# internal_name = 'Coming to the UK (Argentina)'
# title = 'Coming to the UK'
# Becomes:
# new_title = 'Coming to the UK from Argentina'

new_title = ""
if internal_name.start_with?("Coming to the UK")
message = "adding - ...from COUNTRY_NAME"
new_title = internal_name.gsub("(", "from ")
Expand All @@ -128,32 +125,59 @@ def create_new_taxon_title(internal_name)
new_title = internal_name.gsub("(", "in ")
end
new_title.gsub!(")", "")
[message, new_title]
end

def log_rake_progress(log_file, message)
log_file&.puts(message)
puts(message)
log_rake_progress(message)
log_rake_progress("New title: #{new_title}")

new_title
end

def log_rake_error(log_file, message)
log_file&.puts(message)
warn(message)
# Strips the country-name suffix from a worldwide taxon title, leaving the
# title as it was before the COUNTRY_NAME was added.
#
# Recognised shapes, checked in this order:
#   "Coming to the UK from COUNTRY_NAME" - cut at the first " from "
#   "Trade and invest: COUNTRY_NAME"     - cut at the first ": "
#   "... in COUNTRY_NAME"                - cut at the first " in "
#
# Example:
#   title     = 'Coming to the UK from Argentina'
#   new_title = 'Coming to the UK'
#
# title - String title to strip.
#
# Returns the shortened String, or "" when nothing can be removed.
def create_title_removing_country_name(title)
  message = nil
  suffix_index = nil
  if title.start_with?("Coming to the UK from")
    message = "removing - ...from COUNTRY_NAME from #{title}"
    suffix_index = title.index(" from ")
  elsif title.start_with?("Trade and invest:")
    message = "removing - ...: COUNTRY_NAME from #{title}"
    suffix_index = title.index(": ")
  elsif title.include?(" in ")
    message = "removing - ...in COUNTRY_NAME from #{title}"
    suffix_index = title.index(" in ")
  end
  # Only report a removal when a pattern matched; logging nil would emit a
  # blank line to stdout and the log file.
  log_rake_progress(message) if message

  if suffix_index.nil?
    log_rake_progress("No change to title: #{title}")
    return ""
  end

  new_title = title[0..(suffix_index - 1)]
  log_rake_progress("New title: #{new_title}")
  new_title
end

def skip_tree_item?(log_file, linked_item)
def skip_tree_item?(linked_item)
# Tree includes root (world/all) - skip that
if linked_item.content_id == WORLD_ROOT_CONTENT_ID
log_rake_progress(log_file, "Skipping world root taxon")
log_rake_progress("Skipping world root taxon")
return true
end

# Skip titles where the country name is already included at the end
# Skip internal names where the country name is already included at the end
# e.g. If child - country pages (parent e.g. /world/argentina - UK help and services in Argentina)
# or if the taxon is a GENERIC (template) version
if linked_item.internal_name.start_with?("UK help and services in ") || linked_item.internal_name.start_with?("Living in ") ||
linked_item.internal_name.start_with?("Travelling to ") || linked_item.internal_name.include?("(GENERIC)")
log_rake_progress(log_file, "Skipping #{linked_item.internal_name}")
if linked_item.internal_name.start_with?("UK help and services in ", "Living in ", "Travelling to ") \
|| linked_item.internal_name.include?("(GENERIC)")
log_rake_progress("Skipping #{linked_item.internal_name}")
return true
end

Expand Down
Loading

0 comments on commit 31dc945

Please sign in to comment.