diff --git a/bin/sync_with_devto b/bin/sync_with_devto
index a0657086f..e96bae1a4 100755
--- a/bin/sync_with_devto
+++ b/bin/sync_with_devto
@@ -6,9 +6,11 @@ $LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
 
 require "sync/app"
 require "sync/sync_script"
+require "sync/dev_to_article_fetcher"
 
 App.configure do |config|
   config.logger = Logger.new(STDOUT, level: :warn)
+  config.fetcher_class = Sync::DevToArticleFetcher
 end
 
 App.new(args: ARGV).run
diff --git a/lib/sync/dev_to_article_fetcher.rb b/lib/sync/dev_to_article_fetcher.rb
index 8acbc1785..7feb2a0e9 100644
--- a/lib/sync/dev_to_article_fetcher.rb
+++ b/lib/sync/dev_to_article_fetcher.rb
@@ -1,11 +1,13 @@
 require "uri"
 require "json"
 
+require "sync/fetcher"
 require "sync/retryable"
 require "sync/logging"
 
 module Sync
   class DevToArticleFetcher
+    include Fetcher
     include Logging
     include Retryable
     USERNAME = "jetthoughts".freeze
diff --git a/lib/sync/fetcher.rb b/lib/sync/fetcher.rb
new file mode 100644
index 000000000..99cf57ab1
--- /dev/null
+++ b/lib/sync/fetcher.rb
@@ -0,0 +1,34 @@
+require "sync/logging"
+require "sync/retryable"
+
+module Sync
+  # Interface mixin for article fetchers. This has to be a module, not a
+  # class: the fetchers in this change pull it in with `include Fetcher`, and
+  # Module#include raises TypeError when it is given a Class.
+  #
+  # Each method raises NotImplementedError until the includer overrides it.
+  module Fetcher
+    include Logging
+    include Retryable
+
+    def fetch_articles
+      raise NotImplementedError, "Subclasses must implement #fetch_articles"
+    end
+
+    def fetch(id)
+      raise NotImplementedError, "Subclasses must implement #fetch"
+    end
+
+    def fetch_image(url)
+      raise NotImplementedError, "Subclasses must implement #fetch_image"
+    end
+
+    def need_to_update_remote?(article_data, article_sync_data)
+      raise NotImplementedError, "Subclasses must implement #need_to_update_remote?"
+    end
+
+    def update_meta_on_dev_to(id, data)
+      raise NotImplementedError, "Subclasses must implement #update_meta_on_dev_to"
+    end
+  end
+end
diff --git a/lib/sync/local_folder_fetcher.rb b/lib/sync/local_folder_fetcher.rb
new file mode 100644
index 000000000..147b9fd68
--- /dev/null
+++ b/lib/sync/local_folder_fetcher.rb
@@ -0,0 +1,104 @@
+require "yaml"
+require "sync/fetcher"
+require "sync/logging"
+
+module Sync
+  # Fetcher that reads articles from the sub-directories of #folder_path.
+  # A sub-directory named after the article id yields an article when it
+  # contains a content.md file; YAML front matter and a cover image are
+  # optional. Includers must define #folder_path.
+  module LocalFolderFetcher
+    include Fetcher
+    include Logging
+
+    # Returns an article hash for every sub-directory that has a content.md.
+    def fetch_articles
+      articles = []
+      Dir.glob(File.join(folder_path, "*")).select { |f| File.directory?(f) }.each do |dir|
+        article_id = File.basename(dir)
+        article = fetch(article_id)
+        articles << article if article
+      end
+      articles
+    end
+
+    # Builds the article hash for +id+; returns nil when the directory or
+    # its content.md is missing.
+    def fetch(id)
+      path = File.join(folder_path, id.to_s)
+      return nil unless Dir.exist?(path)
+
+      content_file = File.join(path, "content.md")
+      return nil unless File.exist?(content_file)
+
+      content = File.read(content_file)
+      metadata = extract_metadata(content)
+
+      process_article({
+        "id" => id,
+        "title" => metadata["title"] || id,
+        "description" => metadata["description"],
+        "body_markdown" => strip_frontmatter(content),
+        "cover_image" => find_cover_image(path),
+        "slug" => id,
+        "canonical_url" => metadata["canonical_url"]
+      })
+    end
+
+    # Returns the raw bytes of the file at +path+, or nil when +path+ is nil
+    # or no such file exists. Presumably callers pass an article's
+    # "cover_image", which is nil without a cover; File.exist?(nil) raises.
+    def fetch_image(path)
+      return nil unless path && File.exist?(path)
+      File.binread(path)
+    end
+
+    def need_to_update_remote?(article_data, article_sync_data)
+      # Local folders don't need remote updates
+      false
+    end
+
+    def update_meta_on_dev_to(id, data)
+      # No-op for local folders
+      nil
+    end
+
+    private
+
+    def folder_path
+      raise NotImplementedError, "Implementers must define #folder_path"
+    end
+
+    # Returns the path of the first cover file found in +dir+, or nil.
+    def find_cover_image(dir)
+      %w[thumbnail.jpeg cover.jpg cover.jpeg thumbnail.jpg].each do |name|
+        path = File.join(dir, name)
+        return path if File.exist?(path)
+      end
+      nil
+    end
+
+    # Parses the leading YAML front matter; returns {} when it is absent or
+    # fails to parse.
+    def extract_metadata(content)
+      frontmatter = content.match(/\A---\n(.*?)\n---/m)
+      return {} unless frontmatter
+
+      begin
+        YAML.safe_load(frontmatter[1]) || {}
+      rescue
+        {}
+      end
+    end
+
+    def strip_frontmatter(content)
+      content.sub(/\A---\n.*?\n---\n/m, '')
+    end
+
+    # Copies the slug into "devto_slug"; "slug" itself is left as it is.
+    def process_article(article)
+      article["devto_slug"] = article["slug"]
+      article
+    end
+  end
+end
diff --git a/lib/sync/zip_folder_fetcher.rb b/lib/sync/zip_folder_fetcher.rb
new file mode 100644
index 000000000..09862c095
--- /dev/null
+++ b/lib/sync/zip_folder_fetcher.rb
@@ -0,0 +1,135 @@
+require "zip"
+require "yaml"
+require "sync/fetcher"
+require "sync/logging"
+
+module Sync
+  # Fetcher that reads articles from the zip archive at #zip_path. Every
+  # directory entry is treated as an article id, and an article is built
+  # when "<id>/content.md" exists. Includers must define #zip_path.
+  module ZipFolderFetcher
+    include Fetcher
+    include Logging
+
+    # Returns an article hash for every directory entry that has a content.md.
+    def fetch_articles
+      articles = []
+
+      Zip::File.open(zip_path) do |zip_file|
+        folder_entries = zip_file.entries.select { |e| e.ftype == :directory }
+
+        folder_entries.each do |folder_entry|
+          article_id = folder_entry.name.chomp('/')
+          article = fetch_from_zip(zip_file, article_id)
+          articles << article if article
+        end
+      end
+
+      articles
+    end
+
+    # Returns the article hash for +id+, or nil when it has no content.md.
+    # Any StandardError is logged and turned into nil.
+    def fetch(id)
+      Zip::File.open(zip_path) do |zip_file|
+        fetch_from_zip(zip_file, id)
+      end
+    rescue => e
+      logger.error("Error fetching article from zip: #{e.message}")
+      nil
+    end
+
+    # Returns the bytes of the entry named by a "zip://" path, or nil when
+    # +path+ is nil, lacks the prefix, or names no entry. Any StandardError
+    # is logged and turned into nil.
+    def fetch_image(path)
+      return nil unless path && path.start_with?("zip://")
+
+      # Strip only the leading scheme; gsub would also remove a "zip://"
+      # that occurs later in the entry name.
+      path = path.delete_prefix("zip://")
+
+      Zip::File.open(zip_path) do |zip_file|
+        entry = zip_file.find_entry(path)
+        return nil unless entry
+
+        entry.get_input_stream.read
+      end
+    rescue => e
+      logger.error("Error fetching image from zip: #{e.message}")
+      nil
+    end
+
+    def need_to_update_remote?(article_data, article_sync_data)
+      # Zip archives don't need remote updates
+      false
+    end
+
+    def update_meta_on_dev_to(id, data)
+      # No-op for zip archives
+      nil
+    end
+
+    private
+
+    def zip_path
+      raise NotImplementedError, "Implementers must define #zip_path"
+    end
+
+    # Builds the article hash from "<id>/content.md" inside +zip_file+, or
+    # returns nil when that entry is missing.
+    def fetch_from_zip(zip_file, id)
+      folder_name = "#{id}/"
+      content_entry = zip_file.find_entry("#{folder_name}content.md")
+      return nil unless content_entry
+
+      content = content_entry.get_input_stream.read
+      metadata = extract_metadata(content)
+
+      cover_image = find_cover_image_in_zip(zip_file, folder_name)
+
+      process_article({
+        "id" => id,
+        "title" => metadata["title"] || id,
+        "description" => metadata["description"],
+        "body_markdown" => strip_frontmatter(content),
+        "cover_image" => cover_image ? "zip://#{cover_image.name}" : nil,
+        "slug" => id,
+        "canonical_url" => metadata["canonical_url"]
+      })
+    end
+
+    # Returns the first cover entry found under +folder_name+, or nil.
+    def find_cover_image_in_zip(zip_file, folder_name)
+      %w[thumbnail.jpeg cover.jpg cover.jpeg thumbnail.jpg].each do |name|
+        path = "#{folder_name}#{name}"
+        entry = zip_file.find_entry(path)
+        return entry if entry
+      end
+      nil
+    end
+
+    # Parses the leading YAML front matter; returns {} when it is absent or
+    # fails to parse.
+    def extract_metadata(content)
+      frontmatter = content.match(/\A---\n(.*?)\n---/m)
+      return {} unless frontmatter
+
+      begin
+        YAML.safe_load(frontmatter[1]) || {}
+      rescue
+        {}
+      end
+    end
+
+    def strip_frontmatter(content)
+      content.sub(/\A---\n.*?\n---\n/m, '')
+    end
+
+    # Copies the slug into "devto_slug"; "slug" itself is left as it is.
+    def process_article(article)
+      article["devto_slug"] = article["slug"]
+      article
+    end
+  end
+end