From 14ea3f6d0dc519e9be2ca6b1abe948622837ead2 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 19 Dec 2023 12:59:17 +0100 Subject: [PATCH] Use rubocop template api to extract markdown code blocks --- lib/rubocop/markdown.rb | 4 +- lib/rubocop/markdown/preprocess.rb | 84 +++++++-------- lib/rubocop/markdown/rubocop_ext.rb | 95 ----------------- lib/rubocop/markdown/ruby_extractor.rb | 48 +++++++++ test/preprocess_test.rb | 136 ++++++++----------------- test/test_helper.rb | 2 - 6 files changed, 130 insertions(+), 239 deletions(-) delete mode 100644 lib/rubocop/markdown/rubocop_ext.rb create mode 100644 lib/rubocop/markdown/ruby_extractor.rb diff --git a/lib/rubocop/markdown.rb b/lib/rubocop/markdown.rb index af5da64..295f4eb 100644 --- a/lib/rubocop/markdown.rb +++ b/lib/rubocop/markdown.rb @@ -11,6 +11,8 @@ module Markdown require_relative "markdown/inject" require_relative "markdown/preprocess" - require_relative "markdown/rubocop_ext" if defined?(::RuboCop::ProcessedSource) + require_relative "markdown/ruby_extractor" + + RuboCop::Markdown::Inject.defaults! end end diff --git a/lib/rubocop/markdown/preprocess.rb b/lib/rubocop/markdown/preprocess.rb index 1210103..eefc68e 100644 --- a/lib/rubocop/markdown/preprocess.rb +++ b/lib/rubocop/markdown/preprocess.rb @@ -4,8 +4,8 @@ module RuboCop module Markdown - # Transform source Markdown file into valid Ruby file - # by commenting out all non-code lines + # Transform markdown into multiple ProcessedSources with offsets + # from the original markdown for further use in RuboCop class Preprocess # This is a regexp to parse code blocks from .md files. # @@ -17,11 +17,8 @@ class Preprocess ([\w[[:blank:]]+]*)?\n # Match the code block syntax ([\s\S]+?) # Match everything inside the code block (^[[:blank:]]*\1[[:blank:]]*\n?) # Match closing backticks - |(^.*$) # If we are not in a codeblock, match the whole line /x.freeze - MARKER = "<--rubocop/md-->" - # See https://github.com/github/linguist/blob/v5.3.3/lib/linguist/languages.yml#L3925 RUBY_TYPES = %w[ ruby @@ -32,56 +29,51 @@ class Preprocess rbx ].freeze - class << self - # Revert preprocess changes. - # - # When autocorrect is applied, RuboCop re-writes the file - # using preproccessed source buffer. - # - # We have to restore it. - def restore_and_save!(file) - contents = File.read(file) - restore!(contents) - File.write(file, contents) - end - - def restore!(src) - src.gsub!(/^##{MARKER}/m, "") - end - end - - attr_reader :config + attr_reader :original_processed_source - def initialize(file) - @config = Markdown.config_store.for(file) + def initialize(original_processed_source) + @original_processed_source = original_processed_source end # rubocop:disable Metrics/MethodLength - def call(src) - src.gsub(MD_REGEXP) do |full_match| + def call + original_processed_source.raw_source.to_enum(:scan, MD_REGEXP).map do m = Regexp.last_match open_backticks = m[1] syntax = m[2] code = m[3] - close_backticks = m[4] - markdown = m[5] - - if markdown - # We got markdown outside of a codeblock - comment_lines(markdown) - elsif ruby_codeblock?(syntax, code) - # The codeblock we parsed is assumed ruby, keep as is and append markers to backticks - "#{comment_lines(open_backticks + syntax)}\n#{code}#{comment_lines(close_backticks)}" - else - # The codeblock is not relevant, comment it out - comment_lines(full_match) - end - end + + next unless ruby_codeblock?(syntax, code) + + # The codeblock we parsed is assumed ruby + code_indent = open_backticks.index("`") + { + offset: m.begin(3) + code_indent, + processed_source: new_processed_source(code, code_indent, original_processed_source) + } + end.compact end # rubocop:enable Metrics/MethodLength private + def new_processed_source(code, code_indent, original_processed_source) + processed_source = RuboCop::ProcessedSource.new( + strip_indent(code, code_indent), + original_processed_source.ruby_version, + original_processed_source.path + ) + + processed_source.config = original_processed_source.config + processed_source.registry = original_processed_source.registry + processed_source + end + + # Strip indentation from code inside codeblocks + def strip_indent(code, code_indent) + code.gsub(/^[[:blank:]]{#{code_indent}}/, "") + end + def ruby_codeblock?(syntax, src) maybe_ruby?(syntax) && valid_syntax?(syntax, src) end @@ -97,6 +89,10 @@ def ruby?(syntax) RUBY_TYPES.include?(syntax) end + def config + original_processed_source.config + end + # Try to parse with Ripper # Invalid Ruby code (or non-Ruby) returns `nil`. # Return true if it's explicit Ruby and warn_invalid? @@ -116,10 +112,6 @@ def warn_invalid? def autodetect? config["Markdown"]&.fetch("Autodetect", true) end - - def comment_lines(src) - src.gsub(/^/, "##{MARKER}") - end end end end diff --git a/lib/rubocop/markdown/rubocop_ext.rb b/lib/rubocop/markdown/rubocop_ext.rb deleted file mode 100644 index d7fa0ca..0000000 --- a/lib/rubocop/markdown/rubocop_ext.rb +++ /dev/null @@ -1,95 +0,0 @@ -# frozen_string_literal: true - -module RuboCop - module Markdown # :nodoc: - # According to Linguist. mdx was dropped but is being kept for backwards compatibility. - # See https://github.com/github-linguist/linguist/blob/8c380f360ce00b95fa08d14ce0ebccd481af1b33/lib/linguist/languages.yml#L4088-L4098 - # Keep in sync with config/default.yml - MARKDOWN_EXTENSIONS = %w[ - .md - .livemd - .markdown - .mdown - .mdwn - .mdx - .mkd - .mkdn - .mkdown - .ronn - .scd - .workbook - ].freeze - - # A list of cops that could produce offenses in commented lines - MARKDOWN_OFFENSE_COPS = %w[Lint/Syntax].freeze - - class << self - attr_accessor :config_store - - def markdown_file?(file) - MARKDOWN_EXTENSIONS.include?(File.extname(file)) - end - end - end -end - -RuboCop::Markdown::Inject.defaults! - -RuboCop::Runner.prepend(Module.new do - # Set config store for Markdown - def get_processed_source(*args) - RuboCop::Markdown.config_store = @config_store unless RuboCop::Markdown.config_store - - super - end - - # Do not cache markdown files, 'cause cache doesn't know about processing. - # NOTE: we should involve preprocessing in RuboCop::CachedData#deserialize_offenses - def file_offense_cache(file) - return yield if RuboCop::Markdown.markdown_file?(file) - - super - end - - def file_finished(file, offenses) - return super unless RuboCop::Markdown.markdown_file?(file) - - # Run Preprocess.restore if file has been autocorrected - if @options[:auto_correct] || @options[:autocorrect] - RuboCop::Markdown::Preprocess.restore_and_save!(file) - end - - super(file, offenses) - end -end) - -RuboCop::Cop::Commissioner::InvestigationReport.prepend(Module.new do - # Skip offenses reported for ignored MD source (trailing whitespaces, etc.) - def offenses - @offenses ||= begin - marker_comment = "##{RuboCop::Markdown::Preprocess::MARKER}" - offenses_per_cop.flatten(1).reject do |offense| - next if RuboCop::Markdown::MARKDOWN_OFFENSE_COPS.include?(offense.cop_name) - - offense.location.source_line.start_with?(marker_comment) - end - end - end -end) - -# Allow Rubocop to analyze markdown files -RuboCop::TargetFinder.prepend(Module.new do - def ruby_file?(file) - super || RuboCop::Markdown.markdown_file?(file) - end -end) - -# Extend ProcessedSource#parse with pre-processing -RuboCop::ProcessedSource.prepend(Module.new do - def parse(src, *args) - # only process Markdown files - src = RuboCop::Markdown::Preprocess.new(path).call(src) if - path && RuboCop::Markdown.markdown_file?(path) - super(src, *args) - end -end) diff --git a/lib/rubocop/markdown/ruby_extractor.rb b/lib/rubocop/markdown/ruby_extractor.rb new file mode 100644 index 0000000..250d116 --- /dev/null +++ b/lib/rubocop/markdown/ruby_extractor.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +module RuboCop + module Markdown + # Used by RuboCop to get parsed ruby from markdown + class RubyExtractor + # According to Linguist. mdx was dropped but is being kept for backwards compatibility. + # See https://github.com/github-linguist/linguist/blob/8c380f360ce00b95fa08d14ce0ebccd481af1b33/lib/linguist/languages.yml#L4088-L4098 + # Keep in sync with config/default.yml + MARKDOWN_EXTENSIONS = %w[ + .md + .livemd + .markdown + .mdown + .mdwn + .mdx + .mkd + .mkdn + .mkdown + .ronn + .scd + .workbook + ].freeze + + class << self + def call(processed_source) + new(processed_source).call + end + end + + def initialize(processed_source) + @processed_source = processed_source + end + + def call + return unless markdown_file? + + Preprocess.new(@processed_source).call + end + + private + + def markdown_file? + MARKDOWN_EXTENSIONS.include?(File.extname(@processed_source.path || "")) + end + end + end +end diff --git a/test/preprocess_test.rb b/test/preprocess_test.rb index 90a692e..e2b4e65 100644 --- a/test/preprocess_test.rb +++ b/test/preprocess_test.rb @@ -3,12 +3,19 @@ require "test_helper" class RuboCop::Markdown::PreprocessTest < Minitest::Test - def subject(warn_invalid: false) - obj = RuboCop::Markdown::Preprocess.new("test.md") + def subject(source, warn_invalid: false) + dummy_processed_source = RuboCop::ProcessedSource.new(source, 2.6, "test.md") + dummy_processed_source.config = RuboCop::ConfigStore.new.for("test.md") + obj = RuboCop::Markdown::Preprocess.new(dummy_processed_source) obj.define_singleton_method(:warn_invalid?) { warn_invalid } obj end + def assert_parsed(raw_source, parsed, source_code) + assert_equal source_code, parsed[:processed_source].raw_source, "Expected the processed_source to contain the code block" + assert_equal raw_source.index(source_code, parsed[:offset]), parsed[:offset], "Expected the offset to start at the code block" + end + def test_no_code_snippets source = <<~SOURCE # Header @@ -16,13 +23,7 @@ def test_no_code_snippets Boby text SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->Boby text - SOURCE - - assert_equal expected, subject.call(source) + assert_equal 0, subject(source).call.size end def test_with_one_snippet @@ -40,21 +41,17 @@ def test_valid ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->Code example: - #<--rubocop/md--> - #<--rubocop/md-->``` + code_block = <<~SOURCE class Test < Minitest::Test def test_valid assert false end end - #<--rubocop/md-->``` SOURCE - assert_equal expected, subject.call(source) + parsed = subject(source).call + assert_equal 1, parsed.size + assert_parsed source, parsed.first, code_block end def test_only_snippet @@ -68,17 +65,17 @@ def test_valid ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md-->``` + code_block = <<~SOURCE class Test < Minitest::Test def test_valid assert false end end - #<--rubocop/md-->``` SOURCE - assert_equal expected, subject.call(source) + parsed = subject(source).call + assert_equal 1, parsed.size + assert_parsed source, parsed.first, code_block end def test_many_snippets @@ -106,31 +103,24 @@ def test_valid ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->Code example: - #<--rubocop/md--> - #<--rubocop/md-->``` + code_block1 = <<~SOURCE class Test < Minitest::Test def test_valid assert false end end - #<--rubocop/md-->``` - #<--rubocop/md--> - #<--rubocop/md-->More texts and lists: - #<--rubocop/md-->- One - #<--rubocop/md-->- Two - #<--rubocop/md--> - #<--rubocop/md-->```ruby + SOURCE + + code_block2 = <<~SOURCE require "minitest/pride" require "minitest/autorun" - #<--rubocop/md-->``` SOURCE - assert_equal expected, subject.call(source) + parsed = subject(source).call + assert_equal 2, parsed.size + assert_parsed source, parsed[0], code_block1 + assert_parsed source, parsed[1], code_block2 end def test_invalid_syntax @@ -148,21 +138,7 @@ def test_valid ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->Code example: - #<--rubocop/md--> - #<--rubocop/md-->``` - #<--rubocop/md-->class Test < Minitest::Test - #<--rubocop/md--> def test_valid - #<--rubocop/md--> ... - #<--rubocop/md--> end - #<--rubocop/md-->end - #<--rubocop/md-->``` - SOURCE - - assert_equal expected, subject.call(source) + assert_equal 0, subject(source).call.size end def test_non_ruby_snippet @@ -177,18 +153,7 @@ def test_non_ruby_snippet ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->Code example: - #<--rubocop/md--> - #<--rubocop/md-->``` - #<--rubocop/md-->-module(evlms). - #<--rubocop/md-->-export([martians/0, martians/1]). - #<--rubocop/md-->``` - SOURCE - - assert_equal expected, subject.call(source) + assert_equal 0, subject(source).call.size end def test_ambigious_non_ruby_snippet @@ -197,7 +162,7 @@ def test_ambigious_non_ruby_snippet ```ruby it "is doing heavy stuff", :rprof do - ... + ... # Syntax error end ``` @@ -216,31 +181,15 @@ def test_ambigious_non_ruby_snippet ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Header - #<--rubocop/md--> - #<--rubocop/md-->```ruby - #<--rubocop/md-->it "is doing heavy stuff", :rprof do - #<--rubocop/md--> ... - #<--rubocop/md-->end - #<--rubocop/md-->``` - #<--rubocop/md--> - #<--rubocop/md-->Code example: - #<--rubocop/md--> - #<--rubocop/md-->```sh - #<--rubocop/md-->TEST_RUBY_PROF=call_stack bundle exec rake test - #<--rubocop/md-->``` - #<--rubocop/md--> - #<--rubocop/md-->Or in your code: - #<--rubocop/md--> - #<--rubocop/md-->```ruby + code_block = <<~SOURCE TestProf::RubyProf.configure do |config| config.printer = :call_stack end - #<--rubocop/md-->``` SOURCE - assert_equal expected, subject.call(source) + parsed = subject(source).call + assert_equal 1, parsed.size + assert_parsed source, parsed.first, code_block end def test_snippet_with_unclosed_backtick @@ -258,20 +207,17 @@ def test_snippet_with_unclosed_backtick ``` SOURCE - expected = <<~SOURCE - #<--rubocop/md--># Code example: - #<--rubocop/md--> - #<--rubocop/md-->```ruby + code_block1 = <<~SOURCE `method_call - #<--rubocop/md-->``` - #<--rubocop/md--> - #<--rubocop/md--># Other code example - #<--rubocop/md--> - #<--rubocop/md-->```ruby + SOURCE + + code_block2 = <<~SOURCE method_call - #<--rubocop/md-->``` SOURCE - assert_equal expected, subject(warn_invalid: true).call(source) + parsed = subject(source, warn_invalid: true).call + assert_equal 2, parsed.size + assert_parsed source, parsed[0], code_block1 + assert_parsed source, parsed[1], code_block2 end end diff --git a/test/test_helper.rb b/test/test_helper.rb index fa89018..1ee558f 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -14,5 +14,3 @@ require "rubocop_assertions" require "markdown_assertions" require "rubocop-md" - -RuboCop::Markdown.config_store = RuboCop::ConfigStore.new