Skip to content

Commit

Permalink
Use rubocop template api to extract markdown code blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
Earlopain committed Dec 19, 2023
1 parent ff4f73b commit 14ea3f6
Show file tree
Hide file tree
Showing 6 changed files with 130 additions and 239 deletions.
4 changes: 3 additions & 1 deletion lib/rubocop/markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ module Markdown

require_relative "markdown/inject"
require_relative "markdown/preprocess"
require_relative "markdown/rubocop_ext" if defined?(::RuboCop::ProcessedSource)
require_relative "markdown/ruby_extractor"

RuboCop::Markdown::Inject.defaults!
end
end
84 changes: 38 additions & 46 deletions lib/rubocop/markdown/preprocess.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

module RuboCop
module Markdown
# Transform source Markdown file into valid Ruby file
# by commenting out all non-code lines
# Transform markdown into multiple ProcessedSources with offsets
# from the original markdown for further use in RuboCop
class Preprocess
# This is a regexp to parse code blocks from .md files.
#
Expand All @@ -17,11 +17,8 @@ class Preprocess
([\w[[:blank:]]+]*)?\n # Match the code block syntax
([\s\S]+?) # Match everything inside the code block
(^[[:blank:]]*\1[[:blank:]]*\n?) # Match closing backticks
|(^.*$) # If we are not in a codeblock, match the whole line
/x.freeze

MARKER = "<--rubocop/md-->"

# See https://github.com/github/linguist/blob/v5.3.3/lib/linguist/languages.yml#L3925
RUBY_TYPES = %w[
ruby
Expand All @@ -32,56 +29,51 @@ class Preprocess
rbx
].freeze

# Revert the changes made by preprocessing.
#
# When autocorrect is applied, RuboCop re-writes the file from the
# preprocessed source buffer, so the file on disk has to be restored.
#
# Reads `file`, strips the markers from its contents and writes the
# result back to the same path. Returns whatever File.write returns.
def self.restore_and_save!(file)
  restored = File.read(file).tap { |text| restore!(text) }
  File.write(file, restored)
end

# Remove, in place, every "#" + MARKER prefix found at the start of a line.
# Like String#gsub!, returns `src` when something was removed and nil otherwise.
def self.restore!(src)
  marker_prefix = /^##{MARKER}/m
  src.gsub!(marker_prefix, "")
end

attr_reader :config
attr_reader :original_processed_source

def initialize(file)
@config = Markdown.config_store.for(file)
def initialize(original_processed_source)
@original_processed_source = original_processed_source
end

# rubocop:disable Metrics/MethodLength
def call(src)
src.gsub(MD_REGEXP) do |full_match|
def call
original_processed_source.raw_source.to_enum(:scan, MD_REGEXP).map do
m = Regexp.last_match
open_backticks = m[1]
syntax = m[2]
code = m[3]
close_backticks = m[4]
markdown = m[5]

if markdown
# We got markdown outside of a codeblock
comment_lines(markdown)
elsif ruby_codeblock?(syntax, code)
# The codeblock we parsed is assumed ruby, keep as is and append markers to backticks
"#{comment_lines(open_backticks + syntax)}\n#{code}#{comment_lines(close_backticks)}"
else
# The codeblock is not relevant, comment it out
comment_lines(full_match)
end
end

next unless ruby_codeblock?(syntax, code)

# The codeblock we parsed is assumed ruby
code_indent = open_backticks.index("`")
{
offset: m.begin(3) + code_indent,
processed_source: new_processed_source(code, code_indent, original_processed_source)
}
end.compact
end
# rubocop:enable Metrics/MethodLength

private

# Build a ProcessedSource for a single code block.
#
# The block's code is de-indented by `code_indent` blanks per line, and the
# new source takes its ruby version, path, config and registry from
# `original_processed_source`.
def new_processed_source(code, code_indent, original_processed_source)
  dedented = strip_indent(code, code_indent)

  RuboCop::ProcessedSource.new(
    dedented,
    original_processed_source.ruby_version,
    original_processed_source.path
  ).tap do |extracted|
    extracted.config = original_processed_source.config
    extracted.registry = original_processed_source.registry
  end
end

# Remove the code block's own indentation from the code inside it:
# exactly `code_indent` leading blanks are dropped from every line that
# has at least that many; other lines are returned unchanged.
def strip_indent(code, code_indent)
  leading_indent = /^[[:blank:]]{#{code_indent}}/
  code.gsub(leading_indent, "")
end

# A code block counts as Ruby when its syntax tag may denote Ruby and
# its contents pass the syntax check. The syntax check is only run
# when the first test succeeds.
def ruby_codeblock?(syntax, src)
  candidate = maybe_ruby?(syntax)
  return candidate unless candidate

  valid_syntax?(syntax, src)
end
Expand All @@ -97,6 +89,10 @@ def ruby?(syntax)
RUBY_TYPES.include?(syntax)
end

# Configuration consulted by this class, taken directly from the
# processed source this instance was built with.
def config
original_processed_source.config
end

# Try to parse with Ripper
# Invalid Ruby code (or non-Ruby) returns `nil`.
# Return true if it's explicit Ruby and warn_invalid?
Expand All @@ -116,10 +112,6 @@ def warn_invalid?
# Value of the "Autodetect" key in the "Markdown" section of the config.
# Defaults to true when the key is absent; returns nil when there is no
# "Markdown" section at all.
def autodetect?
  markdown_settings = config["Markdown"]
  return if markdown_settings.nil?

  markdown_settings.fetch("Autodetect", true)
end

# Turn every line of `src` into a Ruby comment by inserting "#" followed
# by MARKER at the start of each line. Returns a new string.
def comment_lines(src)
  line_prefix = "##{MARKER}"
  src.gsub(/^/, line_prefix)
end
end
end
end
95 changes: 0 additions & 95 deletions lib/rubocop/markdown/rubocop_ext.rb

This file was deleted.

48 changes: 48 additions & 0 deletions lib/rubocop/markdown/ruby_extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module RuboCop
module Markdown
# Extractor used by RuboCop to obtain parsed Ruby from Markdown files.
# Sources whose path does not carry a Markdown extension are left alone
# (the extractor returns nil for them).
class RubyExtractor
  # Extensions treated as Markdown, following Linguist. Linguist dropped
  # .mdx, but it is kept here for backwards compatibility.
  # See https://github.com/github-linguist/linguist/blob/8c380f360ce00b95fa08d14ce0ebccd481af1b33/lib/linguist/languages.yml#L4088-L4098
  # Keep in sync with config/default.yml
  MARKDOWN_EXTENSIONS = %w[
    .md
    .livemd
    .markdown
    .mdown
    .mdwn
    .mdx
    .mkd
    .mkdn
    .mkdown
    .ronn
    .scd
    .workbook
  ].freeze

  # Convenience entry point: build an extractor for `processed_source`
  # and run it.
  def self.call(processed_source)
    new(processed_source).call
  end

  def initialize(processed_source)
    @processed_source = processed_source
  end

  # Hands the source over to Preprocess when it is a Markdown file;
  # returns nil otherwise.
  def call
    Preprocess.new(@processed_source).call if markdown_file?
  end

  private

  # True when the source's path ends in one of MARKDOWN_EXTENSIONS.
  # A missing (nil) path is treated as an empty one.
  def markdown_file?
    source_path = @processed_source.path || ""
    MARKDOWN_EXTENSIONS.include?(File.extname(source_path))
  end
end
end
end
Loading

0 comments on commit 14ea3f6

Please sign in to comment.