diff --git a/Gemfile b/Gemfile index 1ea8f2da..6bb9fcb8 100644 --- a/Gemfile +++ b/Gemfile @@ -4,9 +4,7 @@ gemspec gem "redcarpet", :platforms => :ruby gem "kramdown", :platforms => :jruby gem "RedCloth" -# using a tag version here because 0.18.3 was not published by the author to encourage users to upgrade. -# however we want to bump up to this version since this has a security patch -gem "commonmarker", git: "https://github.com/gjtorikian/commonmarker.git", tag: "v0.18.3" +gem "commonmarker", "~> 2.8.2" gem "rdoc", "~> 7.2.0" gem "org-ruby", "0.9.12" gem "creole", "~>0.5.0" diff --git a/Gemfile.lock b/Gemfile.lock index 8d0f756d..cd835695 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,11 +1,3 @@ -GIT - remote: https://github.com/gjtorikian/commonmarker.git - revision: 2838ebaa83ee0081d481c21f3bc0e4cb3e8de9da - tag: v0.18.3 - specs: - commonmarker (0.18.3) - ruby-enum (~> 0.5) - PATH remote: . specs: @@ -34,6 +26,13 @@ GEM builder (3.3.0) cgi (0.5.1) charlock_holmes (0.7.9) + commonmarker (2.8.2) + rb_sys (~> 0.9) + commonmarker (2.8.2-aarch64-linux) + commonmarker (2.8.2-arm-linux) + commonmarker (2.8.2-arm64-darwin) + commonmarker (2.8.2-x86_64-darwin) + commonmarker (2.8.2-x86_64-linux) concurrent-ruby (1.3.6) connection_pool (3.0.2) crass (1.0.6) @@ -85,14 +84,15 @@ GEM stringio racc (1.8.1) rake (13.4.2) + rake-compiler-dock (1.12.0) + rb_sys (0.9.128) + rake-compiler-dock (= 1.12.0) rdoc (7.2.0) erb psych (>= 4.0.0) tsort redcarpet (3.6.1) rexml (3.4.4) - ruby-enum (0.9.0) - i18n rubypants (0.7.1) rugged (1.9.0) sanitize (6.1.3) @@ -135,7 +135,7 @@ DEPENDENCIES RedCloth activesupport (~> 8.1.3) asciidoctor (~> 2.0.26) - commonmarker! + commonmarker (~> 2.8.2) creole (~> 0.5.0) github-linguist (>= 7.1.3) github-markup! 
diff --git a/lib/github/markup/markdown.rb b/lib/github/markup/markdown.rb index dcf93229..5b17afb7 100644 --- a/lib/github/markup/markdown.rb +++ b/lib/github/markup/markdown.rb @@ -5,9 +5,90 @@ module Markup class Markdown < Implementation MARKDOWN_GEMS = { "commonmarker" => proc { |content, options: {}| - commonmarker_opts = [:GITHUB_PRE_LANG].concat(options.fetch(:commonmarker_opts, [])) - commonmarker_exts = options.fetch(:commonmarker_exts, [:tagfilter, :autolink, :table, :strikethrough]) - CommonMarker.render_html(content, commonmarker_opts, commonmarker_exts) + legacy_opts = options.fetch(:commonmarker_opts, []) + legacy_exts = options.fetch( + :commonmarker_exts, + [:tagfilter, :autolink, :table, :strikethrough], + ) + + parse_options = {} + # commonmarker 2.x changes several render defaults that diverge from cmark-gfm 0.x: + # - hardbreaks defaults to true in 2.x but was false in 0.x. + # - escaped_char_spans defaults to true in 2.x and wraps backslash-escaped chars in + # <span data-escaped-char>; 0.x emitted bare characters. + # - gfm_quirks defaults to false in 2.x; 0.x (cmark-gfm) always had the quirk on, + # which collapses ****foo**** to <strong>foo</strong> instead of nesting. + # - github_pre_lang defaults to true in 2.x; set explicitly to match the legacy contract. 
+ render_options = { + github_pre_lang: true, + hardbreaks: false, + escaped_char_spans: false, + gfm_quirks: true, + } + extension_options = {} + + legacy_opts.each do |opt| + case opt + when :DEFAULT then nil + when :SOURCEPOS then render_options[:sourcepos] = true + when :HARDBREAKS then render_options[:hardbreaks] = true + when :NOBREAKS then render_options[:hardbreaks] = false + when :SMART then parse_options[:smart] = true + when :GITHUB_PRE_LANG then render_options[:github_pre_lang] = true + when :UNSAFE then render_options[:unsafe] = true + when :FOOTNOTES then extension_options[:footnotes] = true + when :FULL_INFO_STRING then render_options[:full_info_string] = true + # The legacy options below existed in cmark-gfm 0.x but have no direct commonmarker + # 2.x equivalent. Accept them so existing callers don't break, but they have no effect: + # :VALIDATE_UTF8 / :LIBERAL_HTML_TAG - enforced at the Rust type layer in 2.x. + # :TABLE_PREFER_STYLE_ATTRIBUTES - no 2.x render knob for inline table styles. + # :STRIKETHROUGH_DOUBLE_TILDE - 2.x always accepts both single and double tilde. + when :VALIDATE_UTF8, :LIBERAL_HTML_TAG, + :TABLE_PREFER_STYLE_ATTRIBUTES, :STRIKETHROUGH_DOUBLE_TILDE + nil + else + raise ArgumentError, "unknown commonmarker option: #{opt.inspect}" + end + end + + legacy_exts.each do |ext| + case ext + when :strikethrough, :tagfilter, :autolink, :table, :tasklist, + :shortcodes, :footnotes, :multiline_block_quotes, + :math_dollars, :math_code, :wikilinks_title_after_pipe, + :wikilinks_title_before_pipe, :underline, :subscript, :spoiler, + :greentext, :alerts, :description_lists + extension_options[ext] = true + when :header_ids + # header_ids takes a string prefix in 2.x rather than a boolean. The legacy contract + # only passed it as a symbol, so use an empty prefix to enable anchor generation. 
+ extension_options[:header_ids] = "" + else + raise ArgumentError, "unknown commonmarker extension: #{ext.inspect}" + end + end + + # Several extensions (tagfilter, autolink, table, strikethrough, tasklist, shortcodes) + # are enabled by default in commonmarker 2.x but were strictly opt-in in 0.x. Explicitly + # disable any extension the caller did not request so behavior matches the legacy contract. + [:strikethrough, :tagfilter, :autolink, :table, :tasklist, :shortcodes].each do |ext| + extension_options[ext] = false unless extension_options[ext] + end + + # header_ids is enabled by default in commonmarker 2.x (it injects anchor tags inside + # every heading). The legacy 0.x wrapper never enabled it implicitly, so disable it + # unless the caller explicitly requested it. + extension_options[:header_ids] = nil unless extension_options.key?(:header_ids) + + Commonmarker.to_html( + content, + options: { + parse: parse_options, + render: render_options, + extension: extension_options, + }, + plugins: {syntax_highlighter: nil}, + ) }, "github/markdown" => proc { |content, options: {}| GitHub::Markdown.render(content) diff --git a/test/coverage_test.rb b/test/coverage_test.rb index c478cece..f5043b5d 100644 --- a/test/coverage_test.rb +++ b/test/coverage_test.rb @@ -241,8 +241,112 @@ def test_command_raises_when_subprocess_exits_non_zero assert_raises(GitHub::Markup::CommandError) { impl.render('README.covfail', 'payload') } end + # --- commonmarker proc legacy option/extension mapping ----------------- + + def test_commonmarker_default_option_is_a_noop + capture = capture_commonmarker_call(commonmarker_opts: [:DEFAULT]) + refute capture[:render].key?(:sourcepos) + refute capture[:parse].key?(:smart) + refute capture[:extension].key?(:footnotes) + end + + def test_commonmarker_sourcepos_option_sets_render_sourcepos + capture = capture_commonmarker_call(commonmarker_opts: [:SOURCEPOS]) + assert_equal true, capture[:render][:sourcepos] + end + + def 
test_commonmarker_hardbreaks_option_enables_render_hardbreaks + capture = capture_commonmarker_call(commonmarker_opts: [:HARDBREAKS]) + assert_equal true, capture[:render][:hardbreaks] + end + + def test_commonmarker_nobreaks_option_disables_render_hardbreaks + # Combine with :HARDBREAKS so the assertion observes a transition true -> false + capture = capture_commonmarker_call(commonmarker_opts: [:HARDBREAKS, :NOBREAKS]) + assert_equal false, capture[:render][:hardbreaks] + end + + def test_commonmarker_smart_option_sets_parse_smart + capture = capture_commonmarker_call(commonmarker_opts: [:SMART]) + assert_equal true, capture[:parse][:smart] + end + + def test_commonmarker_github_pre_lang_option_sets_render_github_pre_lang + capture = capture_commonmarker_call(commonmarker_opts: [:GITHUB_PRE_LANG]) + assert_equal true, capture[:render][:github_pre_lang] + end + + def test_commonmarker_unsafe_option_enables_render_unsafe + capture = capture_commonmarker_call(commonmarker_opts: [:UNSAFE]) + assert_equal true, capture[:render][:unsafe] + end + + def test_commonmarker_footnotes_option_enables_extension_footnotes + capture = capture_commonmarker_call(commonmarker_opts: [:FOOTNOTES]) + assert_equal true, capture[:extension][:footnotes] + end + + def test_commonmarker_full_info_string_option_sets_render_full_info_string + capture = capture_commonmarker_call(commonmarker_opts: [:FULL_INFO_STRING]) + assert_equal true, capture[:render][:full_info_string] + end + + def test_commonmarker_accepts_legacy_no_op_options_without_raising + [ + :VALIDATE_UTF8, + :LIBERAL_HTML_TAG, + :TABLE_PREFER_STYLE_ATTRIBUTES, + :STRIKETHROUGH_DOUBLE_TILDE, + ].each do |opt| + capture = capture_commonmarker_call(commonmarker_opts: [opt]) + refute capture[:render].key?(:sourcepos), "#{opt} should not alter render options" + refute capture[:parse].key?(:smart), "#{opt} should not alter parse options" + end + end + + def test_commonmarker_unknown_option_raises_argument_error + err = 
assert_raises(ArgumentError) do + capture_commonmarker_call(commonmarker_opts: [:TOTALLY_FAKE_OPT]) + end + assert_match(/unknown commonmarker option:.*TOTALLY_FAKE_OPT/, err.message) + end + + def test_commonmarker_header_ids_extension_sets_empty_string_prefix + capture = capture_commonmarker_call(commonmarker_exts: [:header_ids]) + assert_equal "", capture[:extension][:header_ids] + end + + def test_commonmarker_unknown_extension_raises_argument_error + err = assert_raises(ArgumentError) do + capture_commonmarker_call(commonmarker_exts: [:not_a_real_ext]) + end + assert_match(/unknown commonmarker extension:.*not_a_real_ext/, err.message) + end + private + # Invokes the commonmarker MARKDOWN_GEMS proc against a stubbed Commonmarker + # constant and returns the parse/render/extension options the proc passed to + # Commonmarker.to_html. + def capture_commonmarker_call(options) + captured = {} + fake = Module.new + fake.define_singleton_method(:to_html) do |content, **kwargs| + captured[:content] = content + opts = kwargs.fetch(:options, {}) + captured[:parse] = opts[:parse] || {} + captured[:render] = opts[:render] || {} + captured[:extension] = opts[:extension] || {} + "stub-html" + end + with_stub_const("Commonmarker", fake) do + GitHub::Markup::Markdown::MARKDOWN_GEMS + .fetch("commonmarker") + .call("payload", options: options) + end + captured + end + def with_stub_const(path, value) parts = path.split("::") name = parts.pop