From 3b96ec1c9e6974229796bc36aa9bc6fad6829b29 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:23:12 +0100 Subject: [PATCH 1/9] Fix XSS from HTML encoded colons in hrefs --- lib/markdown2.py | 4 +++- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index dc698970..6683df7c 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1537,8 +1537,10 @@ def _safe_href(self): safe = r'-\w' # omitted ['"<>] for XSS reasons less_safe = r'#/\.!#$%&\(\)\+,/:;=\?@\[\]^`\{\}\|~' + # html encoded colon in a URL still functions as a normal colon, so need to detect those + protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?<code>" onerror="alert(1)//</code>

A

+ +

x

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 4a5c25a8..12d54edb 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -2,4 +2,6 @@ ![`" onerror="alert(1)//`]() -![A](B "") \ No newline at end of file +![A](B "") + +[x](javascript:alert(origin)) \ No newline at end of file From a11ce82fbb99c3f8b72711a141ee1a511a90846b Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:24:25 +0100 Subject: [PATCH 2/9] Fix XSS from making javascript: hrefs look like domains with ports --- lib/markdown2.py | 2 +- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 6683df7c..745d91f6 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1540,7 +1540,7 @@ def _safe_href(self): # html encoded colon in a URL still functions as a normal colon, so need to detect those protocol_seperators = [':', ':', ':', ':'] # dot seperated hostname, optional port number, not followed by protocol seperator - domain = r'(?:[{}]+(?:\.[{}]+)*)(?:(?A

x

+ +

x

diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 12d54edb..26edae4e 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -4,4 +4,6 @@ ![A](B "") -[x](javascript:alert(origin)) \ No newline at end of file +[x](javascript:alert(origin)) + +[x](javascript:1/alert(origin)) \ No newline at end of file From 82b4482b70a1718eef9a4d4fb2449c059949a5f0 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Thu, 14 May 2026 22:39:11 +0100 Subject: [PATCH 3/9] Fix onerror XSS in image title attr --- lib/markdown2.py | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 7 +++++++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 745d91f6..4ba78a4f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -3271,6 +3271,8 @@ def run(self, text: str): .replace('*', self.md._escape_table['*']) .replace('_', self.md._escape_table['_']) ) + if self.md.safe_mode: + title = self.md._hash_span(title) title_str = f' title="{title}"' else: title_str = '' diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index 20e0cd4d..ccd398e7 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -7,3 +7,10 @@

x

x

+ +
    +
  • +
      +
    • onerror=alert(origin) )
    • +
  • +
diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 26edae4e..3f025a00 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -6,4 +6,7 @@ [x](javascript:alert(origin)) -[x](javascript:1/alert(origin)) \ No newline at end of file +[x](javascript:1/alert(origin)) + +- +- ![](x '`![](`') onerror=alert(origin) ) \ No newline at end of file From 456f8a97fa105b887e3c287ccdb0cc6eb53baa46 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 23 May 2026 10:36:52 +0100 Subject: [PATCH 4/9] Fix incomplete recursive unhashing of spans Issue was a while loop comparison. We did `orig != text` but assigned `orig = text` at the end of the loop, where it should have been at the start, before any transformations take place --- lib/markdown2.py | 2 +- test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 3 +++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 5 ++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 4ba78a4f..b09fd352 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1421,13 +1421,13 @@ def _unhash_html_spans(self, text: str, spans=True, code=False) -> str: ''' orig = '' while text != orig: + orig = text if spans: for key, sanitized in list(self.html_spans.items()): text = text.replace(key, sanitized) if code: for code, key in list(self._code_table.items()): text = text.replace(key, code) - orig = text return text def _sanitize_html(self, s: str) -> str: diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index ccd398e7..47abd2f8 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -14,3 +14,6 @@
  • onerror=alert(origin) )
  • + +

    diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 3f025a00..5b2eeb35 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -9,4 +9,7 @@ [x](javascript:1/alert(origin)) - -- ![](x '`![](`') onerror=alert(origin) ) \ No newline at end of file +- ![](x '`![](`') onerror=alert(origin) ) + +![](``) From c173c1274419bc4a8a685ea180aa002bf172c68a Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 23 May 2026 10:56:09 +0100 Subject: [PATCH 5/9] Update github actions versions --- .github/workflows/python.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml index 40ce721a..2ca433a4 100644 --- a/.github/workflows/python.yaml +++ b/.github/workflows/python.yaml @@ -15,9 +15,9 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From b0dd0b3b5a078e4f3b6e2ee2d9b6c9414fba0ce4 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 24 May 2026 09:58:54 +0100 Subject: [PATCH 6/9] Fix smuggling XSS into link def URLs --- lib/markdown2.py | 14 ++++++-------- .../xss_smuggling_spans_in_image_attrs.html | 2 ++ .../xss_smuggling_spans_in_image_attrs.text | 3 +++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index b09fd352..ffaa0527 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1518,6 +1518,12 @@ def _protect_url(self, url: str) -> str: mime = data_url.group('mime') or '' if mime.startswith('image/') and data_url.group('token') == ';base64': charset='base64' + else: + url = ( + self._unhash_html_spans(url, code=True) + .replace('*', 
self._escape_table['*']) + .replace('_', self._escape_table['_']) + ) url = _html_escape_url(url, safe_mode=self.safe_mode, charset=charset) key = _hash_text(url) self._escape_table[url] = key @@ -3236,7 +3242,6 @@ def run(self, text: str): continue text, url, title, url_end_idx = parsed - url = self.md._unhash_html_spans(url, code=True) # reference anchor or reference img else: if not self.options.get('ref', True): @@ -3255,13 +3260,6 @@ def run(self, text: str): curr_pos = p continue - # -- Encode and hash the URL and title to avoid conflicts with italics/bold - - url = ( - url - .replace('*', self.md._escape_table['*']) - .replace('_', self.md._escape_table['_']) - ) if title: if self.md.safe_mode: # expose span contents for escaping - fix #691, #703 diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index 47abd2f8..985a1545 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -17,3 +17,5 @@

    + +

    ![x](<"`"x

    diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 5b2eeb35..3693c6c5 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -13,3 +13,6 @@ ![](`
    `) + +![x](<"`"![x][id] +[id]: x "` From c7a75f60831ab7210b3b5057cffa7308aac394ff Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 13 Jun 2026 16:37:41 +0100 Subject: [PATCH 7/9] Fix HTML block hashing messing up if open/close tags not at start of line --- lib/markdown2.py | 39 ++++++++++++++----- .../malformed_html_crash_issue584.html | 6 +-- .../xss_from_incorrect_block_hashing.html | 14 +++++++ .../xss_from_incorrect_block_hashing.opts | 1 + .../xss_from_incorrect_block_hashing.text | 5 +++ 5 files changed, 53 insertions(+), 12 deletions(-) create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.html create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.opts create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.text diff --git a/lib/markdown2.py b/lib/markdown2.py index ffaa0527..50f83f26 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1104,16 +1104,17 @@ def _strict_tag_block_sub( block += chunk if is_markup: - if chunk.startswith('%s bool: open_index = text.find(f'<{tag_name}') return open_index != -1 and close_index != -1 and open_index < close_index + def _tag_imbalance(self, tag_name: str, text: str) -> int: + ''' + Find imbalanced HTML tags in some text + + Args: + tag_name: the name of the tag (eg: "ul") + text: the text to search + + Returns: + 0 for balanced tags, positive int for more opening tags than closing, negative int for + more closing tags than opening + ''' + count = 0 + for tag in re.finditer(r'<(/)?%s\b>?' 
% tag_name, text): + if tag.group(1): + count -= 1 + else: + count += 1 + return count + @mark_stage(Stage.LINK_DEFS) def _strip_link_definitions(self, text: str) -> str: # Strips link definitions from text, stores the URLs and titles in diff --git a/test/tm-cases/malformed_html_crash_issue584.html b/test/tm-cases/malformed_html_crash_issue584.html index e2071f84..00f32cdb 100644 --- a/test/tm-cases/malformed_html_crash_issue584.html +++ b/test/tm-cases/malformed_html_crash_issue584.html @@ -1,3 +1,3 @@ -

    -
    +

    diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.html b/test/tm-cases/xss_from_incorrect_block_hashing.html new file mode 100644 index 00000000..db2fe827 --- /dev/null +++ b/test/tm-cases/xss_from_incorrect_block_hashing.html @@ -0,0 +1,14 @@ +
      +
    • [x] +
        +
        • +
        • [x]
        • +
      1. +
    • +
    + +
    + +

    [x](")}<img src="x" onerror="alert(origin)">

    + +
    diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.opts b/test/tm-cases/xss_from_incorrect_block_hashing.opts new file mode 100644 index 00000000..54de31a8 --- /dev/null +++ b/test/tm-cases/xss_from_incorrect_block_hashing.opts @@ -0,0 +1 @@ +{"safe_mode": "escape"} \ No newline at end of file diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.text b/test/tm-cases/xss_from_incorrect_block_hashing.text new file mode 100644 index 00000000..d0770a4a --- /dev/null +++ b/test/tm-cases/xss_from_incorrect_block_hashing.text @@ -0,0 +1,5 @@ +- [x] + 1. - [x] +___ +[x](`")} +___ From e7b0ba18ce76d4321385735068102686f7d88def Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 14 Jun 2026 16:10:07 +0100 Subject: [PATCH 8/9] Fix links being processed within autolink syntax --- lib/markdown2.py | 8 ++++++++ test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++ test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 2 ++ 3 files changed, 12 insertions(+) diff --git a/lib/markdown2.py b/lib/markdown2.py index 50f83f26..04e8934d 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -3230,6 +3230,14 @@ def run(self, text: str): link_text = self.md._hash_html_spans(link_text) link_text = self.md._unhash_html_spans(link_text) + # check that this link is not inside an autolink + if any( + autolink.start() < start_idx < p < autolink.end() + for autolink in self.md._auto_link_re.finditer(text) + ): + curr_pos = start_idx + 1 + continue + # Possibly a footnote ref? if "footnotes" in self.md.extras and link_text.startswith("^"): normed_id = re.sub(r'\W', '-', link_text[1:]) diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html index 985a1545..ed1ca655 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html @@ -18,4 +18,6 @@

    +

    http://onclick=alert(origin)//![](x)

    +

    ![x](<"`"x

    diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text index 3693c6c5..bee50136 100644 --- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text +++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text @@ -14,5 +14,7 @@ ![](``) + + ![x](<"`"![x][id] [id]: x "` From 21eb34b9fa5624f80b41a1dff9a2283fedd4958e Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sat, 20 Jun 2026 16:55:47 +0100 Subject: [PATCH 9/9] Fix tag balance checkers not accounting for void tags --- lib/markdown2.py | 11 +++++++++++ .../improper_void_tag_hashing_pr705.html | 17 +++++++++++++++++ .../improper_void_tag_hashing_pr705.opts | 1 + .../improper_void_tag_hashing_pr705.text | 10 ++++++++++ 4 files changed, 39 insertions(+) create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.html create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.opts create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 04e8934d..6518d783 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -862,6 +862,10 @@ def _detab(self, text: str) -> str: output.append(self._detab_line(line)) return '\n'.join(output) + # https://developer.mozilla.org/en-US/docs/Glossary/Void_element + # technically "self closing tags" (eg:
    ) are not real HTML but no one cares + _void_tags = 'area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr' + # I broke out the html5 tags here and add them to _block_tags_a and # _block_tags_b. This way html5 tags are easy to keep track of. _html5tags = '|address|article|aside|canvas|figcaption|figure|footer|header|main|nav|section|video' @@ -906,6 +910,7 @@ def _detab(self, text: str) -> str: _html_markdown_attr_re = re.compile( # markdown attr, with optional assignment to true, must be followed by whitespace/boundary/closing tag chars r'''\s+markdown(?:="1"|='1'|=1)?(?![^\s/>\b])''') + def _hash_html_block_sub( self, match: Union[re.Match[str], str], @@ -1128,6 +1133,9 @@ def _strict_tag_block_sub( return result def _tag_is_closed(self, tag_name: str, text: str) -> bool: + if re.match(self._void_tags, tag_name): + return True + # check if number of open tags == number of close tags if len(re.findall('<%s(?:.*?)>' % tag_name, text)) != text.count('' % tag_name): return False @@ -1149,6 +1157,9 @@ def _tag_imbalance(self, tag_name: str, text: str) -> int: 0 for balanced tags, positive int for more opening tags than closing, negative int for more closing tags than opening ''' + if re.match(self._void_tags, tag_name): + return 0 + count = 0 for tag in re.finditer(r'<(/)?%s\b>?' % tag_name, text): if tag.group(1): diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.html b/test/tm-cases/improper_void_tag_hashing_pr705.html new file mode 100644 index 00000000..2da86384 --- /dev/null +++ b/test/tm-cases/improper_void_tag_hashing_pr705.html @@ -0,0 +1,17 @@ +
    + +
      +
    • ```

      + +
        +
      • ```
      • +
      + +

      x

      + +

      ```

    • +
    + +

    ) <script>alert(origin)</script>

    + +

    "

    diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.opts b/test/tm-cases/improper_void_tag_hashing_pr705.opts new file mode 100644 index 00000000..ad487c04 --- /dev/null +++ b/test/tm-cases/improper_void_tag_hashing_pr705.opts @@ -0,0 +1 @@ +{"safe_mode": "escape"} diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.text b/test/tm-cases/improper_void_tag_hashing_pr705.text new file mode 100644 index 00000000..223862cf --- /dev/null +++ b/test/tm-cases/improper_void_tag_hashing_pr705.text @@ -0,0 +1,10 @@ +--- +* ``` + * ``` + + x +``` +--- +```) ``` +" +---