Date: Sat, 13 Jun 2026 16:37:41 +0100
Subject: [PATCH 7/9] Fix HTML block hashing messing up if open/close tags not
at start of line
---
lib/markdown2.py | 39 ++++++++++++++-----
.../malformed_html_crash_issue584.html | 6 +--
.../xss_from_incorrect_block_hashing.html | 14 +++++++
.../xss_from_incorrect_block_hashing.opts | 1 +
.../xss_from_incorrect_block_hashing.text | 5 +++
5 files changed, 53 insertions(+), 12 deletions(-)
create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.html
create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.opts
create mode 100644 test/tm-cases/xss_from_incorrect_block_hashing.text
diff --git a/lib/markdown2.py b/lib/markdown2.py
index ffaa0527..50f83f26 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -1104,16 +1104,17 @@ def _strict_tag_block_sub(
block += chunk
if is_markup:
- if chunk.startswith('%s' % is_markup.group(1)):
- tag_count -= 1
+ if self._tag_is_closed(is_markup.group(3), chunk):
+ # if close tag is in same line we must ignore these
+ is_markup = None
else:
- # if close tag is in same line
- if self._tag_is_closed(is_markup.group(3), chunk):
- # we must ignore these
- is_markup = None
- else:
- tag_count += 1
- current_tag = is_markup.group(3)
+ # add up all the open/close tags possibly in the same line and add that to the total
+ current_tag = is_markup.group(3)
+ tag_count += self._tag_imbalance(current_tag, chunk)
+ elif current_tag != html_tags_re and current_tag in chunk:
+ # if we're looking for a specific tag then check for any opens/closes later on in the
+ # line that may throw off our count
+ tag_count += self._tag_imbalance(current_tag, chunk)
if tag_count == 0:
if is_markup:
@@ -1136,6 +1137,26 @@ def _tag_is_closed(self, tag_name: str, text: str) -> bool:
open_index = text.find(f'<{tag_name}')
return open_index != -1 and close_index != -1 and open_index < close_index
+ def _tag_imbalance(self, tag_name: str, text: str) -> int:
+ '''
+ Find imbalanced HTML tags in some text
+
+ Args:
+ tag_name: the name of the tag (eg: "ul")
+ text: the text to search
+
+ Returns:
+ 0 for balanced tags, positive int for more opening tags than closing, negative int for
+ more closing tags than opening
+ '''
+ count = 0
+ for tag in re.finditer(r'<(/)?%s\b>?' % tag_name, text):
+ if tag.group(1):
+ count -= 1
+ else:
+ count += 1
+ return count
+
@mark_stage(Stage.LINK_DEFS)
def _strip_link_definitions(self, text: str) -> str:
# Strips link definitions from text, stores the URLs and titles in
diff --git a/test/tm-cases/malformed_html_crash_issue584.html b/test/tm-cases/malformed_html_crash_issue584.html
index e2071f84..00f32cdb 100644
--- a/test/tm-cases/malformed_html_crash_issue584.html
+++ b/test/tm-cases/malformed_html_crash_issue584.html
@@ -1,3 +1,3 @@
-
-
+
diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.html b/test/tm-cases/xss_from_incorrect_block_hashing.html
new file mode 100644
index 00000000..db2fe827
--- /dev/null
+++ b/test/tm-cases/xss_from_incorrect_block_hashing.html
@@ -0,0 +1,14 @@
+
+
+
+
+[x](")}<img src="x" onerror="alert(origin)">
+
+
diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.opts b/test/tm-cases/xss_from_incorrect_block_hashing.opts
new file mode 100644
index 00000000..54de31a8
--- /dev/null
+++ b/test/tm-cases/xss_from_incorrect_block_hashing.opts
@@ -0,0 +1 @@
+{"safe_mode": "escape"}
\ No newline at end of file
diff --git a/test/tm-cases/xss_from_incorrect_block_hashing.text b/test/tm-cases/xss_from_incorrect_block_hashing.text
new file mode 100644
index 00000000..d0770a4a
--- /dev/null
+++ b/test/tm-cases/xss_from_incorrect_block_hashing.text
@@ -0,0 +1,5 @@
+- [x]
+ 1. - [x]
+___
+[x](`")}
+___
From e7b0ba18ce76d4321385735068102686f7d88def Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Sun, 14 Jun 2026 16:10:07 +0100
Subject: [PATCH 8/9] Fix links being processed within autolink syntax
---
lib/markdown2.py | 8 ++++++++
test/tm-cases/xss_smuggling_spans_in_image_attrs.html | 2 ++
test/tm-cases/xss_smuggling_spans_in_image_attrs.text | 2 ++
3 files changed, 12 insertions(+)
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 50f83f26..04e8934d 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -3230,6 +3230,14 @@ def run(self, text: str):
link_text = self.md._hash_html_spans(link_text)
link_text = self.md._unhash_html_spans(link_text)
+ # check that this link is not inside an autolink
+ if any(
+ autolink.start() < start_idx < p < autolink.end()
+ for autolink in self.md._auto_link_re.finditer(text)
+ ):
+ curr_pos = start_idx + 1
+ continue
+
# Possibly a footnote ref?
if "footnotes" in self.md.extras and link_text.startswith("^"):
normed_id = re.sub(r'\W', '-', link_text[1:])
diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html
index 985a1545..ed1ca655 100644
--- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.html
+++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.html
@@ -18,4 +18,6 @@
"></code)
+http://onclick=alert(origin)//
+
">`)
diff --git a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text
index 3693c6c5..bee50136 100644
--- a/test/tm-cases/xss_smuggling_spans_in_image_attrs.text
+++ b/test/tm-cases/xss_smuggling_spans_in_image_attrs.text
@@ -14,5 +14,7 @@

+
+

create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.html
create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.opts
create mode 100644 test/tm-cases/improper_void_tag_hashing_pr705.text
diff --git a/lib/markdown2.py b/lib/markdown2.py
index 04e8934d..6518d783 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -862,6 +862,10 @@ def _detab(self, text: str) -> str:
output.append(self._detab_line(line))
return '\n'.join(output)
+ # https://developer.mozilla.org/en-US/docs/Glossary/Void_element
+ # technically "self closing tags" (eg: <br />) are not real HTML but no one cares
+ _void_tags = 'area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'
+
# I broke out the html5 tags here and add them to _block_tags_a and
# _block_tags_b. This way html5 tags are easy to keep track of.
_html5tags = '|address|article|aside|canvas|figcaption|figure|footer|header|main|nav|section|video'
@@ -906,6 +910,7 @@ def _detab(self, text: str) -> str:
_html_markdown_attr_re = re.compile(
# markdown attr, with optional assignment to true, must be followed by whitespace/boundary/closing tag chars
r'''\s+markdown(?:="1"|='1'|=1)?(?![^\s/>\b])''')
+
def _hash_html_block_sub(
self,
match: Union[re.Match[str], str],
@@ -1128,6 +1133,9 @@ def _strict_tag_block_sub(
return result
def _tag_is_closed(self, tag_name: str, text: str) -> bool:
+ if re.match(self._void_tags, tag_name):
+ return True
+
# check if number of open tags == number of close tags
if len(re.findall('<%s(?:.*?)>' % tag_name, text)) != text.count('%s>' % tag_name):
return False
@@ -1149,6 +1157,9 @@ def _tag_imbalance(self, tag_name: str, text: str) -> int:
0 for balanced tags, positive int for more opening tags than closing, negative int for
more closing tags than opening
'''
+ if re.match(self._void_tags, tag_name):
+ return 0
+
count = 0
for tag in re.finditer(r'<(/)?%s\b>?' % tag_name, text):
if tag.group(1):
diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.html b/test/tm-cases/improper_void_tag_hashing_pr705.html
new file mode 100644
index 00000000..2da86384
--- /dev/null
+++ b/test/tm-cases/improper_void_tag_hashing_pr705.html
@@ -0,0 +1,17 @@
+
+
+
+
+) <script>alert(origin)</script>
+
+"
diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.opts b/test/tm-cases/improper_void_tag_hashing_pr705.opts
new file mode 100644
index 00000000..ad487c04
--- /dev/null
+++ b/test/tm-cases/improper_void_tag_hashing_pr705.opts
@@ -0,0 +1 @@
+{"safe_mode": "escape"}
diff --git a/test/tm-cases/improper_void_tag_hashing_pr705.text b/test/tm-cases/improper_void_tag_hashing_pr705.text
new file mode 100644
index 00000000..223862cf
--- /dev/null
+++ b/test/tm-cases/improper_void_tag_hashing_pr705.text
@@ -0,0 +1,10 @@
+---
+* ```
+ * ```
+
+ x
+```
+---
+```) ```
+"
+---