Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ Bug Fixes

* Fix statement splitting (issue845).
* Fix a late-binding closure bug in `TokenList.token_not_matching`.
* Preserve trailing whitespace after the final ``;`` so that joining the
parsed statements (``''.join(str(s) for s in parse(sql)) == sql``)
reproduces input that ends in a newline.


Release 0.5.5 (Dec 19, 2025)
Expand Down
26 changes: 25 additions & 1 deletion sqlparse/engine/statement_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,27 @@ def process(self, stream):
"""Process the stream"""
EOS_TTYPE = T.Whitespace, T.Comment.Single

# A finished statement is held back for one segment instead of being
# yielded immediately. This lets whitespace that turns out to trail the
# whole input be reattached to the statement it follows, rather than
# being split off into a dangling all-whitespace buffer that is dropped
# at end of stream (which silently broke the round trip
# ``''.join(str(s) for s in parse(sql)) == sql`` for any input ending in
# a newline after ``;``).
held_tokens = None

# Run over all stream tokens
for ttype, value in stream:
# Finish the current statement once it has ended and the next token is
# neither whitespace nor a single-line comment. Newline tokens are not
# in EOS_TTYPE, so a newline counts as non-whitespace here and also
# ends the statement.
# Open question: why don't multi-line comments count as well?
if self.consume_ws and ttype not in EOS_TTYPE:
yield sql.Statement(self.tokens)
# A new statement starts here, so the previously held one is
# now known to be complete (its trailing whitespace, if any,
# already leads this new statement) and can be emitted.
if held_tokens is not None:
yield sql.Statement(held_tokens)
held_tokens = self.tokens

# Reset filter and prepare to process next statement
self._reset()
Expand Down Expand Up @@ -191,6 +204,17 @@ def process(self, stream):
# token but not for BEGIN itself (which just set the flag)
self._seen_begin = False

# Flush the held statement and whatever remains. If the tokens left in
# ``self.tokens`` after the last completed statement are pure
# whitespace, reattach them to that statement so the exact input is
# preserved on join, instead of dropping them as a dangling
# all-whitespace buffer. Anything else is an unfinished statement and
# is left for the pending-statement check below.
if held_tokens is not None:
if self.tokens and all(t.is_whitespace for t in self.tokens):
held_tokens = held_tokens + self.tokens
self.tokens = []
yield sql.Statement(held_tokens)

# Yield pending statement (if any)
if self.tokens and not all(t.is_whitespace for t in self.tokens):
yield sql.Statement(self.tokens)
15 changes: 15 additions & 0 deletions tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,21 @@ def test_split_ignores_empty_newlines():
assert stmts[1] == 'select bar;'


@pytest.mark.parametrize('s', [
    'select 1;\n',
    'select 1;\r\n',
    'select 1;\n\n',
    'select 1;\n ',
    'select 1; \n',
    'select 1;\nselect 2;\n',
    ';\n',
])
def test_split_preserves_trailing_whitespace(s):
    # Round-trip check: concatenating the text of every parsed statement
    # has to give back the input unchanged. Whitespace after the final ';'
    # used to end up in a dangling all-whitespace statement that was
    # dropped, so input ending in a newline after ';' lost that newline
    # (trailing spaces alone were already preserved).
    statements = sqlparse.parse(s)
    rebuilt = ''.join(map(str, statements))
    assert rebuilt == s


def test_split_quotes_with_new_line():
stmts = sqlparse.split('select "foo\nbar"')
assert len(stmts) == 1
Expand Down