From 5fb47cda9748b646a5360fb71e186e6137f1f8a9 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Thu, 21 May 2026 18:40:15 +0200 Subject: [PATCH 1/3] Improve accuracy of supports_string_literal_concatenation_with_newline --- src/parser/mod.rs | 9 +++++++++ tests/sqlparser_common.rs | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3c6185193..05aa9dd1f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12184,6 +12184,15 @@ impl<'a> Parser<'a> { after_newline = true; self.next_token_no_skip(); } + // Tokenizer includes the newline in the single line comment + // so we need to check for it specifically here, otherwise the newline will + // not be consumed as a separate token. + Token::Whitespace(Whitespace::SingleLineComment { comment, .. }) => { + if comment.ends_with('\n') { + after_newline = true; + } + self.next_token_no_skip(); + } Token::Whitespace(_) => { self.next_token_no_skip(); } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f470b93ca..c248154b5 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18356,6 +18356,14 @@ fn parse_adjacent_string_literal_concatenation() { -- COMMENT 'd' )"#; + dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); + + let sql = r#" + SELECT 'abc' in ('a' + 'b' -- COMMENT + 'c', + 'd' + )"#; dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); } From d2c59400d36e51dfe81f19607a2ae1a8949608a1 Mon Sep 17 00:00:00 2001 From: Yoav Cohen Date: Thu, 28 May 2026 11:15:05 +0200 Subject: [PATCH 2/3] Single Line Comments: do not include the trailing newline as part of the comment text --- src/ast/comments.rs | 4 ++-- src/ast/mod.rs | 3 ++- src/parser/mod.rs | 9 --------- src/tokenizer.rs | 31 +++++++++++++++---------------- tests/sqlparser_comments.rs | 4 ++-- tests/sqlparser_oracle.rs | 7 ++----- tests/sqlparser_snowflake.rs | 6 ++++-- 7 files changed, 27 insertions(+), 37 deletions(-) diff --git 
a/src/ast/comments.rs b/src/ast/comments.rs index d48e4f5be..a0c25ad16 100644 --- a/src/ast/comments.rs +++ b/src/ast/comments.rs @@ -71,7 +71,7 @@ impl Comments { /// // all comments appearing before line seven, i.e. before the first statement itself /// assert_eq!( /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(), - /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]); + /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement"]); /// /// // all comments appearing within the first statement /// assert_eq!( @@ -81,7 +81,7 @@ impl Comments { /// // all comments appearing within or after the first statement /// assert_eq!( /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(), - /// &[" world ", " second statement\n", " trailing comment\n"]); + /// &[" world ", " second statement", " trailing comment"]); /// ``` /// /// The [Spanned](crate::ast::Spanned) trait allows you to access location diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d737cdb3d..e494553ce 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -12085,7 +12085,8 @@ impl fmt::Display for OptimizerHint { f.write_str(prefix)?; f.write_str(&self.prefix)?; f.write_str("+")?; - f.write_str(&self.text) + f.write_str(&self.text)?; + f.write_str("\n") } OptimizerHintStyle::MultiLine => { f.write_str("/*")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 05aa9dd1f..3c6185193 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12184,15 +12184,6 @@ impl<'a> Parser<'a> { after_newline = true; self.next_token_no_skip(); } - // Tokenizer includes the newline in the single line comment - // so we need to check for it specifically here, otherwise the newline will - // not be consumed as a separate token. - Token::Whitespace(Whitespace::SingleLineComment { comment, ..
}) => { - if comment.ends_with('\n') { - after_newline = true; - } - self.next_token_no_skip(); - } Token::Whitespace(_) => { self.next_token_no_skip(); } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d9f131f8f..4c3668f8d 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -521,7 +521,7 @@ impl fmt::Display for Whitespace { Whitespace::Space => f.write_str(" "), Whitespace::Newline => f.write_str("\n"), Whitespace::Tab => f.write_str("\t"), - Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"), + Whitespace::SingleLineComment { prefix, comment } => writeln!(f, "{prefix}{comment}"), Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"), } } @@ -2037,18 +2037,11 @@ impl<'a> Tokenizer<'a> { // Consume characters until newline fn tokenize_single_line_comment(&self, chars: &mut State) -> String { - let mut comment = peeking_take_while(chars, |ch| match ch { + peeking_take_while(chars, |ch| match ch { '\n' => false, // Always stop at \n '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres _ => true, // Keep consuming for other characters - }); - - if let Some(ch) = chars.next() { - assert!(ch == '\n' || ch == '\r'); - comment.push(ch); - } - - comment + }) } /// Tokenize an identifier or keyword, after the first char is already consumed. 
@@ -3346,8 +3339,9 @@ mod tests { Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: "this is a comment\n".to_string(), + comment: "this is a comment".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::Number("1".to_string(), false), ], ), @@ -3367,8 +3361,9 @@ mod tests { Token::Number("0".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: "this is a comment\r\n".to_string(), + comment: "this is a comment\r".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::Number("1".to_string(), false), ], ), @@ -3392,8 +3387,9 @@ mod tests { Token::Number("1".to_string(), false), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: "\r".to_string(), + comment: "".to_string(), }), + Token::Whitespace(Whitespace::Newline), // Postgres treats \r as newline in single-line comments Token::Number("0".to_string(), false), ]; compare(expected, tokens); @@ -4220,16 +4216,19 @@ mod tests { vec![ Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: "\n".to_string(), + comment: "".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: " Table structure for table...\n".to_string(), + comment: " Table structure for table...".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::SingleLineComment { prefix: "--".to_string(), - comment: "\n".to_string(), + comment: "".to_string(), }), + Token::Whitespace(Whitespace::Newline), ], ); } diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs index 34442ca3e..8b4164387 100644 --- a/tests/sqlparser_comments.rs +++ b/tests/sqlparser_comments.rs @@ -50,10 +50,10 @@ more*/ vec![ CommentWithSpan { comment: Comment::SingleLine { - content: " second line comment\n".into(), + content: " 
second line comment".into(), prefix: "--".into() }, - span: Span::new((2, 1).into(), (3, 1).into()), + span: Span::new((2, 1).into(), (2, 23).into()), }, CommentWithSpan { comment: Comment::MultiLine(" inline comment after `from` ".into()), diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 35f083111..888778e23 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -364,10 +364,7 @@ fn test_optimizer_hints() { "SELECT --+ one two three /* asdf */\n 1 FROM dual", ); assert_eq!(select.optimizer_hints.len(), 1); - assert_eq!( - select.optimizer_hints[0].text, - " one two three /* asdf */\n" - ); + assert_eq!(select.optimizer_hints[0].text, " one two three /* asdf */"); assert_eq!(select.optimizer_hints[0].prefix, ""); // inserts @@ -396,7 +393,7 @@ fn test_optimizer_hints() { ); assert_eq!(select.optimizer_hints.len(), 1); assert_eq!(select.optimizer_hints[0].prefix, "abc"); - assert_eq!(select.optimizer_hints[0].text, " text\n"); + assert_eq!(select.optimizer_hints[0].text, " text"); } #[test] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 8cd6a3fcf..0000b0a3d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -596,8 +596,9 @@ fn test_snowflake_single_line_tokenize() { Token::make_keyword("TABLE"), Token::Whitespace(Whitespace::SingleLineComment { prefix: "#".to_string(), - comment: " this is a comment \n".to_string(), + comment: " this is a comment ".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::make_word("table_1", None), ]; @@ -613,8 +614,9 @@ fn test_snowflake_single_line_tokenize() { Token::Whitespace(Whitespace::Space), Token::Whitespace(Whitespace::SingleLineComment { prefix: "//".to_string(), - comment: " this is a comment \n".to_string(), + comment: " this is a comment ".to_string(), }), + Token::Whitespace(Whitespace::Newline), Token::make_word("table_1", None), ]; From 9e8055212b2e95c0275bf04ece11a24aa160a8cf Mon Sep 17 00:00:00 2001 
From: Yoav Cohen Date: Thu, 28 May 2026 15:08:42 +0200 Subject: [PATCH 3/3] Code format --- tests/sqlparser_common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c248154b5..c2d298d04 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18358,7 +18358,7 @@ fn parse_adjacent_string_literal_concatenation() { )"#; dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); - let sql = r#" + let sql = r#" SELECT 'abc' in ('a' 'b' -- COMMENT 'c',