Skip to content

Commit c75f11b

Browse files
Extended placeholder syntax test and moved check in tokenizer
1 parent 52338d6 commit c75f11b

File tree

3 files changed: +29 additions, −4 deletions

src/parser/mod.rs

Lines changed: 1 addition & 1 deletion
@@ -18475,7 +18475,7 @@ mod tests {
 
     #[test]
     fn test_placeholder_invalid_whitespace() {
-        for w in [" ", "/*invalid*/"] {
+        for w in [" ", "  ", "/*invalid*/", "\n", "\t", "\r\n", "--comment\n"] {
             let sql = format!("\nSELECT\n :{w}fooBar");
             assert!(Parser::parse_sql(&GenericDialect, &sql).is_err());
         }

src/tokenizer.rs

Lines changed: 24 additions & 2 deletions
@@ -908,6 +908,22 @@ impl<'a> Tokenizer<'a> {
         Ok(Some(Token::make_word(&word, None)))
     }
 
+    /// Returns a standardized error if the previous token is a `:` and
+    /// the method is expected to be called when a space is found after it.
+    fn handle_colon_space_error(
+        &self,
+        chars: &State,
+        prev_token: Option<&Token>,
+    ) -> Result<Option<Token>, TokenizerError> {
+        if let Some(Token::Colon) = prev_token {
+            return Err(TokenizerError {
+                message: "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?".to_string(),
+                location: chars.location(),
+            });
+        }
+        Ok(None)
+    }
+
     /// Get the next token or return None
     fn next_token(
         &self,
@@ -919,6 +935,7 @@ impl<'a> Tokenizer<'a> {
         match chars.peek() {
             Some(&ch) => match ch {
                 ' ' | '\t' | '\n' | '\r' => {
+                    self.handle_colon_space_error(chars, prev_token)?;
                     chars.next(); // consume
                     *location = chars.location();
                     self.next_token(location, chars, prev_token, true)
@@ -1166,7 +1183,7 @@ impl<'a> Tokenizer<'a> {
                 // if the prev token is not a word, then this is not a valid sql
                 // word or number.
                 if ch == '.' && chars.peekable.clone().nth(1) == Some('_') {
-                    if let Some(Token::Word(_)) = prev_token {
+                    if !preceded_by_whitespace {
                         chars.next();
                         return Ok(Some(Token::Period));
                     }
@@ -1210,7 +1227,7 @@ impl<'a> Tokenizer<'a> {
                 // we should yield the dot as a dedicated token so compound identifiers
                 // starting with digits can be parsed correctly.
                 if s == "." && self.dialect.supports_numeric_prefix() {
-                    if let Some(Token::Word(_)) = prev_token {
+                    if !preceded_by_whitespace {
                         return Ok(Some(Token::Period));
                     }
                 }
@@ -1300,6 +1317,7 @@ impl<'a> Tokenizer<'a> {
                     }
 
                     if is_comment {
+                        self.handle_colon_space_error(chars, prev_token)?;
                         chars.next(); // consume second '-'
                         // Consume the rest of the line as comment
                         let _comment = self.tokenize_single_line_comment(chars);
@@ -1324,12 +1342,14 @@ impl<'a> Tokenizer<'a> {
                     chars.next(); // consume the '/'
                     match chars.peek() {
                         Some('*') => {
+                            self.handle_colon_space_error(chars, prev_token)?;
                             chars.next(); // consume the '*', starting a multi-line comment
                             let _comment = self.consume_multiline_comment(chars)?;
                             *location = chars.location();
                             self.next_token(location, chars, prev_token, true)
                         }
                         Some('/') if dialect_of!(self is SnowflakeDialect) => {
+                            self.handle_colon_space_error(chars, prev_token)?;
                             chars.next(); // consume the second '/', starting a snowflake single-line comment
                             // Consume the rest of the line as comment
                             let _comment = self.tokenize_single_line_comment(chars);
@@ -1534,6 +1554,7 @@ impl<'a> Tokenizer<'a> {
             '}' => self.consume_and_return(chars, Token::RBrace),
             '#' if dialect_of!(self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect) =>
             {
+                self.handle_colon_space_error(chars, prev_token)?;
                 chars.next(); // consume the '#', starting a snowflake single-line comment
                 // Consume the rest of the line as comment
                 let _comment = self.tokenize_single_line_comment(chars);
@@ -1668,6 +1689,7 @@ impl<'a> Tokenizer<'a> {
 
             // whitespace check (including unicode chars) should be last as it covers some of the chars above
             ch if ch.is_whitespace() => {
+                self.handle_colon_space_error(chars, prev_token)?;
                 chars.next(); // consume
                 *location = chars.location();
                 self.next_token(location, chars, prev_token, true)

tests/sqlparser_bigquery.rs

Lines changed: 4 additions & 1 deletion
@@ -1567,7 +1567,10 @@ fn parse_table_identifiers() {
 
     fn test_table_ident_err(ident: &str) {
         let sql = format!("SELECT 1 FROM {ident}");
-        assert!(bigquery().parse_sql_statements(&sql).is_err());
+        assert!(
+            bigquery().parse_sql_statements(&sql).is_err(),
+            "Expected error parsing identifier: `{ident}`, within SQL: `{sql}`"
+        );
     }
 
     test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]);

0 commit comments

Comments (0)