@@ -908,6 +908,22 @@ impl<'a> Tokenizer<'a> {
908908 Ok ( Some ( Token :: make_word ( & word, None ) ) )
909909 }
910910
911+ /// Returns a standardized error if the previous token is a `:` and
912+ /// the method is expected to be called when a space is found after it.
913+ fn handle_colon_space_error (
914+ & self ,
915+ chars : & State ,
916+ prev_token : Option < & Token > ,
917+ ) -> Result < Option < Token > , TokenizerError > {
918+ if let Some ( Token :: Colon ) = prev_token {
919+ return Err ( TokenizerError {
920+ message : "Unexpected whitespace after ':'; did you mean ':placeholder' or '::'?" . to_string ( ) ,
921+ location : chars. location ( ) ,
922+ } ) ;
923+ }
924+ Ok ( None )
925+ }
926+
911927 /// Get the next token or return None
912928 fn next_token (
913929 & self ,
@@ -919,6 +935,7 @@ impl<'a> Tokenizer<'a> {
919935 match chars. peek ( ) {
920936 Some ( & ch) => match ch {
921937 ' ' | '\t' | '\n' | '\r' => {
938+ self . handle_colon_space_error ( chars, prev_token) ?;
922939 chars. next ( ) ; // consume
923940 * location = chars. location ( ) ;
924941 self . next_token ( location, chars, prev_token, true )
@@ -1166,7 +1183,7 @@ impl<'a> Tokenizer<'a> {
11661183 // if the prev token is not a word, then this is not a valid sql
11671184 // word or number.
11681185 if ch == '.' && chars. peekable . clone ( ) . nth ( 1 ) == Some ( '_' ) {
1169- if let Some ( Token :: Word ( _ ) ) = prev_token {
1186+ if !preceded_by_whitespace {
11701187 chars. next ( ) ;
11711188 return Ok ( Some ( Token :: Period ) ) ;
11721189 }
@@ -1210,7 +1227,7 @@ impl<'a> Tokenizer<'a> {
12101227 // we should yield the dot as a dedicated token so compound identifiers
12111228 // starting with digits can be parsed correctly.
12121229 if s == "." && self . dialect . supports_numeric_prefix ( ) {
1213- if let Some ( Token :: Word ( _ ) ) = prev_token {
1230+ if !preceded_by_whitespace {
12141231 return Ok ( Some ( Token :: Period ) ) ;
12151232 }
12161233 }
@@ -1300,6 +1317,7 @@ impl<'a> Tokenizer<'a> {
13001317 }
13011318
13021319 if is_comment {
1320+ self . handle_colon_space_error ( chars, prev_token) ?;
13031321 chars. next ( ) ; // consume second '-'
13041322 // Consume the rest of the line as comment
13051323 let _comment = self . tokenize_single_line_comment ( chars) ;
@@ -1324,12 +1342,14 @@ impl<'a> Tokenizer<'a> {
13241342 chars. next ( ) ; // consume the '/'
13251343 match chars. peek ( ) {
13261344 Some ( '*' ) => {
1345+ self . handle_colon_space_error ( chars, prev_token) ?;
13271346 chars. next ( ) ; // consume the '*', starting a multi-line comment
13281347 let _comment = self . consume_multiline_comment ( chars) ?;
13291348 * location = chars. location ( ) ;
13301349 self . next_token ( location, chars, prev_token, true )
13311350 }
13321351 Some ( '/' ) if dialect_of ! ( self is SnowflakeDialect ) => {
1352+ self . handle_colon_space_error ( chars, prev_token) ?;
13331353 chars. next ( ) ; // consume the second '/', starting a snowflake single-line comment
13341354 // Consume the rest of the line as comment
13351355 let _comment = self . tokenize_single_line_comment ( chars) ;
@@ -1534,6 +1554,7 @@ impl<'a> Tokenizer<'a> {
15341554 '}' => self . consume_and_return ( chars, Token :: RBrace ) ,
15351555 '#' if dialect_of ! ( self is SnowflakeDialect | BigQueryDialect | MySqlDialect | HiveDialect ) =>
15361556 {
1557+ self . handle_colon_space_error ( chars, prev_token) ?;
15371558 chars. next ( ) ; // consume the '#', starting a snowflake single-line comment
15381559 // Consume the rest of the line as comment
15391560 let _comment = self . tokenize_single_line_comment ( chars) ;
@@ -1668,6 +1689,7 @@ impl<'a> Tokenizer<'a> {
16681689
16691690 // whitespace check (including unicode chars) should be last as it covers some of the chars above
16701691 ch if ch. is_whitespace ( ) => {
1692+ self . handle_colon_space_error ( chars, prev_token) ?;
16711693 chars. next ( ) ; // consume
16721694 * location = chars. location ( ) ;
16731695 self . next_token ( location, chars, prev_token, true )
0 commit comments