diff --git a/gcc/rust/parse/rust-parse-impl-attribute.hxx b/gcc/rust/parse/rust-parse-impl-attribute.hxx new file mode 100644 index 00000000000..127470764e2 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-attribute.hxx @@ -0,0 +1,390 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl*.h + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" +#include "rust-parse-error.h" +#include "rust-attribute-values.h" +#include "expected.h" + +namespace Rust { + +// Parse a inner or outer doc comment into an doc attribute +template +Parse::AttributeBody +Parser::parse_doc_comment () +{ + const_TokenPtr token = lexer.peek_token (); + location_t locus = token->get_locus (); + AST::SimplePathSegment segment (Values::Attributes::DOC, locus); + std::vector segments; + segments.push_back (std::move (segment)); + AST::SimplePath attr_path (std::move (segments), false, locus); + AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING, + PrimitiveCoreType::CORETYPE_STR, {}, locus); + std::unique_ptr attr_input ( + new AST::AttrInputLiteral (std::move (lit_expr))); + lexer.skip_token (); + + return Parse::AttributeBody{std::move (attr_path), std::move (attr_input), + locus}; +} + +// Parse a single inner attribute. 
+// Accepts either an inner doc comment token or the `#![ ... ]` form.
+// On success the attribute is returned; every failure path in this function
+// yields the "malformed" Parse::Error::Attribute value.
+// Precondition: the current token is INNER_DOC_COMMENT or HASH - anything
+// else trips the rust_assert below.
+template <typename ManagedTokenSource>
+tl::expected<AST::Attribute, Parse::Error::Attribute>
+Parser<ManagedTokenSource>::parse_inner_attribute ()
+{
+  if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
+    {
+      auto body = parse_doc_comment ();
+      // Last argument is true here and false in parse_outer_attribute
+      // (presumably the "is inner attribute" flag - confirm against
+      // AST::Attribute).
+      return AST::Attribute (std::move (body.path), std::move (body.input),
+                             body.locus, true);
+    }
+
+  rust_assert (lexer.peek_token ()->get_id () == HASH);
+
+  lexer.skip_token ();
+
+  if (lexer.peek_token ()->get_id () != EXCLAM)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "expected %<!%> or %<[%> for inner attribute");
+      add_error (std::move (error));
+
+      return Parse::Error::Attribute::make_malformed ();
+    }
+  lexer.skip_token ();
+
+  if (!skip_token (LEFT_SQUARE))
+    return Parse::Error::Attribute::make_malformed ();
+
+  auto body_res = parse_attribute_body ();
+  if (!body_res)
+    return Parse::Error::Attribute::make_malformed ();
+  auto body = std::move (body_res.value ());
+
+  auto actual_attribute
+    = AST::Attribute (std::move (body.path), std::move (body.input), body.locus,
+                      true);
+
+  if (!skip_token (RIGHT_SQUARE))
+    return Parse::Error::Attribute::make_malformed ();
+
+  return actual_attribute;
+}
+
+// Parse a single outer attribute.
+// Accepts an outer doc comment token or the `#[ ... ]` form.  An inner doc
+// comment, or `#!`, found here is diagnosed and reported as the
+// "unexpected inner" error kind.
+template <typename ManagedTokenSource>
+tl::expected<AST::Attribute, Parse::Error::Attribute>
+Parser<ManagedTokenSource>::parse_outer_attribute ()
+{
+  if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT)
+    {
+      auto body = parse_doc_comment ();
+      return AST::Attribute (std::move (body.path), std::move (body.input),
+                             body.locus, false);
+    }
+
+  if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
+    {
+      Error error (
+        lexer.peek_token ()->get_locus (), ErrorCode::E0753,
+        "expected outer doc comment, inner doc (%<//!%> or %</*!%>) only "
+        "allowed at start of item "
+        "and before any outer attribute or doc (%<#[%>, %<///%> or %</**%>)");
+      add_error (std::move (error));
+      // Consume the offending comment so the caller does not see it again.
+      lexer.skip_token ();
+      return Parse::Error::Attribute::make_unexpected_inner ();
+    }
+
+  /* OuterAttribute -> '#' '[' Attr ']' */
+
+  // No diagnostic on this path: the error value alone tells the caller that
+  // there is no attribute here.
+  if (lexer.peek_token ()->get_id () != HASH)
+    return Parse::Error::Attribute::make_malformed ();
+
+  lexer.skip_token ();
+
+  TokenId id = lexer.peek_token ()->get_id ();
+  if (id != LEFT_SQUARE)
+    {
+      if (id == EXCLAM)
+        {
+          // this is inner attribute syntax, so throw error
+          // inner attributes were either already parsed or not allowed here.
+          Error error (
+            lexer.peek_token ()->get_locus (),
+            "token %<!%> found, indicating inner attribute definition. Inner "
+            "attributes are not possible at this location");
+          add_error (std::move (error));
+        }
+      // NOTE(review): a `#` followed by something other than `[` or `!` also
+      // lands here, without a diagnostic and with the "unexpected inner"
+      // kind - confirm that is intended.
+      return Parse::Error::Attribute::make_unexpected_inner ();
+    }
+
+  lexer.skip_token ();
+
+  auto body_res = parse_attribute_body ();
+  if (!body_res)
+    return Parse::Error::Attribute::make_malformed_body ();
+  auto body = std::move (body_res.value ());
+
+  auto actual_attribute
+    = AST::Attribute (std::move (body.path), std::move (body.input), body.locus,
+                      false);
+
+  if (lexer.peek_token ()->get_id () != RIGHT_SQUARE)
+    return Parse::Error::Attribute::make_malformed ();
+
+  lexer.skip_token ();
+
+  return actual_attribute;
+}
+
+// Parses the body of an attribute (inner or outer).
+// The body is a simple path optionally followed by an attribute input; the
+// surrounding `#[` / `#![` and `]` are consumed by the callers, not here.
+// A missing input is not an error: the returned body then carries a null
+// input pointer.
+template <typename ManagedTokenSource>
+tl::expected<Parse::AttributeBody, Parse::Error::AttributeBody>
+Parser<ManagedTokenSource>::parse_attribute_body ()
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+
+  auto attr_path = parse_simple_path ();
+  // ensure path is valid to parse attribute input
+  if (!attr_path)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "empty simple path in attribute");
+      add_error (std::move (error));
+
+      // Skip past potential further info in attribute (i.e. attr_input)
+      skip_after_end_attribute ();
+      return Parse::Error::AttributeBody::make_invalid_path ();
+    }
+
+  auto attr_input = parse_attr_input ();
+  // AttrInput is allowed to be null, so no checks here
+  if (attr_input)
+    return Parse::AttributeBody{std::move (attr_path.value ()),
+                                std::move (attr_input.value ()), locus};
+  else if (attr_input.error ().kind == Parse::Error::AttrInput::Kind::MISSING)
+    // "Missing" is how parse_attr_input reports "no input present".
+    return Parse::AttributeBody{std::move (attr_path.value ()), nullptr, locus};
+  else
+    return Parse::Error::AttributeBody::make_invalid_attrinput ();
+}
+
+// Parse a contiguous block of inner attributes.
+//
+// Parsing stops at the first token sequence that does not start an inner
+// attribute (`#!` or an inner doc comment), or at the first attribute that
+// fails to parse; attributes collected up to that point are returned.
+template <typename ManagedTokenSource>
+AST::AttrVec
+Parser<ManagedTokenSource>::parse_inner_attributes ()
+{
+  AST::AttrVec inner_attributes;
+
+  auto has_valid_inner_attribute_prefix = [&] () {
+    auto id = lexer.peek_token ()->get_id ();
+    /* Outer attribute `#[` is not allowed, only accepts `#!` */
+    return (id == HASH && lexer.peek_token (1)->get_id () == EXCLAM)
+           || id == INNER_DOC_COMMENT;
+  };
+
+  while (has_valid_inner_attribute_prefix ())
+    {
+      auto inner_attr = parse_inner_attribute ();
+
+      /* Ensure only valid inner attributes are added to the inner_attributes
+       * list */
+      if (inner_attr)
+        {
+          inner_attributes.push_back (std::move (inner_attr.value ()));
+        }
+      else
+        {
+          /* If no more valid inner attributes, break out of loop (only
+           * contiguous inner attributes parsed). */
+          break;
+        }
+    }
+
+  inner_attributes.shrink_to_fit ();
+  return inner_attributes;
+}
+
+// Parses a contiguous block of outer attributes.
+template +AST::AttrVec +Parser::parse_outer_attributes () +{ + AST::AttrVec outer_attributes; + + auto has_valid_attribute_prefix = [&] () { + auto id = lexer.peek_token ()->get_id (); + /* We allow inner attributes `#!` and catch the error later */ + return id == HASH || id == OUTER_DOC_COMMENT || id == INNER_DOC_COMMENT; + }; + + while (has_valid_attribute_prefix ()) /* For error handling. */ + { + auto outer_attr = parse_outer_attribute (); + + /* Ensure only valid outer attributes are added to the outer_attributes + * list */ + if (outer_attr) + { + outer_attributes.push_back (std::move (outer_attr.value ())); + } + else + { + /* If no more valid outer attributes, break out of loop (only + * contiguous outer attributes parsed). */ + break; + } + } + + outer_attributes.shrink_to_fit (); + return outer_attributes; + + /* TODO: this shares basically all code with parse_inner_attributes except + * function call - find way of making it more modular? function pointer? */ +} + +// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract) +template +tl::expected, Parse::Error::AttrInput> +Parser::parse_attr_input () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: + { + auto dtoken_tree = parse_delim_token_tree (); + if (!dtoken_tree) + return Parse::Error::AttrInput::make_bad_token_tree (); + + // must be a delimited token tree, so parse that + std::unique_ptr input_tree ( + new AST::DelimTokenTree (dtoken_tree.value ())); + + return tl::expected, + Parse::Error::AttrInput>{std::move (input_tree)}; + } + case EQUAL: + { + // = LiteralExpr + lexer.skip_token (); + + t = lexer.peek_token (); + + // attempt to parse macro + // TODO: macros may/may not be allowed in attributes + // this is needed for "#[doc = include_str!(...)]" + if (Parse::Utils::is_simple_path_segment (t->get_id ())) + { + std::unique_ptr invoke + = parse_macro_invocation ({}); + + if (!invoke) + return 
Parse::Error::AttrInput::make_bad_macro_invocation (); + + return std::unique_ptr ( + new AST::AttrInputMacro (std::move (invoke))); + } + + /* Ensure token is a "literal expression" (literally only a literal + * token of any type) */ + if (!t->is_literal ()) + { + Error error ( + t->get_locus (), + "unknown token %qs in attribute body - literal expected", + t->get_token_description ()); + add_error (std::move (error)); + + skip_after_end_attribute (); + return Parse::Error::AttrInput::make_malformed (); + } + + AST::Literal::LitType lit_type = AST::Literal::STRING; + // Crappy mapping of token type to literal type + switch (t->get_id ()) + { + case INT_LITERAL: + lit_type = AST::Literal::INT; + break; + case FLOAT_LITERAL: + lit_type = AST::Literal::FLOAT; + break; + case CHAR_LITERAL: + lit_type = AST::Literal::CHAR; + break; + case BYTE_CHAR_LITERAL: + lit_type = AST::Literal::BYTE; + break; + case BYTE_STRING_LITERAL: + lit_type = AST::Literal::BYTE_STRING; + break; + case RAW_STRING_LITERAL: + lit_type = AST::Literal::RAW_STRING; + break; + case STRING_LITERAL: + default: + lit_type = AST::Literal::STRING; + break; // TODO: raw string? don't eliminate it from lexer? + } + + // create actual LiteralExpr + AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (), + {}, t->get_locus ()); + lexer.skip_token (); + + std::unique_ptr attr_input_lit ( + new AST::AttrInputLiteral (std::move (lit_expr))); + + // do checks or whatever? none required, really + + // FIXME: shouldn't a skip token be required here? 
+ + return tl::expected, + Parse::Error::AttrInput>{ + std::move (attr_input_lit)}; + } + break; + case RIGHT_PAREN: + case RIGHT_SQUARE: + case RIGHT_CURLY: + case END_OF_FILE: + // means AttrInput is missing, which is allowed + return Parse::Error::AttrInput::make_missing_attrinput (); + default: + add_error ( + Error (t->get_locus (), + "unknown token %qs in attribute body - attribute input or " + "none expected", + t->get_token_description ())); + + skip_after_end_attribute (); + return Parse::Error::AttrInput::make_malformed (); + } + rust_unreachable (); + // TODO: find out how to stop gcc error on "no return value" +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-expr.hxx b/gcc/rust/parse/rust-parse-impl-expr.hxx new file mode 100644 index 00000000000..bcfdd2d7a1e --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-expr.hxx @@ -0,0 +1,4106 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.h + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" + +namespace Rust { + +// Parses a block expression, including the curly braces at start and end. 
+template +std::unique_ptr +Parser::parse_block_expr ( + AST::AttrVec outer_attrs, tl::optional label, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse statements and expression + std::vector> stmts; + std::unique_ptr expr = nullptr; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + ExprOrStmt expr_or_stmt = parse_stmt_or_expr (); + if (expr_or_stmt.is_error ()) + { + skip_after_end_block (); + return nullptr; + } + + t = lexer.peek_token (); + + if (expr_or_stmt.stmt != nullptr) + { + stmts.push_back (std::move (expr_or_stmt.stmt)); + } + else + { + // assign to expression and end parsing inside + expr = std::move (expr_or_stmt.expr); + break; + } + } + + location_t end_locus = t->get_locus (); + + if (!skip_token (RIGHT_CURLY)) + { + Error error (t->get_locus (), + "error may be from having an expression (as opposed to " + "statement) in the body of the function but not last"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + + // grammar allows for empty block expressions + + stmts.shrink_to_fit (); + + return std::unique_ptr ( + new AST::BlockExpr (std::move (stmts), std::move (expr), + std::move (inner_attrs), std::move (outer_attrs), + std::move (label), locus, end_locus)); +} + +/* Parse an anonymous const expression. 
This can be a regular const expression + * or an underscore for deferred const inference */ +template +tl::expected +Parser::parse_anon_const () +{ + auto current = lexer.peek_token (); + auto locus = current->get_locus (); + + // Special case deferred inference constants + if (maybe_skip_token (UNDERSCORE)) + return AST::AnonConst (locus); + + auto expr = parse_expr (); + + if (!expr) + return tl::make_unexpected (AnonConstError::InvalidSizeExpr); + + return AST::AnonConst (std::move (expr), locus); +} + +/* Parse a "const block", a block preceded by the `const` keyword whose + * statements can be const evaluated and used in constant contexts */ +template +std::unique_ptr +Parser::parse_const_block_expr (AST::AttrVec outer_attrs, + location_t locus) +{ + auto block = parse_block_expr (); + + if (!block) + { + add_error (Error (locus, "failed to parse inner block in const block")); + skip_after_end_block (); + + return nullptr; + } + + auto block_locus = block->get_locus (); + + return std::make_unique (AST::AnonConst (std::move (block), + block_locus), + locus, std::move (outer_attrs)); +} + +/* Parses a "grouped" expression (expression in parentheses), used to control + * precedence. */ +template +std::unique_ptr +Parser::parse_grouped_expr (AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse required expr inside parentheses + std::unique_ptr expr_in_parens = parse_expr (); + if (expr_in_parens == nullptr) + { + // skip after somewhere? + // error? + return nullptr; + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::GroupedExpr (std::move (expr_in_parens), std::move (inner_attrs), + std::move (outer_attrs), locus)); +} + +// Parses a closure expression (closure definition). 
+//
+// Grammar handled: `move`? ( `||` | `|` params? `|` ) ( `->` Type Block | Expr )
+// Returns nullptr, after reporting an error, when the closure is malformed.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::ClosureExpr>
+Parser<ManagedTokenSource>::parse_closure_expr (AST::AttrVec outer_attrs)
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+  // detect optional "move"
+  bool has_move = false;
+  if (lexer.peek_token ()->get_id () == MOVE)
+    {
+      lexer.skip_token ();
+      has_move = true;
+    }
+
+  // handle parameter list
+  std::vector<AST::ClosureParam> params;
+
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case OR:
+      // skip token, no parameters
+      lexer.skip_token ();
+      break;
+    case PIPE:
+      {
+        // actually may have parameters
+        lexer.skip_token ();
+        t = lexer.peek_token ();
+
+        bool param_failed = false;
+        while (t->get_id () != PIPE)
+          {
+            AST::ClosureParam param = parse_closure_param ();
+            if (param.is_error ())
+              {
+                // TODO is this really an error?
+                Error error (t->get_locus (), "could not parse closure param");
+                add_error (std::move (error));
+
+                param_failed = true;
+                break;
+              }
+            params.push_back (std::move (param));
+
+            // no comma means the param list is done
+            if (lexer.peek_token ()->get_id () != COMMA)
+              break;
+
+            // skip comma
+            lexer.skip_token ();
+
+            t = lexer.peek_token ();
+          }
+        params.shrink_to_fit ();
+
+        /* Consume the closing `|` in one place.  Previously it was only
+         * consumed on the "no comma" path, so an empty list (`| |`) or a
+         * trailing comma (`|a,|`) left it in the token stream, and a token
+         * that was not `|` was silently dropped.  skip_token is relied on to
+         * report the mismatch, as it is elsewhere in this file.  After a
+         * failed parameter nothing is consumed, matching earlier behaviour. */
+        if (!param_failed && !skip_token (PIPE))
+          return nullptr;
+        break;
+      }
+    default:
+      add_error (Error (t->get_locus (),
+                        "unexpected token %qs in closure expression - expected "
+                        "%<|%> or %<||%>",
+                        t->get_token_description ()));
+
+      // skip somewhere?
+      return nullptr;
+    }
+
+  // again branch based on next token
+  t = lexer.peek_token ();
+  if (t->get_id () == RETURN_TYPE)
+    {
+      // must be return type closure with block expr
+
+      // skip "return type" token
+      lexer.skip_token ();
+
+      // parse actual type, which is required
+      std::unique_ptr<AST::TypeNoBounds> type = parse_type_no_bounds ();
+      if (type == nullptr)
+        {
+          // error
+          Error error (t->get_locus (), "failed to parse type for closure");
+          add_error (std::move (error));
+
+          // skip somewhere?
+          return nullptr;
+        }
+
+      // parse block expr, which is required
+      std::unique_ptr<AST::BlockExpr> block = parse_block_expr ();
+      if (block == nullptr)
+        {
+          // error
+          Error error (lexer.peek_token ()->get_locus (),
+                       "failed to parse block expr in closure");
+          add_error (std::move (error));
+
+          // skip somewhere?
+          return nullptr;
+        }
+
+      return std::unique_ptr<AST::ClosureExprInnerTyped> (
+        new AST::ClosureExprInnerTyped (std::move (type), std::move (block),
+                                        std::move (params), locus, has_move,
+                                        std::move (outer_attrs)));
+    }
+  else
+    {
+      // must be expr-only closure
+
+      // parse expr, which is required
+      std::unique_ptr<AST::Expr> expr = parse_expr ();
+      if (expr == nullptr)
+        {
+          Error error (t->get_locus (),
+                       "failed to parse expression in closure");
+          add_error (std::move (error));
+
+          // skip somewhere?
+          return nullptr;
+        }
+
+      return std::unique_ptr<AST::ClosureExprInner> (
+        new AST::ClosureExprInner (std::move (expr), std::move (params), locus,
+                                   has_move, std::move (outer_attrs)));
+    }
+}
+
+// Parses a literal token (to literal expression).
+//
+// The current token must be one of the literal tokens or the `true`/`false`
+// keywords; it is consumed on success.  Any other token is reported and
+// nullptr is returned without consuming it.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::LiteralExpr>
+Parser<ManagedTokenSource>::parse_literal_expr (AST::AttrVec outer_attrs)
+{
+  // TODO: change if literal representation in lexer changes
+
+  std::string literal_value;
+  AST::Literal::LitType type = AST::Literal::STRING;
+
+  // branch based on token
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case CHAR_LITERAL:
+      type = AST::Literal::CHAR;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case STRING_LITERAL:
+      type = AST::Literal::STRING;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case BYTE_CHAR_LITERAL:
+      type = AST::Literal::BYTE;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case BYTE_STRING_LITERAL:
+      type = AST::Literal::BYTE_STRING;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case RAW_STRING_LITERAL:
+      type = AST::Literal::RAW_STRING;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case INT_LITERAL:
+      type = AST::Literal::INT;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    case FLOAT_LITERAL:
+      type = AST::Literal::FLOAT;
+      literal_value = t->get_str ();
+      lexer.skip_token ();
+      break;
+    // case BOOL_LITERAL
+    // use true and false keywords rather than "bool literal" Rust terminology
+    case TRUE_LITERAL:
+      type = AST::Literal::BOOL;
+      literal_value = Values::Keywords::TRUE_LITERAL;
+      lexer.skip_token ();
+      break;
+    case FALSE_LITERAL:
+      type = AST::Literal::BOOL;
+      literal_value = Values::Keywords::FALSE_LITERAL;
+      lexer.skip_token ();
+      break;
+    default:
+      // error - cannot be a literal expr
+      add_error (Error (t->get_locus (),
+                        "unexpected token %qs when parsing literal expression",
+                        t->get_token_description ()));
+
+      // skip?
+      return nullptr;
+    }
+
+  // create literal based on stuff in switch; `t` still refers to the literal
+  // token even though the lexer has moved past it
+  return std::unique_ptr<AST::LiteralExpr> (
+    new AST::LiteralExpr (std::move (literal_value), type,
+                          t->get_type_hint (), std::move (outer_attrs),
+                          t->get_locus ()));
+}
+
+// Parses a box expression (`box` followed by a required operand).
+//
+// When pratt_parsed_loc is UNKNOWN_LOCATION the `box` token is consumed
+// here.  Returns nullptr if the operand cannot be parsed, rather than
+// building a node around a null operand.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::BoxExpr>
+Parser<ManagedTokenSource>::parse_box_expr (AST::AttrVec outer_attrs,
+                                            location_t pratt_parsed_loc)
+{
+  location_t locus = pratt_parsed_loc;
+  if (locus == UNKNOWN_LOCATION)
+    {
+      locus = lexer.peek_token ()->get_locus ();
+      skip_token (BOX);
+    }
+
+  ParseRestrictions restrictions;
+  restrictions.expr_can_be_null = false;
+
+  std::unique_ptr<AST::Expr> expr = parse_expr (AST::AttrVec (), restrictions);
+  if (expr == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "failed to parse expression in box expression");
+      add_error (std::move (error));
+
+      return nullptr;
+    }
+
+  return std::unique_ptr<AST::BoxExpr> (
+    new AST::BoxExpr (std::move (expr), std::move (outer_attrs), locus));
+}
+
+// Parses a return expression (including any expression to return).
+template +std::unique_ptr +Parser::parse_return_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (RETURN_KW); + } + + // parse expression to return, if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr returned_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::ReturnExpr (std::move (returned_expr), std::move (outer_attrs), + locus)); +} + +// Parses a try expression. +template +std::unique_ptr +Parser::parse_try_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (TRY); + } + + std::unique_ptr block_expr = parse_block_expr (); + + if (!block_expr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse try block expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::TryExpr (std::move (block_expr), std::move (outer_attrs), locus)); +} + +/* Parses a break expression (including any label to break to AND any return + * expression). */ +template +std::unique_ptr +Parser::parse_break_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (BREAK); + } + + auto parsed_label = parse_lifetime (false); + auto label = (parsed_label) + ? 
tl::optional (parsed_label.value ()) + : tl::nullopt; + + // parse break return expression if it exists + ParseRestrictions restrictions; + restrictions.expr_can_be_null = true; + std::unique_ptr return_expr + = parse_expr (AST::AttrVec (), restrictions); + + return std::unique_ptr ( + new AST::BreakExpr (std::move (label), std::move (return_expr), + std::move (outer_attrs), locus)); +} + +// Parses a continue expression (including any label to continue from). +template +std::unique_ptr +Parser::parse_continue_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (CONTINUE); + } + + auto parsed_label = parse_lifetime (false); + auto label = (parsed_label) + ? tl::optional (parsed_label.value ()) + : tl::nullopt; + + return std::unique_ptr ( + new AST::ContinueExpr (std::move (label), std::move (outer_attrs), locus)); +} + +/* Parses an if expression of any kind, including with else, else if, else if + * let, and neither. Note that any outer attributes will be ignored because if + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if let + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if let expression probably exists, but is being parsed " + "as an if expression. This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + /* parse required condition expr - HACK to prevent struct expr from being + * parsed */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr condition = parse_expr ({}, no_struct_expr); + if (condition == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse condition expression in if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse required block expr + std::unique_ptr if_body = parse_block_expr (); + if (if_body == nullptr) + return nullptr; + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if expression + return std::unique_ptr ( + new AST::IfExpr (std::move (condition), std::move (if_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: + { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqElse (std::move (condition), + std::move (if_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: + { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqElse (std::move (condition), + std::move (if_body), + std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfExprConseqElse (std::move (condition), + std::move (if_body), + std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error (Error (t->get_locus (), + "unexpected token %qs after else in if expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* Parses an if let expression of any kind, including with else, else if, else + * if let, and none. Note that any outer attributes will be ignored as if let + * expressions don't support them. */ +template +std::unique_ptr +Parser::parse_if_let_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + // TODO: make having outer attributes an error? 
+ location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + if (!skip_token (IF)) + { + skip_after_end_block (); + return nullptr; + } + } + + // detect accidental if expr parsed as if let expr + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "if expression probably exists, but is being parsed as an " + "if let expression. This may be a parser error"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + lexer.skip_token (); + + // parse match arm patterns (which are required) + std::vector> match_arm_patterns + = parse_match_arm_patterns (EQUAL); + if (match_arm_patterns.empty ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse any match arm patterns in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + // parse expression (required) - HACK to prevent struct expr being parsed + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee_expr = parse_expr ({}, no_struct_expr); + if (scrutinee_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for expression not being a struct expression or lazy boolean + * expression here? or actually probably in semantic analysis. */ + + // parse block expression (required) + std::unique_ptr if_let_body = parse_block_expr (); + if (if_let_body == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse if let body block expression in if let expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + // branch to parse end or else (and then else, else if, or else if let) + if (lexer.peek_token ()->get_id () != ELSE) + { + // single selection - end of if let expression + return std::unique_ptr ( + new AST::IfLetExpr (std::move (match_arm_patterns), + std::move (scrutinee_expr), std::move (if_let_body), + std::move (outer_attrs), locus)); + } + else + { + // double or multiple selection - branch on end, else if, or else if let + + // skip "else" + lexer.skip_token (); + + // branch on whether next token is '{' or 'if' + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: + { + // double selection - else + // parse else block expr (required) + std::unique_ptr else_body = parse_block_expr (); + if (else_body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse else body block expression in " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqElse (std::move (match_arm_patterns), + std::move (scrutinee_expr), + std::move (if_let_body), + std::move (else_body), + std::move (outer_attrs), locus)); + } + case IF: + { + // multiple selection - else if or else if let + // branch on whether next token is 'let' or not + if (lexer.peek_token (1)->get_id () == LET) + { + // parse if let expr (required) + std::unique_ptr if_let_expr + = parse_if_let_expr (); + if (if_let_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if let expression " + "after if let expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqElse ( + std::move (match_arm_patterns), std::move (scrutinee_expr), + std::move (if_let_body), std::move (if_let_expr), + std::move (outer_attrs), locus)); + } + else + { + // parse if expr (required) + std::unique_ptr if_expr = parse_if_expr (); + if (if_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse (else) if expression after " + "if let expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::IfLetExprConseqElse ( + std::move (match_arm_patterns), std::move (scrutinee_expr), + std::move (if_let_body), std::move (if_expr), + std::move (outer_attrs), locus)); + } + } + default: + // error - invalid token + add_error ( + Error (t->get_locus (), + "unexpected token %qs after else in if let expression", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } +} + +/* TODO: possibly decide on different method of handling label (i.e. not + * parameter) */ + +/* Parses a "loop" infinite loop expression. Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_loop_expr (AST::AttrVec outer_attrs, + tl::optional label, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + if (label) + locus = label->get_locus (); + else + locus = lexer.peek_token ()->get_locus (); + + if (!skip_token (LOOP)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (label) + locus = label->get_locus (); + } + + // parse loop body, which is required + std::unique_ptr loop_body = parse_block_expr (); + if (loop_body == nullptr) + return nullptr; + + return std::unique_ptr ( + new AST::LoopExpr (std::move (loop_body), locus, std::move (label), + std::move (outer_attrs))); +} + +/* Parses a "while" loop expression. 
Label is not parsed and should be parsed + * via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_while_loop_expr ( + AST::AttrVec outer_attrs, tl::optional label, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + if (label) + locus = label->get_locus (); + else + locus = lexer.peek_token ()->get_locus (); + + if (!skip_token (WHILE)) + { + skip_after_end_block (); + return nullptr; + } + } + else + { + if (label) + locus = label->get_locus (); + } + + // ensure it isn't a while let loop + if (lexer.peek_token ()->get_id () == LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be while let loop but is being parsed by " + "while loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse loop predicate (required) with HACK to prevent struct expr parsing + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate = parse_expr ({}, no_struct_expr); + if (predicate == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check that it isn't struct expression here? actually, probably in + * semantic analysis */ + + // parse loop body (required) + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in while loop"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::WhileLoopExpr (std::move (predicate), std::move (body), locus, + std::move (label), std::move (outer_attrs))); +} + +/* Parses a "while let" loop expression. 
Label is not parsed and should be + * parsed via parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_while_let_loop_expr ( + AST::AttrVec outer_attrs, tl::optional label) +{ + location_t locus = UNKNOWN_LOCATION; + if (label) + locus = label->get_locus (); + else + locus = lexer.peek_token ()->get_locus (); + maybe_skip_token (WHILE); + + /* check for possible accidental recognition of a while loop as a while let + * loop */ + if (lexer.peek_token ()->get_id () != LET) + { + Error error (lexer.peek_token ()->get_locus (), + "appears to be a while loop but is being parsed by " + "while let loop - this may be a compiler issue"); + add_error (std::move (error)); + + // skip somewhere + return nullptr; + } + // as this token is definitely let now, save the computation of comparison + lexer.skip_token (); + + // parse predicate patterns + std::vector> predicate_patterns + = parse_match_arm_patterns (EQUAL); + // ensure that there is at least 1 pattern + if (predicate_patterns.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "should be at least 1 pattern"); + add_error (std::move (error)); + return nullptr; + } + + if (!skip_token (EQUAL)) + { + // skip somewhere? + return nullptr; + } + + /* parse predicate expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr predicate_expr = parse_expr ({}, no_struct_expr); + if (predicate_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse predicate expression in while let loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: ensure that struct expression is not parsed? Actually, probably in + * semantic analysis. 
*/ + + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr (loop body) of while let loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr (new AST::WhileLetLoopExpr ( + std::move (predicate_patterns), std::move (predicate_expr), + std::move (body), locus, std::move (label), std::move (outer_attrs))); +} + +/* Parses a "for" iterative loop. Label is not parsed and should be parsed via + * parse_labelled_loop_expr, which would call this. */ +template +std::unique_ptr +Parser::parse_for_loop_expr ( + AST::AttrVec outer_attrs, tl::optional label) +{ + location_t locus = UNKNOWN_LOCATION; + if (label) + locus = label->get_locus (); + else + locus = lexer.peek_token ()->get_locus (); + maybe_skip_token (FOR); + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator pattern in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (!skip_token (IN)) + { + // skip somewhere? + return nullptr; + } + + /* parse iterator expression, which is required - also HACK to prevent + * struct expr */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr expr = parse_expr ({}, no_struct_expr); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse iterator expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + // TODO: check to ensure this isn't struct expr? Or in semantic analysis. 
+ + // parse loop body, which is required + std::unique_ptr body = parse_block_expr (); + if (body == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop body block expression in for loop"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ForLoopExpr (std::move (pattern), std::move (expr), + std::move (body), locus, std::move (label), + std::move (outer_attrs))); +} + +// Parses a loop expression with label (any kind of loop - disambiguates). +template +std::unique_ptr +Parser::parse_labelled_loop_expr (const_TokenPtr tok, + AST::AttrVec outer_attrs) +{ + /* TODO: decide whether it should not work if there is no label, or parse it + * with no label at the moment, I will make it not work with no label + * because that's the implication. */ + + if (tok->get_id () != LIFETIME) + { + Error error (tok->get_locus (), + "expected lifetime in labelled loop expr (to parse loop " + "label) - found %qs", + tok->get_token_description ()); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + // parse loop label (required) + // TODO: Convert this return type to tl::expected instead of tl::optional + auto parsed_label = parse_loop_label (tok); + if (!parsed_label) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse loop label in labelled loop expr"); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + auto label = parsed_label + ? 
tl::optional (parsed_label.value ()) + : tl::nullopt; + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LOOP: + return parse_loop_expr (std::move (outer_attrs), std::move (label)); + case FOR: + return parse_for_loop_expr (std::move (outer_attrs), std::move (label)); + case WHILE: + // further disambiguate into while vs while let + if (lexer.peek_token (1)->get_id () == LET) + { + return parse_while_let_loop_expr (std::move (outer_attrs), + std::move (label)); + } + else + { + return parse_while_loop_expr (std::move (outer_attrs), + std::move (label)); + } + case LEFT_CURLY: + return parse_block_expr (std::move (outer_attrs), std::move (label)); + default: + // error + add_error (Error (t->get_locus (), + "unexpected token %qs when parsing labelled loop", + t->get_token_description ())); + + // skip? + return nullptr; + } +} + +// Parses a match expression. +template +std::unique_ptr +Parser::parse_match_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (MATCH_KW); + } + + /* parse scrutinee expression, which is required (and HACK to prevent struct + * expr) */ + ParseRestrictions no_struct_expr; + no_struct_expr.can_be_struct_expr = false; + std::unique_ptr scrutinee = parse_expr ({}, no_struct_expr); + if (scrutinee == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse scrutinee expression in match expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + /* TODO: check for scrutinee expr not being struct expr? or do so in + * semantic analysis */ + + if (!skip_token (LEFT_CURLY)) + { + // skip somewhere? 
+ return nullptr; + } + + // parse inner attributes (if they exist) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse match arms (if they exist) + // std::vector > match_arms; + std::vector match_arms; + + // parse match cases + while (lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // parse match arm itself, which is required + AST::MatchArm arm = parse_match_arm (); + if (arm.is_error ()) + { + // TODO is this worth throwing everything away? + Error error (lexer.peek_token ()->get_locus (), + "failed to parse match arm in match arms"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (MATCH_ARROW)) + { + // skip after somewhere? + // TODO is returning here a good idea? or is break better? + return nullptr; + } + + ParseRestrictions restrictions; + restrictions.expr_can_be_stmt = true; + + std::unique_ptr expr = parse_expr ({}, restrictions); + + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in match arm in match expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + bool is_expr_without_block = expr->is_expr_without_block (); + + match_arms.push_back (AST::MatchCase (std::move (arm), std::move (expr))); + + // handle comma presence + if (lexer.peek_token ()->get_id () != COMMA) + { + if (!is_expr_without_block) + { + // allowed even if not final case + continue; + } + else if (is_expr_without_block + && lexer.peek_token ()->get_id () != RIGHT_CURLY) + { + // not allowed if not final case + Error error (lexer.peek_token ()->get_locus (), + "exprwithoutblock requires comma after match case " + "expression in match arm (if not final case)"); + add_error (std::move (error)); + + return nullptr; + } + else + { + // otherwise, must be final case, so fine + break; + } + } + lexer.skip_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere? 
+ return nullptr; + } + + match_arms.shrink_to_fit (); + + return std::unique_ptr ( + new AST::MatchExpr (std::move (scrutinee), std::move (match_arms), + std::move (inner_attrs), std::move (outer_attrs), + locus)); +} + +// Parses an async block expression. +template +std::unique_ptr +Parser::parse_async_block_expr (AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (ASYNC); + + // detect optional move token + bool has_move = false; + if (lexer.peek_token ()->get_id () == MOVE) + { + lexer.skip_token (); + has_move = true; + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of async block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::AsyncBlockExpr (std::move (block_expr), has_move, + std::move (outer_attrs), locus)); +} + +// Parses an unsafe block expression. +template +std::unique_ptr +Parser::parse_unsafe_block_expr ( + AST::AttrVec outer_attrs, location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (UNSAFE); + } + + // parse block expression (required) + std::unique_ptr block_expr = parse_block_expr (); + if (block_expr == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse block expression of unsafe block expression"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::UnsafeBlockExpr (std::move (block_expr), std::move (outer_attrs), + locus)); +} + +// Parses an array definition expression. 
+template +std::unique_ptr +Parser::parse_array_expr (AST::AttrVec outer_attrs, + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_SQUARE); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse the "array elements" section, which is optional + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // no array elements + lexer.skip_token (); + + std::vector> exprs; + auto array_elems + = std::make_unique (std::move (exprs), locus); + return std::make_unique (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus); + } + else + { + // should have array elements + // parse initial expression, which is required for either + std::unique_ptr initial_expr = parse_expr (); + if (initial_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse expression in array expression " + "(even though arrayelems seems to be present)"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + // copy array elems + lexer.skip_token (); + + // parse copy amount expression (required) + std::unique_ptr copy_amount = parse_expr (); + if (copy_amount == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse copy amount expression in array " + "expression (arrayelems)"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + skip_token (RIGHT_SQUARE); + + std::unique_ptr copied_array_elems ( + new AST::ArrayElemsCopied (std::move (initial_expr), + std::move (copy_amount), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (copied_array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + // single-element array expression + std::vector> exprs; + exprs.reserve (1); + exprs.push_back (std::move (initial_expr)); + exprs.shrink_to_fit (); + + skip_token (RIGHT_SQUARE); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // multi-element array expression (or trailing comma) + std::vector> exprs; + exprs.push_back (std::move (initial_expr)); + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // quick break if right square bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse element in array expression"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + skip_token (RIGHT_SQUARE); + + exprs.shrink_to_fit (); + + std::unique_ptr array_elems ( + new AST::ArrayElemsValues (std::move (exprs), locus)); + return std::unique_ptr ( + new AST::ArrayExpr (std::move (array_elems), + std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + Error error (lexer.peek_token ()->get_locus (), + "unexpected token %qs in array expression (arrayelems)", + lexer.peek_token ()->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + } +} + +// Parses a grouped or tuple expression (disambiguates). +template +std::unique_ptr +Parser::parse_grouped_or_tuple_expr ( + AST::AttrVec outer_attrs, location_t pratt_parsed_loc) +{ + // adjustment to allow Pratt parsing to reuse function without copy-paste + location_t locus = pratt_parsed_loc; + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + } + + // parse optional inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be empty tuple + lexer.skip_token (); + + // create tuple with empty tuple elems + return std::unique_ptr ( + new AST::TupleExpr (std::vector> (), + std::move (inner_attrs), std::move (outer_attrs), + locus)); + } + + // parse first expression (required) + std::unique_ptr first_expr = parse_expr (); + if (first_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expression in grouped or tuple expression"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + + // detect whether grouped expression with right parentheses as next token + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + // must be grouped expr + lexer.skip_token (); + + // create grouped expr + return std::unique_ptr ( + new AST::GroupedExpr (std::move (first_expr), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else if (lexer.peek_token ()->get_id () == COMMA) + { + // tuple expr + std::vector> exprs; + exprs.push_back (std::move (first_expr)); + + // parse potential other tuple exprs + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break out if right paren + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse expr, which is now required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in tuple expr"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + exprs.push_back (std::move (expr)); + + t = lexer.peek_token (); + } + + // skip right paren + skip_token (RIGHT_PAREN); + + return std::unique_ptr ( + new AST::TupleExpr (std::move (exprs), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // error + const_TokenPtr t = lexer.peek_token (); + Error error (t->get_locus (), + "unexpected token %qs in grouped or tuple expression " + "(parenthesised expression) - expected %<)%> for grouped " + "expr and %<,%> for tuple expr", + t->get_token_description ()); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } +} + +// Parses a struct expression field. 
+template +std::unique_ptr +Parser::parse_struct_expr_field () +{ + AST::AttrVec outer_attrs = parse_outer_attributes (); + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + if (lexer.peek_token (1)->get_id () == COLON) + { + // struct expr field with identifier and expr + Identifier ident = {t}; + lexer.skip_token (1); + + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct expression field with " + "identifier and expression"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIdentifierValue (std::move (ident), + std::move (expr), + std::move (outer_attrs), + t->get_locus ())); + } + else + { + // struct expr field with identifier only + Identifier ident{t}; + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprFieldIdentifier (std::move (ident), + std::move (outer_attrs), + t->get_locus ())); + } + case INT_LITERAL: + { + // parse tuple index field + int index = atoi (t->get_str ().c_str ()); + lexer.skip_token (); + + if (!skip_token (COLON)) + { + // skip somewhere? 
+ return nullptr; + } + + // parse field expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), + "failed to parse expr in struct (or enum) expr " + "field with tuple index"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructExprFieldIndexValue (index, std::move (expr), + std::move (outer_attrs), + t->get_locus ())); + } + case DOT_DOT: + /* this is a struct base and can't be parsed here, so just return + * nothing without erroring */ + + return nullptr; + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs as first token of struct expr field - " + "expected identifier or integer literal", + t->get_token_description ())); + + return nullptr; + } +} + +/* Pratt parser impl of parse_expr. FIXME: this is only provisional and + * probably will be changed. */ +template +std::unique_ptr +Parser::parse_expr (int right_binding_power, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + const_TokenPtr current_token = lexer.peek_token (); + // Special hack because we are allowed to return nullptr, in that case we + // don't want to skip the token, since we don't actually parse it. But if + // null isn't allowed it indicates an error, and we want to skip past that. + // So return early if it is one of the tokens that ends an expression + // (or at least cannot start a new expression). 
+ if (restrictions.expr_can_be_null) + { + TokenId id = current_token->get_id (); + if (id == SEMICOLON || id == RIGHT_PAREN || id == RIGHT_CURLY + || id == RIGHT_SQUARE || id == COMMA || id == LEFT_CURLY) + return nullptr; + } + + ParseRestrictions null_denotation_restrictions = restrictions; + null_denotation_restrictions.expr_can_be_stmt = false; + + // parse null denotation (unary part of expression) + std::unique_ptr expr + = null_denotation ({}, null_denotation_restrictions); + if (expr == nullptr) + return nullptr; + + return left_denotations (std::move (expr), right_binding_power, + std::move (outer_attrs), restrictions); +} + +// Parse expression with lowest left binding power. +template +std::unique_ptr +Parser::parse_expr (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + return parse_expr (LBP_LOWEST, std::move (outer_attrs), restrictions); +} + +template +std::unique_ptr +Parser::left_denotations (std::unique_ptr expr, + int right_binding_power, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + if (expr == nullptr) + { + // DEBUG + rust_debug ("null denotation is null; returning null for parse_expr"); + return nullptr; + } + + const_TokenPtr current_token = lexer.peek_token (); + + if (restrictions.expr_can_be_stmt && !expr->is_expr_without_block () + && current_token->get_id () != DOT + && current_token->get_id () != QUESTION_MARK) + { + rust_debug ("statement expression with block"); + expr->set_outer_attrs (std::move (outer_attrs)); + return expr; + } + + restrictions.expr_can_be_stmt = false; + + // stop parsing if find lower priority token - parse higher priority first + while (right_binding_power < left_binding_power (current_token)) + { + lexer.skip_token (); + + // FIXME attributes should generally be applied to the null denotation. 
+ expr = left_denotation (current_token, std::move (expr), + std::move (outer_attrs), restrictions); + + if (expr == nullptr) + { + // DEBUG + rust_debug ("left denotation is null; returning null for parse_expr"); + + return nullptr; + } + + current_token = lexer.peek_token (); + } + + return expr; +} + +/* Determines action to take when finding token at beginning of expression. */ +template +std::unique_ptr +Parser::null_denotation (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + /* note: tok is previous character in input stream, not current one, as + * parse_expr skips it before passing it in */ + + /* as a Pratt parser (which works by decomposing expressions into a null + * denotation and then a left denotation), null denotations handle primaries + * and unary operands (but only prefix unary operands) */ + + auto tok = lexer.peek_token (); + + switch (tok->get_id ()) + { + case IDENTIFIER: + case SELF: + case SELF_ALIAS: + case DOLLAR_SIGN: + case CRATE: + case SUPER: + case SCOPE_RESOLUTION: + { + // DEBUG + rust_debug ("beginning null denotation identifier handling"); + + /* best option: parse as path, then extract identifier, macro, + * struct/enum, or just path info from it */ + AST::PathInExpression path = parse_path_in_expression (); + + return null_denotation_path (std::move (path), std::move (outer_attrs), + restrictions); + } + default: + if (tok->get_id () == LEFT_SHIFT) + { + lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); + tok = lexer.peek_token (); + } + + lexer.skip_token (); + return null_denotation_not_path (std::move (tok), std::move (outer_attrs), + restrictions); + } +} + +// Handling of expresions that start with a path for `null_denotation`. 
+template +std::unique_ptr +Parser::null_denotation_path ( + AST::PathInExpression path, AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + rust_debug ("parsing null denotation after path"); + + // HACK: always make "self" by itself a path (regardless of next + // tokens) + if (path.is_single_segment () && path.get_segments ()[0].is_lower_self_seg ()) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // macro + return parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs)); + case LEFT_CURLY: + { + bool not_a_block = lexer.peek_token (1)->get_id () == IDENTIFIER + && (lexer.peek_token (2)->get_id () == COMMA + || (lexer.peek_token (2)->get_id () == COLON + && (lexer.peek_token (4)->get_id () == COMMA + || !Parse::Utils::can_tok_start_type ( + lexer.peek_token (3)->get_id ())))); + + /* definitely not a block: + * path '{' ident ',' + * path '{' ident ':' [anything] ',' + * path '{' ident ':' [not a type] + * otherwise, assume block expr and thus path */ + // DEBUG + rust_debug ("values of lookahead: '%s' '%s' '%s' '%s' ", + lexer.peek_token (1)->get_token_description (), + lexer.peek_token (2)->get_token_description (), + lexer.peek_token (3)->get_token_description (), + lexer.peek_token (4)->get_token_description ()); + + rust_debug ("can be struct expr: '%s', not a block: '%s'", + restrictions.can_be_struct_expr ? "true" : "false", + not_a_block ? 
"true" : "false"); + + // struct/enum expr struct + if (!restrictions.can_be_struct_expr && !not_a_block) + { + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_struct_partial (std::move (path), + std::move (outer_attrs)); + } + case LEFT_PAREN: + // struct/enum expr tuple + if (!restrictions.can_be_struct_expr) + { + // assume path is returned + // HACK: add outer attributes to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + return parse_struct_expr_tuple_partial (std::move (path), + std::move (outer_attrs)); + default: + // assume path is returned if not single segment + if (path.is_single_segment ()) + { + // FIXME: This should probably be returned as a path. + /* HACK: may have to become permanent, but this is my current + * identifier expression */ + return std::unique_ptr (new AST::IdentifierExpr ( + path.get_segments ()[0].get_ident_segment ().as_string (), {}, + path.get_locus ())); + } + // HACK: add outer attrs to path + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + rust_unreachable (); +} + +// Handling of expresions that do not start with a path for `null_denotation`. +template +std::unique_ptr +Parser::null_denotation_not_path ( + const_TokenPtr tok, AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + switch (tok->get_id ()) + { + // FIXME: Handle in null_denotation_path? 
+ case LEFT_SHIFT: + case LEFT_ANGLE: + { + // qualified path + // HACK: add outer attrs to path + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (tok->get_locus ()); + path.set_outer_attrs (std::move (outer_attrs)); + return std::unique_ptr ( + new AST::QualifiedPathInExpression (std::move (path))); + } + // FIXME: delegate to parse_literal_expr instead? would have to rejig + // tokens and whatever. + // FIXME: for literal exprs, outer attrs should be passed in, and later + // error if it does not make up the entire statement. + case INT_LITERAL: + // we should check the range, but ignore for now + // encode as int? + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, + tok->get_type_hint (), {}, tok->get_locus ())); + case FLOAT_LITERAL: + // encode as float? + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, + tok->get_type_hint (), {}, tok->get_locus ())); + case STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE_STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case RAW_STRING_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::RAW_STRING, + tok->get_type_hint (), {}, tok->get_locus ())); + case CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::CHAR, + tok->get_type_hint (), {}, tok->get_locus ())); + case BYTE_CHAR_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE, + tok->get_type_hint (), {}, tok->get_locus ())); + case TRUE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (Values::Keywords::TRUE_LITERAL, + AST::Literal::BOOL, tok->get_type_hint (), {}, + tok->get_locus ())); + case 
FALSE_LITERAL: + return std::unique_ptr ( + new AST::LiteralExpr (Values::Keywords::FALSE_LITERAL, + AST::Literal::BOOL, tok->get_type_hint (), {}, + tok->get_locus ())); + case LEFT_PAREN: + return parse_grouped_or_tuple_expr (std::move (outer_attrs), + tok->get_locus ()); + + /*case PLUS: { // unary plus operator + // invoke parse_expr recursively with appropriate priority, etc. for + below AST::Expr* expr = parse_expr(LBP_UNARY_PLUS); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + if (expr->get_type() != integer_type_node || expr->get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + plus must be int or float but it is %s", print_type(expr->get_type())); + return nullptr; + } + + return Tree(expr, tok->get_locus()); + }*/ + // Rust has no unary plus operator + case MINUS: + { // unary minus + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_MINUS, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to integer and float expressions + /*if (expr.get_type() != integer_type_node || expr.get_type() != + float_type_node) { rust_error_at(tok->get_locus(), "operand of unary + minus must be int or float but it is %s", + print_type(expr.get_type())); return Tree::error(); + }*/ + /* FIXME: when implemented the "get type" method on expr, ensure it is + * int or float type (except unsigned int). Actually, this would + * probably have to be done in semantic analysis (as type checking). 
+ */ + + /* FIXME: allow outer attributes on these expressions by having an + * outer attrs parameter in function*/ + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NEGATE, + std::move (outer_attrs), tok->get_locus ())); + } + case EXCLAM: + { // logical or bitwise not + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if (!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_EXCLAM, {}, entered_from_unary); + + if (expr == nullptr) + return nullptr; + // can only apply to boolean expressions + /*if (expr.get_type() != boolean_type_node) { + rust_error_at(tok->get_locus(), + "operand of logical not must be a boolean but it is %s", + print_type(expr.get_type())); + return Tree::error(); + }*/ + /* FIXME: type checking for boolean or integer expressions in semantic + * analysis */ + + // FIXME: allow outer attributes on these expressions + return std::unique_ptr ( + new AST::NegationExpr (std::move (expr), NegationOperator::NOT, + std::move (outer_attrs), tok->get_locus ())); + } + case ASTERISK: + { + /* pointer dereference only - HACK: as struct expressions should + * always be value expressions, cannot be dereferenced */ + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + entered_from_unary.can_be_struct_expr = false; + std::unique_ptr expr + = parse_expr (LBP_UNARY_ASTERISK, {}, entered_from_unary); + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::DereferenceExpr (std::move (expr), std::move (outer_attrs), + tok->get_locus ())); + } + case AMP: + { + // (single) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + Mutability mutability = Mutability::Imm; + bool raw_borrow = false; + + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + if 
(!restrictions.can_be_struct_expr) + entered_from_unary.can_be_struct_expr = false; + + auto is_mutability = [] (const_TokenPtr token) { + return token->get_id () == CONST || token->get_id () == MUT; + }; + + auto t = lexer.peek_token (); + // Weak raw keyword, we look (1) ahead and treat it as an identifier if + // there is no mut nor const. + if (t->get_id () == IDENTIFIER + && t->get_str () == Values::WeakKeywords::RAW + && is_mutability (lexer.peek_token (1))) + { + lexer.skip_token (); + switch (lexer.peek_token ()->get_id ()) + { + case MUT: + mutability = Mutability::Mut; + break; + case CONST: + mutability = Mutability::Imm; + break; + default: + rust_error_at (lexer.peek_token ()->get_locus (), + "raw borrow should be either const or mut"); + } + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + raw_borrow = true; + } + else if (t->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + mutability = Mutability::Mut; + raw_borrow = false; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + raw_borrow = false; + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr (std::move (expr), mutability, raw_borrow, false, + std::move (outer_attrs), tok->get_locus ())); + } + case LOGICAL_AND: + { + // (double) "borrow" expression - shared (mutable) or immutable + std::unique_ptr expr = nullptr; + Mutability mutability = Mutability::Imm; + + ParseRestrictions entered_from_unary; + entered_from_unary.entered_from_unary = true; + + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); + mutability = Mutability::Mut; + } + else + { + expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); + mutability = Mutability::Imm; + } + + // FIXME: allow outer attributes on expression + return std::unique_ptr ( + new AST::BorrowExpr 
(std::move (expr), mutability, false, true, + std::move (outer_attrs), tok->get_locus ())); + } + case OR: + case PIPE: + case MOVE: + // closure expression + return parse_closure_expr_pratt (tok, std::move (outer_attrs)); + case DOT_DOT: + // either "range to" or "range full" expressions + return parse_nud_range_exclusive_expr (tok, std::move (outer_attrs)); + case DOT_DOT_EQ: + // range to inclusive expr + return parse_range_to_inclusive_expr (tok, std::move (outer_attrs)); + case RETURN_KW: + // FIXME: is this really a null denotation expression? + return parse_return_expr (std::move (outer_attrs), tok->get_locus ()); + case TRY: + // FIXME: is this really a null denotation expression? + return parse_try_expr (std::move (outer_attrs), tok->get_locus ()); + case BREAK: + // FIXME: is this really a null denotation expression? + return parse_break_expr (std::move (outer_attrs), tok->get_locus ()); + case CONTINUE: + return parse_continue_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_CURLY: + // ok - this is an expression with block for once. 
+ return parse_block_expr (std::move (outer_attrs), tl::nullopt, + tok->get_locus ()); + case IF: + // if or if let, so more lookahead to find out + if (lexer.peek_token ()->get_id () == LET) + { + // if let expr + return parse_if_let_expr (std::move (outer_attrs), tok->get_locus ()); + } + else + { + // if expr + return parse_if_expr (std::move (outer_attrs), tok->get_locus ()); + } + case LIFETIME: + return parse_labelled_loop_expr (tok, std::move (outer_attrs)); + case LOOP: + return parse_loop_expr (std::move (outer_attrs), tl::nullopt, + tok->get_locus ()); + case WHILE: + if (lexer.peek_token ()->get_id () == LET) + { + return parse_while_let_loop_expr (std::move (outer_attrs)); + } + else + { + return parse_while_loop_expr (std::move (outer_attrs), tl::nullopt, + tok->get_locus ()); + } + case FOR: + return parse_for_loop_expr (std::move (outer_attrs), tl::nullopt); + case MATCH_KW: + // also an expression with block + return parse_match_expr (std::move (outer_attrs), tok->get_locus ()); + case LEFT_SQUARE: + // array definition expr (not indexing) + return parse_array_expr (std::move (outer_attrs), tok->get_locus ()); + case UNSAFE: + return parse_unsafe_block_expr (std::move (outer_attrs), + tok->get_locus ()); + case BOX: + return parse_box_expr (std::move (outer_attrs), tok->get_locus ()); + case UNDERSCORE: + add_error ( + Error (tok->get_locus (), + "use of %qs is not allowed on the right-side of an assignment", + tok->get_token_description ())); + return nullptr; + case CONST: + return parse_const_block_expr (std::move (outer_attrs), + tok->get_locus ()); + default: + if (!restrictions.expr_can_be_null) + add_error (Error (tok->get_locus (), + "found unexpected token %qs in null denotation", + tok->get_token_description ())); + return nullptr; + } +} + +/* Called for each token that can appear in infix (between) position. Can be + * operators or other punctuation. 
Returns a function pointer to member + * function that implements the left denotation for the token given. */ +template +std::unique_ptr +Parser::left_denotation (const_TokenPtr tok, + std::unique_ptr left, + AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // Token passed in has already been skipped, so peek gives "next" token + switch (tok->get_id ()) + { + // FIXME: allow for outer attributes to be applied + case QUESTION_MARK: + { + location_t left_locus = left->get_locus (); + // error propagation expression - unary postfix + return std::unique_ptr ( + new AST::ErrorPropagationExpr (std::move (left), + std::move (outer_attrs), left_locus)); + } + case PLUS: + // sum expression - binary infix + /*return parse_binary_plus_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr (tok, std::move (left), + std::move (outer_attrs), + ArithmeticOrLogicalOperator::ADD, + restrictions); + case MINUS: + // difference expression - binary infix + /*return parse_binary_minus_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::SUBTRACT, restrictions); + case ASTERISK: + // product expression - binary infix + /*return parse_binary_mult_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MULTIPLY, restrictions); + case DIV: + // quotient expression - binary infix + /*return parse_binary_div_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::DIVIDE, restrictions); + case PERCENT: + // modulo expression - binary infix + /*return parse_binary_mod_expr (tok, std::move 
(left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::MODULUS, restrictions); + case AMP: + // logical or bitwise and expression - binary infix + /*return parse_bitwise_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_AND, restrictions); + case PIPE: + // logical or bitwise or expression - binary infix + /*return parse_bitwise_or_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_OR, restrictions); + case CARET: + // logical or bitwise xor expression - binary infix + /*return parse_bitwise_xor_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT: + // left shift expression - binary infix + /*return parse_left_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT: + // right shift expression - binary infix + /*return parse_right_shift_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_arithmetic_or_logical_expr ( + tok, std::move (left), std::move (outer_attrs), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, restrictions); + case EQUAL_EQUAL: + // equal to expression - binary infix (no associativity) + /*return parse_binary_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return 
parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::EQUAL, restrictions); + case NOT_EQUAL: + // not equal to expression - binary infix (no associativity) + /*return parse_binary_not_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::NOT_EQUAL, + restrictions); + case RIGHT_ANGLE: + // greater than expression - binary infix (no associativity) + /*return parse_binary_greater_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_THAN, + restrictions); + case LEFT_ANGLE: + // less than expression - binary infix (no associativity) + /*return parse_binary_less_than_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_THAN, + restrictions); + case GREATER_OR_EQUAL: + // greater than or equal to expression - binary infix (no associativity) + /*return parse_binary_greater_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::GREATER_OR_EQUAL, + restrictions); + case LESS_OR_EQUAL: + // less than or equal to expression - binary infix (no associativity) + /*return parse_binary_less_equal_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_comparison_expr (tok, std::move (left), + std::move (outer_attrs), + ComparisonOperator::LESS_OR_EQUAL, + restrictions); + case OR: + // lazy logical or expression - binary infix + return parse_lazy_or_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case LOGICAL_AND: + // lazy logical and expression - binary 
infix + return parse_lazy_and_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case AS: + /* type cast expression - kind of binary infix (RHS is actually a + * TypeNoBounds) */ + return parse_type_cast_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case EQUAL: + // assignment expression - binary infix (note right-to-left + // associativity) + return parse_assig_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + case PLUS_EQ: + /* plus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_plus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::ADD, + restrictions); + case MINUS_EQ: + /* minus-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_minus_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::SUBTRACT, restrictions); + case ASTERISK_EQ: + /* multiply-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_mult_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MULTIPLY, restrictions); + case DIV_EQ: + /* division-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_div_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr (tok, std::move (left), + std::move (outer_attrs), + CompoundAssignmentOperator::DIVIDE, + restrictions); + case PERCENT_EQ: + /* modulo-assignment expression - binary infix (note right-to-left + * 
associativity) */ + /*return parse_mod_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::MODULUS, restrictions); + case AMP_EQ: + /* bitwise and-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_and_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_AND, restrictions); + case PIPE_EQ: + /* bitwise or-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_or_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_OR, restrictions); + case CARET_EQ: + /* bitwise xor-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_xor_assig_expr (tok, std::move (left), + std::move (outer_attrs), restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::BITWISE_XOR, restrictions); + case LEFT_SHIFT_EQ: + /* left shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_left_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::LEFT_SHIFT, restrictions); + case RIGHT_SHIFT_EQ: + /* right shift-assignment expression - binary infix (note right-to-left + * associativity) */ + /*return parse_right_shift_assig_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions);*/ + return parse_compound_assignment_expr ( + tok, 
std::move (left), std::move (outer_attrs), + CompoundAssignmentOperator::RIGHT_SHIFT, restrictions); + case DOT_DOT: + /* range exclusive expression - binary infix (no associativity) + * either "range" or "range from" */ + return parse_led_range_exclusive_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + case DOT_DOT_EQ: + /* range inclusive expression - binary infix (no associativity) + * unambiguously RangeInclusiveExpr */ + return parse_range_inclusive_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case SCOPE_RESOLUTION: + // path expression - binary infix? FIXME should this even be parsed + // here? + add_error ( + Error (tok->get_locus (), + "found scope resolution operator in left denotation " + "function - this should probably be handled elsewhere")); + + return nullptr; + case DOT: + { + /* field expression or method call - relies on parentheses after next + * identifier or await if token after is "await" (unary postfix) or + * tuple index if token after is a decimal int literal */ + + const_TokenPtr next_tok = lexer.peek_token (); + if (next_tok->get_id () == IDENTIFIER + && next_tok->get_str () == Values::Keywords::AWAIT) + { + // await expression + return parse_await_expr (tok, std::move (left), + std::move (outer_attrs)); + } + else if (next_tok->get_id () == INT_LITERAL) + { + // tuple index expression - TODO check for decimal int literal + return parse_tuple_index_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else if (next_tok->get_id () == FLOAT_LITERAL) + { + // Lexer has misidentified a tuple index as a float literal + // eg: `(x, (y, z)).1.0` -> 1.0 has been identified as a float + // literal. This means we should split it into three new separate + // tokens, the first tuple index, the dot and the second tuple + // index. 
+ auto current_loc = next_tok->get_locus (); + auto str = next_tok->get_str (); + auto dot_pos = str.find ("."); + auto prefix = str.substr (0, dot_pos); + auto suffix = str.substr (dot_pos + 1); + if (dot_pos == str.size () - 1) + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), + CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1)}); + else + lexer.split_current_token ( + {Token::make_int (current_loc, std::move (prefix), + CORETYPE_PURE_DECIMAL), + Token::make (DOT, current_loc + 1), + Token::make_int (current_loc + 2, std::move (suffix), + CORETYPE_PURE_DECIMAL)}); + return parse_tuple_index_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else if (next_tok->get_id () == IDENTIFIER + && lexer.peek_token (1)->get_id () != LEFT_PAREN + && lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION) + { + /* field expression (or should be) - FIXME: scope resolution right + * after identifier should always be method, I'm pretty sure */ + return parse_field_access_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + else + { + // method call (probably) + return parse_method_call_expr (tok, std::move (left), + std::move (outer_attrs), + restrictions); + } + } + case LEFT_PAREN: + // function call - method call is based on dot notation first + return parse_function_call_expr (tok, std::move (left), + std::move (outer_attrs), restrictions); + case LEFT_SQUARE: + // array or slice index expression (pseudo binary infix) + return parse_index_expr (tok, std::move (left), std::move (outer_attrs), + restrictions); + default: + add_error (Error (tok->get_locus (), + "found unexpected token %qs in left denotation", + tok->get_token_description ())); + + return nullptr; + } +} + +/* Returns the left binding power for the given ArithmeticOrLogicalExpr type. + * TODO make constexpr? Would that even do anything useful? 
*/ +inline binding_powers +get_lbp_for_arithmetic_or_logical_expr ( + AST::ArithmeticOrLogicalExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ArithmeticOrLogicalOperator::ADD: + return LBP_PLUS; + case ArithmeticOrLogicalOperator::SUBTRACT: + return LBP_MINUS; + case ArithmeticOrLogicalOperator::MULTIPLY: + return LBP_MUL; + case ArithmeticOrLogicalOperator::DIVIDE: + return LBP_DIV; + case ArithmeticOrLogicalOperator::MODULUS: + return LBP_MOD; + case ArithmeticOrLogicalOperator::BITWISE_AND: + return LBP_AMP; + case ArithmeticOrLogicalOperator::BITWISE_OR: + return LBP_PIPE; + case ArithmeticOrLogicalOperator::BITWISE_XOR: + return LBP_CARET; + case ArithmeticOrLogicalOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case ArithmeticOrLogicalOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + rust_unreachable (); + + return LBP_PLUS; + } +} + +// Parses an arithmetic or logical expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_arithmetic_or_logical_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ArithmeticOrLogicalExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_arithmetic_or_logical_expr (expr_type), + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary addition expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_binary_plus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::ADD, locus)); +} + +// Parses a binary subtraction expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_minus_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_mult_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MUL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MULTIPLY, + locus)); +} + +// Parses a binary division expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_div_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_mod_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::MODULUS, + locus)); +} + +/* Parses a binary bitwise (or eager logical) and expression (with Pratt + * parsing). 
*/ +template +std::unique_ptr +Parser::parse_bitwise_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_AND, + locus)); +} + +/* Parses a binary bitwise (or eager logical) or expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_OR, + locus)); +} + +/* Parses a binary bitwise (or eager logical) xor expression (with Pratt + * parsing). */ +template +std::unique_ptr +Parser::parse_bitwise_xor_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_left_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_right_shift_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), + ArithmeticOrLogicalOperator::RIGHT_SHIFT, + locus)); +} + +/* Returns the left binding power for the given ComparisonExpr type. + * TODO make constexpr? Would that even do anything useful? 
*/ +inline binding_powers +get_lbp_for_comparison_expr (AST::ComparisonExpr::ExprType expr_type) +{ + switch (expr_type) + { + case ComparisonOperator::EQUAL: + return LBP_EQUAL; + case ComparisonOperator::NOT_EQUAL: + return LBP_NOT_EQUAL; + case ComparisonOperator::GREATER_THAN: + return LBP_GREATER_THAN; + case ComparisonOperator::LESS_THAN: + return LBP_SMALLER_THAN; + case ComparisonOperator::GREATER_OR_EQUAL: + return LBP_GREATER_EQUAL; + case ComparisonOperator::LESS_OR_EQUAL: + return LBP_SMALLER_EQUAL; + default: + // WTF? should not happen, this is an error + rust_unreachable (); + + return LBP_EQUAL; + } +} + +/* Parses a ComparisonExpr of given type and LBP. TODO find a way to only + * specify one and have the other looked up - e.g. specify ExprType and + * binding power is looked up? */ +template +std::unique_ptr +Parser::parse_comparison_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::ComparisonExpr::ExprType expr_type, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_comparison_expr (expr_type), AST::AttrVec (), + restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), expr_type, + locus)); +} + +// Parses a binary equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::EQUAL, locus)); +} + +// Parses a binary not equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_not_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_NOT_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::NOT_EQUAL, locus)); +} + +// Parses a binary greater than expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_greater_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_THAN, locus)); +} + +// Parses a binary less than expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_binary_less_than_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_THAN, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_THAN, locus)); +} + +// Parses a binary greater than or equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_greater_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_GREATER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::GREATER_OR_EQUAL, locus)); +} + +// Parses a binary less than or equal to expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_binary_less_equal_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_SMALLER_EQUAL, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. 
actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::ComparisonExpr (std::move (left), std::move (right), + ComparisonOperator::LESS_OR_EQUAL, locus)); +} + +// Parses a binary lazy boolean or expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_or_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_OR, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_OR, locus)); +} + +// Parses a binary lazy boolean and expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_lazy_and_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_LOGICAL_AND, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::LazyBooleanExpr (std::move (left), std::move (right), + LazyBooleanOperator::LOGICAL_AND, locus)); +} + +// Parses a pseudo-binary infix type cast expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_type_cast_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr expr_to_cast, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, + ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + return nullptr; + // FIXME: how do I get precedence put in here? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = expr_to_cast->get_locus (); + + return std::unique_ptr ( + new AST::TypeCastExpr (std::move (expr_to_cast), std::move (type), locus)); +} + +// Parses a binary assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::AssignmentExpr (std::move (left), std::move (right), + std::move (outer_attrs), locus)); +} + +/* Returns the left binding power for the given CompoundAssignmentExpr type. + * TODO make constexpr? Would that even do anything useful? 
*/ +inline binding_powers +get_lbp_for_compound_assignment_expr ( + AST::CompoundAssignmentExpr::ExprType expr_type) +{ + switch (expr_type) + { + case CompoundAssignmentOperator::ADD: + return LBP_PLUS; + case CompoundAssignmentOperator::SUBTRACT: + return LBP_MINUS; + case CompoundAssignmentOperator::MULTIPLY: + return LBP_MUL; + case CompoundAssignmentOperator::DIVIDE: + return LBP_DIV; + case CompoundAssignmentOperator::MODULUS: + return LBP_MOD; + case CompoundAssignmentOperator::BITWISE_AND: + return LBP_AMP; + case CompoundAssignmentOperator::BITWISE_OR: + return LBP_PIPE; + case CompoundAssignmentOperator::BITWISE_XOR: + return LBP_CARET; + case CompoundAssignmentOperator::LEFT_SHIFT: + return LBP_L_SHIFT; + case CompoundAssignmentOperator::RIGHT_SHIFT: + return LBP_R_SHIFT; + default: + // WTF? should not happen, this is an error + rust_unreachable (); + + return LBP_PLUS; + } +} + +// Parses a compound assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_compound_assignment_expr ( + const_TokenPtr, std::unique_ptr left, AST::AttrVec, + AST::CompoundAssignmentExpr::ExprType expr_type, + ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (get_lbp_for_compound_assignment_expr (expr_type) - 1, + AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + expr_type, locus)); +} + +// Parses a binary add-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_plus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PLUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::ADD, locus)); +} + +// Parses a binary minus-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_minus_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MINUS_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::SUBTRACT, + locus)); +} + +// Parses a binary multiplication-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_mult_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MULT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MULTIPLY, + locus)); +} + +// Parses a binary division-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_div_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DIV_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::DIVIDE, + locus)); +} + +// Parses a binary modulo-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_mod_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_MOD_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::MODULUS, + locus)); +} + +// Parses a binary and-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_and_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_AMP_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_AND, + locus)); +} + +// Parses a binary or-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_or_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_PIPE_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_OR, + locus)); +} + +// Parses a binary xor-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_xor_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_CARET_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::BITWISE_XOR, + locus)); +} + +// Parses a binary left shift-assignment expression (with Pratt parsing). 
+template +std::unique_ptr +Parser::parse_left_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_L_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::LEFT_SHIFT, + locus)); +} + +// Parses a binary right shift-assignment expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_right_shift_assig_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_R_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::CompoundAssignmentExpr (std::move (left), std::move (right), + CompoundAssignmentOperator::RIGHT_SHIFT, + locus)); +} + +// Parses a postfix unary await expression (with Pratt parsing). +template +std::unique_ptr +Parser::parse_await_expr ( + const_TokenPtr tok, std::unique_ptr expr_to_await, + AST::AttrVec outer_attrs) +{ + /* skip "await" identifier (as "." 
has already been consumed in + * parse_expression) this assumes that the identifier was already identified + * as await */ + if (!skip_token (IDENTIFIER)) + { + Error error (tok->get_locus (), "failed to skip % in await expr " + "- this is probably a deep issue"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // TODO: check inside async block in semantic analysis + location_t locus = expr_to_await->get_locus (); + + return std::unique_ptr ( + new AST::AwaitExpr (std::move (expr_to_await), std::move (outer_attrs), + locus)); +} + +/* Parses an exclusive range ('..') in left denotation position (i.e. + * RangeFromExpr or RangeFromToExpr). */ +template +std::unique_ptr +Parser::parse_led_range_exclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + // Can be nullptr, in which case it is a RangeFromExpr, otherwise a + // RangeFromToExpr. + restrictions.expr_can_be_null = true; + std::unique_ptr right + = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); + + location_t locus = left->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFromExpr (std::move (left), locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeFromToExpr (std::move (left), std::move (right), locus)); + } + // FIXME: make non-associative +} + +/* Parses an exclusive range ('..') in null denotation position (i.e. + * RangeToExpr or RangeFullExpr). 
*/ +template +std::unique_ptr +Parser::parse_nud_range_exclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + auto restrictions = ParseRestrictions (); + restrictions.expr_can_be_null = true; + + // FIXME: this probably parses expressions accidently or whatever + // try parsing RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); + + location_t locus = tok->get_locus (); + + if (right == nullptr) + { + // range from expr + return std::unique_ptr ( + new AST::RangeFullExpr (locus)); + } + else + { + return std::unique_ptr ( + new AST::RangeToExpr (std::move (right), locus)); + } + // FIXME: make non-associative +} + +// Parses a full binary range inclusive expression. +template +std::unique_ptr +Parser::parse_range_inclusive_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, + AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right + = parse_expr (LBP_DOT_DOT_EQ, AST::AttrVec (), restrictions); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. actually, do so during semantic analysis + location_t locus = left->get_locus (); + + return std::unique_ptr ( + new AST::RangeFromToInclExpr (std::move (left), std::move (right), locus)); +} + +// Parses an inclusive range-to prefix unary expression. +template +std::unique_ptr +Parser::parse_range_to_inclusive_expr ( + const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) +{ + // parse RHS (as tok has already been consumed in parse_expression) + std::unique_ptr right = parse_expr (LBP_DOT_DOT_EQ); + if (right == nullptr) + return nullptr; + // FIXME: make non-associative + + // TODO: check types. 
actually, do so during semantic analysis + + return std::unique_ptr ( + new AST::RangeToInclExpr (std::move (right), tok->get_locus ())); +} + +// Parses a pseudo-binary infix tuple index expression. +template +std::unique_ptr +Parser::parse_tuple_index_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr tuple_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + // parse int literal (as token already skipped) + const_TokenPtr index_tok = expect_token (INT_LITERAL); + if (index_tok == nullptr) + { + return nullptr; + } + std::string index = index_tok->get_str (); + + // convert to integer + if (!index_tok->is_pure_decimal ()) + { + Error error (index_tok->get_locus (), + "tuple index should be a pure decimal literal"); + add_error (std::move (error)); + } + int index_int = atoi (index.c_str ()); + + location_t locus = tuple_expr->get_locus (); + + return std::unique_ptr ( + new AST::TupleIndexExpr (std::move (tuple_expr), index_int, + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix array (or slice) index expression. +template +std::unique_ptr +Parser::parse_index_expr ( + const_TokenPtr, std::unique_ptr array_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse RHS (as tok has already been consumed in parse_expression) + /*std::unique_ptr index_expr + = parse_expr (LBP_ARRAY_REF, AST::AttrVec (), + restrictions);*/ + // TODO: conceptually, should treat [] as brackets, so just parse all expr + std::unique_ptr index_expr = parse_expr (); + if (index_expr == nullptr) + return nullptr; + + // skip ']' at end of array + if (!skip_token (RIGHT_SQUARE)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. 
actually, do so during semantic analysis + location_t locus = array_expr->get_locus (); + + return std::unique_ptr ( + new AST::ArrayIndexExpr (std::move (array_expr), std::move (index_expr), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix struct field access expression. +template +std::unique_ptr +Parser::parse_field_access_expr ( + const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr struct_expr, + AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) +{ + /* get field name identifier (assume that this is a field access expr and + * not await, for instance) */ + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + location_t locus = struct_expr->get_locus (); + + // TODO: check types. actually, do so during semantic analysis + return std::unique_ptr ( + new AST::FieldAccessExpr (std::move (struct_expr), std::move (ident), + std::move (outer_attrs), locus)); +} + +// Parses a pseudo-binary infix method call expression. 
+template +std::unique_ptr +Parser::parse_method_call_expr ( + const_TokenPtr tok, std::unique_ptr receiver_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse path expr segment + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + Error error (tok->get_locus (), + "failed to parse path expr segment of method call expr"); + add_error (std::move (error)); + + return nullptr; + } + + // skip left parentheses + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + // parse method params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse method param in method call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip right paren + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // TODO: check types. actually do so in semantic analysis pass. + location_t locus = receiver_expr->get_locus (); + + return std::unique_ptr ( + new AST::MethodCallExpr (std::move (receiver_expr), std::move (segment), + std::move (params), std::move (outer_attrs), + locus)); +} + +// Parses a pseudo-binary infix function call expression. 
+template +std::unique_ptr +Parser::parse_function_call_expr ( + const_TokenPtr, std::unique_ptr function_expr, + AST::AttrVec outer_attrs, ParseRestrictions) +{ + // parse function params (if they exist) + std::vector> params; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr param = parse_expr (); + if (param == nullptr) + { + Error error (t->get_locus (), + "failed to parse function param in function call"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + // skip ')' at end of param list + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // TODO: check types. actually, do so during semantic analysis + location_t locus = function_expr->get_locus (); + + return std::unique_ptr ( + new AST::CallExpr (std::move (function_expr), std::move (params), + std::move (outer_attrs), locus)); +} + +/* Parses a struct expr struct with a path in expression already parsed (but + * not + * '{' token). 
*/ +template +std::unique_ptr +Parser::parse_struct_expr_struct_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + // assume struct expr struct (as struct-enum disambiguation requires name + // lookup) again, make statement if final ';' + if (!skip_token (LEFT_CURLY)) + { + return nullptr; + } + + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + location_t path_locus = path.get_locus (); + switch (t->get_id ()) + { + case RIGHT_CURLY: + // struct with no body + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructExprStruct (std::move (path), std::move (inner_attrs), + std::move (outer_attrs), path_locus)); + case DOT_DOT: + /* technically this would give a struct base-only struct, but this + * algorithm should work too. As such, AST type not happening. */ + case IDENTIFIER: + case HASH: + case INT_LITERAL: + { + // struct with struct expr fields + + // parse struct expr fields + std::vector> fields; + + while (t->get_id () != RIGHT_CURLY && t->get_id () != DOT_DOT) + { + std::unique_ptr field + = parse_struct_expr_field (); + if (field == nullptr) + { + Error error (t->get_locus (), + "failed to parse struct (or enum) expr field"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr field validated to not be null"); + + fields.push_back (std::move (field)); + + // DEBUG: + rust_debug ("struct/enum expr field pushed back"); + + if (lexer.peek_token ()->get_id () != COMMA) + { + // DEBUG: + rust_debug ("lack of comma detected in struct/enum expr " + "fields - break"); + break; + } + lexer.skip_token (); + + // DEBUG: + rust_debug ("struct/enum expr fields comma skipped "); + + t = lexer.peek_token (); + } + + // DEBUG: + rust_debug ("struct/enum expr about to parse struct base "); + + // parse struct base if it exists + AST::StructBase struct_base = AST::StructBase::error 
(); + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + location_t dot_dot_location = lexer.peek_token ()->get_locus (); + lexer.skip_token (); + + // parse required struct base expr + std::unique_ptr base_expr = parse_expr (); + if (base_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct base expression in struct " + "expression"); + add_error (std::move (error)); + + return nullptr; + } + + // DEBUG: + rust_debug ("struct/enum expr - parsed and validated base expr"); + + struct_base + = AST::StructBase (std::move (base_expr), dot_dot_location); + + // DEBUG: + rust_debug ("assigned struct base to new struct base "); + } + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG: + rust_debug ( + "struct/enum expr skipped right curly - done and ready to return"); + + return std::unique_ptr ( + new AST::StructExprStructFields (std::move (path), std::move (fields), + path_locus, std::move (struct_base), + std::move (inner_attrs), + std::move (outer_attrs))); + } + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in struct (or enum) expression - " + "expected %<}%>, identifier, integer literal, or %<..%>", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a struct expr tuple with a path in expression already parsed (but + * not + * '(' token). + * FIXME: this currently outputs a call expr, as they cannot be disambiguated. + * A better solution would be to just get this to call that function directly. 
+ * */ +template +std::unique_ptr +Parser::parse_struct_expr_tuple_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs) +{ + if (!skip_token (LEFT_PAREN)) + { + return nullptr; + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + std::vector> exprs; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + // parse expression (required) + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (t->get_locus (), "failed to parse expression in " + "struct (or enum) expression tuple"); + add_error (std::move (error)); + + return nullptr; + } + exprs.push_back (std::move (expr)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + location_t path_locus = path.get_locus (); + + auto pathExpr = std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + + return std::unique_ptr ( + new AST::CallExpr (std::move (pathExpr), std::move (exprs), + std::move (outer_attrs), path_locus)); +} + +// Parses a closure expression with pratt parsing (from null denotation). +template +std::unique_ptr +Parser::parse_closure_expr_pratt (const_TokenPtr tok, + AST::AttrVec outer_attrs) +{ + // TODO: does this need pratt parsing (for precedence)? probably not, but + // idk + location_t locus = tok->get_locus (); + bool has_move = false; + if (tok->get_id () == MOVE) + { + has_move = true; + tok = lexer.peek_token (); + lexer.skip_token (); + // skip token and reassign + } + + // handle parameter list + std::vector params; + + switch (tok->get_id ()) + { + case OR: + // no parameters, don't skip token + break; + case PIPE: + { + // actually may have parameters + // don't skip token + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != PIPE) + { + AST::ClosureParam param = parse_closure_param (); + if (param.is_error ()) + { + // TODO is this really an error? 
+ Error error (t->get_locus (), "could not parse closure param"); + add_error (std::move (error)); + + return nullptr; + } + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + { + if (lexer.peek_token ()->get_id () == OR) + lexer.split_current_token (PIPE, PIPE); + // not an error but means param list is done + break; + } + // skip comma + lexer.skip_token (); + + if (lexer.peek_token ()->get_id () == OR) + lexer.split_current_token (PIPE, PIPE); + + t = lexer.peek_token (); + } + + if (!skip_token (PIPE)) + { + return nullptr; + } + break; + } + default: + add_error (Error (tok->get_locus (), + "unexpected token %qs in closure expression - expected " + "%<|%> or %<||%>", + tok->get_token_description ())); + + // skip somewhere? + return nullptr; + } + + // again branch based on next token + tok = lexer.peek_token (); + if (tok->get_id () == RETURN_TYPE) + { + // must be return type closure with block expr + + // skip "return type" token + lexer.skip_token (); + + // parse actual type, which is required + std::unique_ptr type = parse_type_no_bounds (); + if (type == nullptr) + { + // error + Error error (tok->get_locus (), "failed to parse type for closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + // parse block expr, which is required + std::unique_ptr block = parse_block_expr (); + if (block == nullptr) + { + // error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse block expr in closure"); + add_error (std::move (error)); + + // skip somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInnerTyped (std::move (type), std::move (block), + std::move (params), locus, has_move, + std::move (outer_attrs))); + } + else + { + // must be expr-only closure + + // parse expr, which is required + std::unique_ptr expr = parse_expr (); + if (expr == nullptr) + { + Error error (tok->get_locus (), + "failed to parse expression in closure"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ClosureExprInner (std::move (expr), std::move (params), locus, + has_move, std::move (outer_attrs))); + } +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-lexer.cc b/gcc/rust/parse/rust-parse-impl-lexer.cc index fec91e8b19d..9943a606d71 100644 --- a/gcc/rust/parse/rust-parse-impl-lexer.cc +++ b/gcc/rust/parse/rust-parse-impl-lexer.cc @@ -16,7 +16,7 @@ // along with GCC; see the file COPYING3. If not see // . -#include "rust-parse-impl.h" +#include "rust-parse-impl.hxx" namespace Rust { diff --git a/gcc/rust/parse/rust-parse-impl-macro.cc b/gcc/rust/parse/rust-parse-impl-macro.cc index e632887fa72..7ac2d31102c 100644 --- a/gcc/rust/parse/rust-parse-impl-macro.cc +++ b/gcc/rust/parse/rust-parse-impl-macro.cc @@ -16,7 +16,7 @@ // along with GCC; see the file COPYING3. If not see // . -#include "rust-parse-impl.h" +#include "rust-parse-impl.hxx" #include "rust-macro-invoc-lexer.h" namespace Rust { diff --git a/gcc/rust/parse/rust-parse-impl-macro.hxx b/gcc/rust/parse/rust-parse-impl-macro.hxx new file mode 100644 index 00000000000..bc26e6fb935 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-macro.hxx @@ -0,0 +1,621 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. 
+ +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.h + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" + +namespace Rust { + +// Parses a semi-coloned (except for full block) macro invocation item. +template +std::unique_ptr +Parser::parse_macro_invocation_semi ( + AST::AttrVec outer_attrs) +{ + location_t macro_locus = lexer.peek_token ()->get_locus (); + auto path = parse_simple_path (); + if (!path) + return nullptr; + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? 
+ return nullptr; + } + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "macro invocation semi body)", + t->get_token_description ())); + + return nullptr; + } + location_t tok_tree_locus = t->get_locus (); + lexer.skip_token (); + + // parse actual token trees + std::vector> token_trees; + auto delim_open + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees.push_back (std::move (delim_open)); + + t = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type) + && t->get_id () != END_OF_FILE) + { + auto tree = parse_token_tree (); + if (!tree) + return nullptr; + + token_trees.push_back (std::move (tree.value ())); + + t = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees.push_back (std::move (delim_close)); + + AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees), + tok_tree_locus); + AST::MacroInvocData invoc_data (std::move (path.value ()), + std::move (delim_tok_tree)); + + // parse end delimiters + t = lexer.peek_token (); + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. 
+ lexer.skip_token (); + + if (delim_type != AST::CURLY) + { + // skip semicolon at end of non-curly macro invocation semis + if (!skip_token (SEMICOLON)) + { + // as this is the end, allow recovery (probably) - may change + + return AST::MacroInvocation::Regular (std::move (invoc_data), + std::move (outer_attrs), + macro_locus, true); + } + } + + // DEBUG: + rust_debug ("skipped token is '%s', next token (current peek) is '%s'", + t->get_token_description (), + lexer.peek_token ()->get_token_description ()); + + return AST::MacroInvocation::Regular (std::move (invoc_data), + std::move (outer_attrs), + macro_locus, true); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro invocation semi)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + /* return empty macro invocation despite possibly parsing mostly valid one + * - TODO is this a good idea? */ + return nullptr; + } +} + +// Parses a non-semicoloned macro invocation (i.e. as pattern or expression). +template +std::unique_ptr +Parser::parse_macro_invocation (AST::AttrVec outer_attrs) +{ + // parse macro path + auto macro_path = parse_simple_path (); + if (!macro_path) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro invocation path"); + add_error (std::move (error)); + + // skip? + return nullptr; + } + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? 
+ return nullptr; + } + + // parse internal delim token tree + auto delim_tok_tree = parse_delim_token_tree (); + if (!delim_tok_tree) + return nullptr; + + location_t macro_locus = macro_path->get_locus (); + + return AST::MacroInvocation::Regular ( + AST::MacroInvocData (std::move (macro_path.value ()), + std::move (delim_tok_tree.value ())), + std::move (outer_attrs), macro_locus); +} + +// Parses a macro rule definition - does not parse semicolons. +template +AST::MacroRule +Parser::parse_macro_rule () +{ + location_t locus = lexer.peek_token ()->get_locus (); + + // parse macro matcher + AST::MacroMatcher matcher = parse_macro_matcher (); + + if (matcher.is_error ()) + return AST::MacroRule::create_error (locus); + + if (!skip_token (MATCH_ARROW)) + { + // skip after somewhere? + return AST::MacroRule::create_error (locus); + } + + // parse transcriber (this is just a delim token tree) + location_t token_tree_loc = lexer.peek_token ()->get_locus (); + auto delim_token_tree = parse_delim_token_tree (); + if (!delim_token_tree) + return AST::MacroRule::create_error (token_tree_loc); + + AST::MacroTranscriber transcriber (delim_token_tree.value (), token_tree_loc); + + return AST::MacroRule (std::move (matcher), std::move (transcriber), locus); +} + +// Parses a macro matcher (part of a macro rule definition). 
+template +AST::MacroMatcher +Parser::parse_macro_matcher () +{ + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // DEBUG + rust_debug ("begun parsing macro matcher"); + + // Map tokens to DelimType + const_TokenPtr t = lexer.peek_token (); + location_t locus = t->get_locus (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error ( + t->get_locus (), + "unexpected token %qs - expecting delimiters (for a macro matcher)", + t->get_token_description ())); + + return AST::MacroMatcher::create_error (t->get_locus ()); + } + lexer.skip_token (); + + // parse actual macro matches + std::vector> matches; + // Set of possible preceding macro matches to make sure follow-set + // restrictions are respected. + // TODO: Consider using std::reference_wrapper instead of raw pointers? + std::vector last_matches; + + t = lexer.peek_token (); + // parse token trees until the initial delimiter token is found again + while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) + { + std::unique_ptr match = parse_macro_match (); + + if (match == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse macro match for macro matcher - found %qs", + t->get_token_description ()); + add_error (std::move (error)); + + return AST::MacroMatcher::create_error (t->get_locus ()); + } + + if (matches.size () > 0) + { + const auto *last_match = matches.back ().get (); + + // We want to check if we are dealing with a zeroable repetition + bool zeroable = false; + if (last_match->get_macro_match_type () + == AST::MacroMatch::MacroMatchType::Repetition) + { + auto repetition + = static_cast (last_match); + + if (repetition->get_op () + != AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE) + zeroable = true; + } + + if (!zeroable) + last_matches.clear (); + + 
last_matches.emplace_back (last_match); + + for (auto last : last_matches) + if (!is_match_compatible (*last, *match)) + return AST::MacroMatcher::create_error ( + match->get_match_locus ()); + } + + matches.push_back (std::move (match)); + + // DEBUG + rust_debug ("pushed back a match in macro matcher"); + + t = lexer.peek_token (); + } + + // parse end delimiters + t = lexer.peek_token (); + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + return AST::MacroMatcher (delim_type, std::move (matches), locus); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro matcher)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + /* return error macro matcher despite possibly parsing mostly correct one? + * TODO is this the best idea? */ + return AST::MacroMatcher::create_error (t->get_locus ()); + } +} + +// Parses a macro match (syntax match inside a matcher in a macro rule). 
+template +std::unique_ptr +Parser::parse_macro_match () +{ + // branch based on token available + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: + { + // must be macro matcher as delimited + AST::MacroMatcher matcher = parse_macro_matcher (); + if (matcher.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro matcher in macro match"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::MacroMatcher (std::move (matcher))); + } + case DOLLAR_SIGN: + { + // have to do more lookahead to determine if fragment or repetition + const_TokenPtr t2 = lexer.peek_token (1); + switch (t2->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + // macro fragment + return parse_macro_match_fragment (); + case LEFT_PAREN: + // macro repetition + return parse_macro_match_repetition (); + default: + if (token_id_is_keyword (t2->get_id ()) && t2->get_id () != CRATE) + { + // keyword as macro fragment + return parse_macro_match_fragment (); + } + else + { + // error: unrecognised + add_error (Error ( + t2->get_locus (), + "unrecognised token combination %<$%s%> at start of " + "macro match - did you mean %<$identifier%> or %<$(%>?", + t2->get_token_description ())); + + // skip somewhere? + return nullptr; + } + } + } + case RIGHT_PAREN: + case RIGHT_SQUARE: + case RIGHT_CURLY: + // not allowed + add_error (Error ( + t->get_locus (), + "closing delimiters like %qs are not allowed at the start of a macro " + "match", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + default: + // just the token + lexer.skip_token (); + return std::unique_ptr (new AST::Token (std::move (t))); + } +} + +// Parses a fragment macro match. 
+template +std::unique_ptr +Parser::parse_macro_match_fragment () +{ + location_t fragment_locus = lexer.peek_token ()->get_locus (); + skip_token (DOLLAR_SIGN); + + Identifier ident; + auto identifier = lexer.peek_token (); + if (identifier->get_id () == UNDERSCORE) + ident = {Values::Keywords::UNDERSCORE, identifier->get_locus ()}; + else + ident = {identifier}; + + if (ident.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "missing identifier in macro match fragment"); + add_error (std::move (error)); + + return nullptr; + } + skip_token (identifier->get_id ()); + + if (!skip_token (COLON)) + { + // skip after somewhere? + return nullptr; + } + + // get MacroFragSpec for macro + const_TokenPtr t = expect_token (IDENTIFIER); + if (t == nullptr) + return nullptr; + + AST::MacroFragSpec frag + = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ()); + if (frag.is_error ()) + { + Error error (t->get_locus (), + "invalid fragment specifier %qs in fragment macro match", + t->get_str ().c_str ()); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus)); +} + +// Parses a repetition macro match. +template +std::unique_ptr +Parser::parse_macro_match_repetition () +{ + skip_token (DOLLAR_SIGN); + skip_token (LEFT_PAREN); + + std::vector> matches; + + // parse required first macro match + std::unique_ptr initial_match = parse_macro_match (); + if (initial_match == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "could not parse required first macro match in macro match repetition"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + matches.push_back (std::move (initial_match)); + + // parse optional later macro matches + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr match = parse_macro_match (); + + if (match == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro match in macro match repetition"); + add_error (std::move (error)); + + return nullptr; + } + + matches.push_back (std::move (match)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + t = lexer.peek_token (); + // see if separator token exists + std::unique_ptr separator = nullptr; + switch (t->get_id ()) + { + // repetition operators + case ASTERISK: + case PLUS: + case QUESTION_MARK: + // delimiters + case LEFT_PAREN: + case LEFT_CURLY: + case LEFT_SQUARE: + case RIGHT_PAREN: + case RIGHT_CURLY: + case RIGHT_SQUARE: + // separator does not exist, so still null and don't skip token + break; + default: + // separator does exist + separator = std::unique_ptr (new AST::Token (std::move (t))); + lexer.skip_token (); + break; + } + + // parse repetition operator + t = lexer.peek_token (); + AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE; + switch (t->get_id ()) + { + case ASTERISK: + op = AST::MacroMatchRepetition::ANY; + lexer.skip_token (); + break; + case PLUS: + op = AST::MacroMatchRepetition::ONE_OR_MORE; + lexer.skip_token (); + break; + case QUESTION_MARK: + op = AST::MacroMatchRepetition::ZERO_OR_ONE; + lexer.skip_token (); + + if (separator != nullptr) + { + add_error ( + Error (separator->get_locus (), + "the % macro repetition operator does not take a " + "separator")); + separator = nullptr; + } + + break; + default: + add_error ( + Error (t->get_locus (), + "expected macro repetition operator (%<*%>, %<+%>, or %) in " + "macro match - found %qs", + t->get_token_description ())); + + // skip after somewhere? 
+ return nullptr; + } + + return std::unique_ptr ( + new AST::MacroMatchRepetition (std::move (matches), op, + std::move (separator), t->get_locus ())); +} + +/* Parses a macro invocation with a path in expression already parsed (but not + * '!' token). */ +template +std::unique_ptr +Parser::parse_macro_invocation_partial ( + AST::PathInExpression path, AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + // macro invocation + if (!skip_token (EXCLAM)) + { + return nullptr; + } + + // convert PathInExpression to SimplePath - if this isn't possible, error + AST::SimplePath converted_path = path.as_simple_path (); + if (converted_path.is_empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse simple path in macro invocation"); + add_error (std::move (error)); + + return nullptr; + } + + auto tok_tree = parse_delim_token_tree (); + if (!tok_tree) + return nullptr; + + rust_debug ("successfully parsed macro invocation (via partial)"); + + location_t macro_locus = converted_path.get_locus (); + + return AST::MacroInvocation::Regular ( + AST::MacroInvocData (std::move (converted_path), + std::move (tok_tree.value ())), + std::move (outer_attrs), macro_locus); +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-path.hxx b/gcc/rust/parse/rust-parse-impl-path.hxx new file mode 100644 index 00000000000..41b220e49a7 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-path.hxx @@ -0,0 +1,679 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.h + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" + +namespace Rust { + +// Parses a SimplePath AST node, if it exists. Does nothing otherwise. +template +tl::expected +Parser::parse_simple_path () +{ + bool has_opening_scope_resolution = false; + location_t locus = UNKNOWN_LOCATION; + + using Parse::Utils::is_simple_path_segment; + + // don't parse anything if not a path upfront + if (!is_simple_path_segment (lexer.peek_token ()->get_id ()) + && !is_simple_path_segment (lexer.peek_token (1)->get_id ())) + return Parse::Error::SimplePath::make_malformed (); + + /* Checks for opening scope resolution (i.e. global scope fully-qualified + * path) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + + locus = lexer.peek_token ()->get_locus (); + + lexer.skip_token (); + } + + // Parse single required simple path segment + auto segment = parse_simple_path_segment (); + + if (!segment) + return Parse::Error::SimplePath::make_malformed (); + + // get location if not gotten already + if (locus == UNKNOWN_LOCATION) + locus = segment->get_locus (); + + std::vector segments; + segments.push_back (std::move (segment.value ())); + + // Parse all other simple path segments + while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + auto new_segment = parse_simple_path_segment (1); + + using Error = Parse::Error::SimplePathSegment::Kind; + // Return path as currently constructed if segment in error state. 
+ if (!new_segment) + { + if (new_segment.error ().kind == Error::INVALID_SIMPLE_PATH_TOKEN) + break; /* Could be end of path */ + else /* Any other error is an hard error */ + return Parse::Error::SimplePath::make_malformed (); + } + + segments.push_back (std::move (new_segment.value ())); + } + + return AST::SimplePath (std::move (segments), has_opening_scope_resolution, + locus); + /* TODO: now that is_simple_path_segment exists, could probably start + * actually making errors upon parse failure of segments and whatever */ +} + +/* Parses a single SimplePathSegment (does not handle the scope resolution + * operators) + * Starts parsing at an offset of base_peek */ +template +tl::expected +Parser::parse_simple_path_segment (int base_peek) +{ + using namespace Values; + const_TokenPtr t = lexer.peek_token (base_peek); + switch (t->get_id ()) + { + case IDENTIFIER: + lexer.skip_token (base_peek); + + return AST::SimplePathSegment (t->get_str (), t->get_locus ()); + case SUPER: + lexer.skip_token (base_peek); + + return AST::SimplePathSegment (Keywords::SUPER, t->get_locus ()); + case SELF: + lexer.skip_token (base_peek); + + return AST::SimplePathSegment (Keywords::SELF, t->get_locus ()); + case CRATE: + lexer.skip_token (base_peek); + + return AST::SimplePathSegment (Keywords::CRATE, t->get_locus ()); + case DOLLAR_SIGN: + if (lexer.peek_token (base_peek + 1)->get_id () == CRATE) + { + lexer.skip_token (base_peek + 1); + + return AST::SimplePathSegment ("$crate", t->get_locus ()); + } + gcc_fallthrough (); + default: + // do nothing but inactivates warning from gcc when compiling + /* could put the rust_error_at thing here but fallthrough (from failing + * $crate condition) isn't completely obvious if it is. */ + + return Parse::Error::SimplePathSegment::make_invalid_token_or_path_end (); + } + rust_unreachable (); +} + +// Parses a PathIdentSegment - an identifier segment of a non-SimplePath path. 
+template +tl::expected +Parser::parse_path_ident_segment () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + lexer.skip_token (); + + return AST::PathIdentSegment (t->get_str (), t->get_locus ()); + case SUPER: + lexer.skip_token (); + + return AST::PathIdentSegment (Values::Keywords::SUPER, t->get_locus ()); + case SELF: + lexer.skip_token (); + + return AST::PathIdentSegment (Values::Keywords::SELF, t->get_locus ()); + case SELF_ALIAS: + lexer.skip_token (); + + return AST::PathIdentSegment (Values::Keywords::SELF_ALIAS, + t->get_locus ()); + case CRATE: + lexer.skip_token (); + + return AST::PathIdentSegment (Values::Keywords::CRATE, t->get_locus ()); + case DOLLAR_SIGN: + if (lexer.peek_token (1)->get_id () == CRATE) + { + lexer.skip_token (1); + + return AST::PathIdentSegment ("$crate", t->get_locus ()); + } + gcc_fallthrough (); + default: + /* do nothing but inactivates warning from gcc when compiling + * could put the error_at thing here but fallthrough (from failing $crate + * condition) isn't completely obvious if it is. */ + + // test prevent error + return Parse::Error::PathIdentSegment::make_invalid_token (); + } + rust_unreachable (); +} + +// Parses a type path. +template +AST::TypePath +Parser::parse_type_path () +{ + bool has_opening_scope_resolution = false; + location_t locus = lexer.peek_token ()->get_locus (); + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + lexer.skip_token (); + } + + // create segment vector + std::vector> segments; + + // parse required initial segment + std::unique_ptr initial_segment + = parse_type_path_segment (); + if (initial_segment == nullptr) + { + // skip after somewhere? 
+ // don't necessarily throw error but yeah + return AST::TypePath::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + std::unique_ptr segment + = parse_type_path_segment (); + if (segment == nullptr) + { + // skip after somewhere? + Error error (t->get_locus (), "could not parse type path segment"); + add_error (std::move (error)); + + return AST::TypePath::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::TypePath (std::move (segments), locus, + has_opening_scope_resolution); +} + +/* Parses a single type path segment (not including opening scope resolution, + * but includes any internal ones). Includes generic args or type path + * functions too. 
*/ +template +std::unique_ptr +Parser::parse_type_path_segment () +{ + location_t locus = lexer.peek_token ()->get_locus (); + // parse ident segment part + auto ident_segment_res = parse_path_ident_segment (); + if (!ident_segment_res) + { + // not necessarily an error + return nullptr; + } + auto ident_segment = ident_segment_res.value (); + + /* lookahead to determine if variants exist - only consume scope resolution + * then */ + bool has_separating_scope_resolution = false; + const_TokenPtr next = lexer.peek_token (1); + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && (next->get_id () == LEFT_ANGLE || next->get_id () == LEFT_PAREN)) + { + has_separating_scope_resolution = true; + lexer.skip_token (); + } + + // branch into variants on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_SHIFT: + case LEFT_ANGLE: + { + // parse generic args + AST::GenericArgs generic_args = parse_path_generic_args (); + + return std::unique_ptr ( + new AST::TypePathSegmentGeneric (std::move (ident_segment), + has_separating_scope_resolution, + std::move (generic_args), locus)); + } + case LEFT_PAREN: + { + // parse type path function + AST::TypePathFunction type_path_function + = parse_type_path_function (locus); + + if (type_path_function.is_error ()) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::TypePathSegmentFunction (std::move (ident_segment), + has_separating_scope_resolution, + std::move (type_path_function), + locus)); + } + default: + // neither of them + return std::unique_ptr ( + new AST::TypePathSegment (std::move (ident_segment), + has_separating_scope_resolution, locus)); + } + rust_unreachable (); +} + +// Parses a function call representation inside a type path. +template +AST::TypePathFunction +Parser::parse_type_path_function (location_t id_location) +{ + if (!skip_token (LEFT_PAREN)) + { + // skip somewhere? 
+ return AST::TypePathFunction::create_error (); + } + + // parse function inputs + std::vector> inputs; + + while (lexer.peek_token ()->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + /* this is an error as there should've been a ')' there if there + * wasn't a type */ + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse type in parameters of type path function"); + add_error (std::move (error)); + + // skip somewhere? + return AST::TypePathFunction::create_error (); + } + + inputs.push_back (std::move (type)); + + // skip commas, including trailing commas + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return AST::TypePathFunction::create_error (); + } + + // parse optional return type + std::unique_ptr return_type = parse_function_return_type (); + + inputs.shrink_to_fit (); + return AST::TypePathFunction (std::move (inputs), id_location, + std::move (return_type)); +} + +// Parses a path inside an expression that allows generic arguments. +template +AST::PathInExpression +Parser::parse_path_in_expression () +{ + location_t locus = UNKNOWN_LOCATION; + bool has_opening_scope_resolution = false; + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) + { + has_opening_scope_resolution = true; + + locus = lexer.peek_token ()->get_locus (); + + lexer.skip_token (); + } + + // create segment vector + std::vector segments; + + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + } + + // parse required initial segment + AST::PathExprSegment initial_segment = parse_path_expr_segment (); + if (initial_segment.is_error ()) + { + // skip after somewhere? 
+ // don't necessarily throw error but yeah + return AST::PathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment"); + add_error (std::move (error)); + + return AST::PathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::PathInExpression (std::move (segments), {}, locus, + has_opening_scope_resolution); +} + +/* Parses a single path in expression path segment (including generic + * arguments). */ +template +AST::PathExprSegment +Parser::parse_path_expr_segment () +{ + location_t locus = lexer.peek_token ()->get_locus (); + // parse ident segment + auto ident_result = parse_path_ident_segment (); + if (!ident_result) + { + // not necessarily an error? 
+ return AST::PathExprSegment::create_error (); + } + auto ident = ident_result.value (); + + // parse generic args (and turbofish), if they exist + /* use lookahead to determine if they actually exist (don't want to + * accidently parse over next ident segment) */ + if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION + && (lexer.peek_token (1)->get_id () == LEFT_ANGLE + || lexer.peek_token (1)->get_id () == LEFT_SHIFT)) + { + // skip scope resolution + lexer.skip_token (); + + // Let parse_path_generic_args split "<<" tokens + AST::GenericArgs generic_args = parse_path_generic_args (); + + return AST::PathExprSegment (std::move (ident), locus, + std::move (generic_args)); + } + + // return a generic parameter-less expr segment if not found + return AST::PathExprSegment (std::move (ident), locus); +} + +/* Parses a fully qualified path in expression (i.e. a pattern). FIXME does + * not parse outer attrs. */ +template +AST::QualifiedPathInExpression +Parser::parse_qualified_path_in_expression ( + location_t pratt_parsed_loc) +{ + /* Note: the Rust grammar is defined in such a way that it is impossible to + * determine whether a prospective qualified path is a + * QualifiedPathInExpression or QualifiedPathInType in all cases by the + * rules themselves (the only possible difference is a TypePathSegment with + * function, and lookahead to find this is too difficult). However, as this + * is a pattern and QualifiedPathInType is a type, I believe it that their + * construction will not be confused (due to rules regarding patterns vs + * types). + * As such, this function will not attempt to minimise errors created by + * their confusion. */ + + // parse the qualified path type (required) + AST::QualifiedPathType qual_path_type + = parse_qualified_path_type (pratt_parsed_loc); + if (qual_path_type.is_error ()) + { + // TODO: should this create a parse error? 
+ return AST::QualifiedPathInExpression::create_error (); + } + location_t locus = qual_path_type.get_locus (); + + // parse path segments + std::vector segments; + + // parse initial required segment + if (!expect_token (SCOPE_RESOLUTION)) + { + // skip after somewhere? + + return AST::QualifiedPathInExpression::create_error (); + } + AST::PathExprSegment initial_segment = parse_path_expr_segment (); + if (initial_segment.is_error ()) + { + // skip after somewhere? + Error error (lexer.peek_token ()->get_locus (), + "required initial path expression segment in " + "qualified path in expression could not be parsed"); + add_error (std::move (error)); + + return AST::QualifiedPathInExpression::create_error (); + } + segments.push_back (std::move (initial_segment)); + + // parse optional segments (as long as scope resolution operator exists) + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + AST::PathExprSegment segment = parse_path_expr_segment (); + if (segment.is_error ()) + { + // skip after somewhere? + Error error (t->get_locus (), + "could not parse path expression segment in qualified " + "path in expression"); + add_error (std::move (error)); + + return AST::QualifiedPathInExpression::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + // FIXME: outer attr parsing + return AST::QualifiedPathInExpression (std::move (qual_path_type), + std::move (segments), {}, locus); +} + +// Parses the type syntactical construction at the start of a qualified path. +template +AST::QualifiedPathType +Parser::parse_qualified_path_type ( + location_t pratt_parsed_loc) +{ + location_t locus = pratt_parsed_loc; + /* TODO: should this actually be error? is there anywhere where this could + * be valid? 
*/ + if (locus == UNKNOWN_LOCATION) + { + locus = lexer.peek_token ()->get_locus (); + + if (lexer.peek_token ()->get_id () == LEFT_SHIFT) + lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); + + // skip after somewhere? + if (!skip_token (LEFT_ANGLE)) + return AST::QualifiedPathType::create_error (); + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in qualified path type"); + add_error (std::move (error)); + + // skip somewhere? + return AST::QualifiedPathType::create_error (); + } + + // parse optional as clause + AST::TypePath as_type_path = AST::TypePath::create_error (); + if (lexer.peek_token ()->get_id () == AS) + { + lexer.skip_token (); + + // parse type path, which is required now + as_type_path = parse_type_path (); + if (as_type_path.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "could not parse type path in as clause in qualified path type"); + add_error (std::move (error)); + + // skip somewhere? + return AST::QualifiedPathType::create_error (); + } + } + + /* NOTE: should actually be a right-angle token, so + * skip_generics_right_angle shouldn't be required */ + if (!skip_token (RIGHT_ANGLE)) + { + // skip after somewhere? + return AST::QualifiedPathType::create_error (); + } + + return AST::QualifiedPathType (std::move (type), locus, + std::move (as_type_path)); +} + +// Parses a fully qualified path in type (i.e. a type). +template +AST::QualifiedPathInType +Parser::parse_qualified_path_in_type () +{ + location_t locus = lexer.peek_token ()->get_locus (); + // parse the qualified path type (required) + AST::QualifiedPathType qual_path_type = parse_qualified_path_type (); + if (qual_path_type.is_error ()) + { + // TODO: should this create a parse error? 
+ return AST::QualifiedPathInType::create_error (); + } + + // parse initial required segment + if (!expect_token (SCOPE_RESOLUTION)) + { + // skip after somewhere? + + return AST::QualifiedPathInType::create_error (); + } + std::unique_ptr initial_segment + = parse_type_path_segment (); + if (initial_segment == nullptr) + { + // skip after somewhere? + Error error (lexer.peek_token ()->get_locus (), + "required initial type path segment in qualified path in " + "type could not be parsed"); + add_error (std::move (error)); + + return AST::QualifiedPathInType::create_error (); + } + + // parse optional segments (as long as scope resolution operator exists) + std::vector> segments; + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == SCOPE_RESOLUTION) + { + // skip scope resolution operator + lexer.skip_token (); + + // parse the actual segment - it is an error if it doesn't exist now + std::unique_ptr segment + = parse_type_path_segment (); + if (segment == nullptr) + { + // skip after somewhere? + Error error ( + t->get_locus (), + "could not parse type path segment in qualified path in type"); + add_error (std::move (error)); + + return AST::QualifiedPathInType::create_error (); + } + + segments.push_back (std::move (segment)); + + t = lexer.peek_token (); + } + + segments.shrink_to_fit (); + + return AST::QualifiedPathInType (std::move (qual_path_type), + std::move (initial_segment), + std::move (segments), locus); +} +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-pattern.hxx b/gcc/rust/parse/rust-parse-impl-pattern.hxx new file mode 100644 index 00000000000..c46a630fb1d --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-pattern.hxx @@ -0,0 +1,1253 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. 
+ +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.h + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" + +namespace Rust { + +template +std::unique_ptr +Parser::parse_pattern () +{ + location_t start_locus = lexer.peek_token ()->get_locus (); + + /* skip optional starting pipe */ + maybe_skip_token (PIPE); + + auto first = parse_pattern_no_alt (); + + if (lexer.peek_token ()->get_id () != PIPE) + /* no alternates */ + return first; + + std::vector> alts; + if (first != nullptr) + alts.push_back (std::move (first)); + + do + { + lexer.skip_token (); + auto follow = parse_pattern_no_alt (); + if (follow != nullptr) + alts.push_back (std::move (follow)); + } + + while (lexer.peek_token ()->get_id () == PIPE); + + if (alts.empty ()) + return nullptr; + + /* alternates */ + return std::unique_ptr ( + new AST::AltPattern (std::move (alts), start_locus)); +} + +// Parses a pattern without alternates ('|') +// (will further disambiguate any pattern). 
+template +std::unique_ptr +Parser::parse_pattern_no_alt () +{ + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case TRUE_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (Values::Keywords::TRUE_LITERAL, + AST::Literal::BOOL, t->get_locus (), + t->get_type_hint ())); + case FALSE_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (Values::Keywords::FALSE_LITERAL, + AST::Literal::BOOL, t->get_locus (), + t->get_type_hint ())); + case CHAR_LITERAL: + case BYTE_CHAR_LITERAL: + case INT_LITERAL: + case FLOAT_LITERAL: + return parse_literal_or_range_pattern (); + case STRING_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (t->get_str (), AST::Literal::STRING, + t->get_locus (), t->get_type_hint ())); + case BYTE_STRING_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (t->get_str (), AST::Literal::BYTE_STRING, + t->get_locus (), t->get_type_hint ())); + case RAW_STRING_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::LiteralPattern (t->get_str (), AST::Literal::RAW_STRING, + t->get_locus (), t->get_type_hint ())); + // raw string and raw byte string literals too if they are readded to + // lexer + case MINUS: + if (lexer.peek_token (1)->get_id () == INT_LITERAL) + { + return parse_literal_or_range_pattern (); + } + else if (lexer.peek_token (1)->get_id () == FLOAT_LITERAL) + { + return parse_literal_or_range_pattern (); + } + else + { + Error error (t->get_locus (), "unexpected token %<-%> in pattern - " + "did you forget an integer literal"); + add_error (std::move (error)); + + return nullptr; + } + case UNDERSCORE: + lexer.skip_token (); + return std::unique_ptr ( + new AST::WildcardPattern (t->get_locus ())); + case DOT_DOT: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RestPattern (t->get_locus ())); + case REF: + case MUT: + return parse_identifier_pattern (); + case 
IDENTIFIER: + /* if identifier with no scope resolution afterwards, identifier + * pattern. if scope resolution afterwards, path pattern (or range + * pattern or struct pattern or tuple struct pattern) or macro + * invocation */ + return parse_ident_leading_pattern (); + case AMP: + case LOGICAL_AND: + // reference pattern + return parse_reference_pattern (); + case LEFT_PAREN: + // tuple pattern or grouped pattern + return parse_grouped_or_tuple_pattern (); + case LEFT_SQUARE: + // slice pattern + return parse_slice_pattern (); + case LEFT_SHIFT: + case LEFT_ANGLE: + { + // qualified path in expression or qualified range pattern bound + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (); + + if (lexer.peek_token ()->get_id () == DOT_DOT_EQ + || lexer.peek_token ()->get_id () == ELLIPSIS + || lexer.peek_token ()->get_id () == DOT_DOT) + { + // qualified range pattern bound, so parse rest of range pattern + AST::RangeKind kind + = AST::tokenid_to_rangekind (lexer.peek_token ()->get_id ()); + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundQualPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower_bound), + std::move (upper_bound), kind, + t->get_locus ())); + } + else + { + // just qualified path in expression + return std::unique_ptr ( + new AST::QualifiedPathInExpression (std::move (path))); + } + } + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case SCOPE_RESOLUTION: + case DOLLAR_SIGN: + { + // path in expression or range pattern bound + AST::PathInExpression path = parse_path_in_expression (); + + const_TokenPtr next = lexer.peek_token (); + switch (next->get_id ()) + { + case DOT_DOT_EQ: + case DOT_DOT: + case ELLIPSIS: + { + // qualified range pattern bound, so parse rest of range pattern + AST::RangeKind kind = AST::tokenid_to_rangekind (next->get_id ()); + lexer.skip_token (); 
+ + std::unique_ptr lower_bound ( + new AST::RangePatternBoundPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower_bound), + std::move (upper_bound), kind, + next->get_locus ())); + } + case EXCLAM: + return parse_macro_invocation_partial (std::move (path), + AST::AttrVec ()); + case LEFT_PAREN: + { + // tuple struct + lexer.skip_token (); + + // parse items + std::unique_ptr items + = parse_tuple_struct_items (); + if (items == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple struct items"); + add_error (std::move (error)); + + return nullptr; + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), + std::move (items))); + } + case LEFT_CURLY: + { + // struct + lexer.skip_token (); + + // parse elements (optional) + AST::StructPatternElements elems = parse_struct_pattern_elems (); + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPattern (std::move (path), t->get_locus (), + std::move (elems))); + } + default: + // assume path in expression + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } + } + default: + add_error (Error (t->get_locus (), "unexpected token %qs in pattern", + t->get_token_description ())); + + return nullptr; + } +} + +// Parses a single or double reference pattern. 
+template +std::unique_ptr +Parser::parse_reference_pattern () +{ + // parse double or single ref + bool is_double_ref = false; + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case AMP: + // still false + lexer.skip_token (); + break; + case LOGICAL_AND: + is_double_ref = true; + lexer.skip_token (); + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in reference pattern", + t->get_token_description ())); + + return nullptr; + } + + // parse mut (if it exists) + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + is_mut = true; + lexer.skip_token (); + } + + // parse pattern to get reference of (required) + std::unique_ptr pattern = parse_pattern_no_alt (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in reference pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ReferencePattern (std::move (pattern), is_mut, is_double_ref, + t->get_locus ())); +} + +/* Parses a grouped pattern or tuple pattern. Prefers grouped over tuple if + * only a single element with no commas. */ +template +std::unique_ptr +Parser::parse_grouped_or_tuple_pattern () +{ + location_t paren_locus = lexer.peek_token ()->get_locus (); + skip_token (LEFT_PAREN); + + // detect '..' 
token (ranged with no lower range) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + + // parse new patterns while next token is a comma + std::vector> patterns; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if next token is ')' + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + break; + } + + // parse pattern, which is required + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse pattern inside ranged tuple pattern"); + add_error (std::move (error)); + + // skip somewhere? + return nullptr; + } + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + // skip somewhere? + return nullptr; + } + + // create tuple pattern items with only upper pattern items + std::unique_ptr items ( + new AST::TuplePatternItemsHasRest ( + std::vector> (), std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + skip_token (RIGHT_PAREN); + auto items = std::unique_ptr ( + new AST::TuplePatternItemsNoRest ( + std::vector> ())); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + + // parse initial pattern (required) + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in grouped or tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + + // branch on whether next token is a comma or not + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case RIGHT_PAREN: + // grouped pattern + lexer.skip_token (); + + return std::unique_ptr ( + new AST::GroupedPattern (std::move (initial_pattern), paren_locus)); + case COMMA: + { + 
// tuple pattern + lexer.skip_token (); + + // create vector of patterns + std::vector> patterns; + patterns.push_back (std::move (initial_pattern)); + + t = lexer.peek_token (); + while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) + { + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + patterns.push_back (std::move (pattern)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + t = lexer.peek_token (); + if (t->get_id () == RIGHT_PAREN) + { + // non-ranged tuple pattern + lexer.skip_token (); + + std::unique_ptr items ( + new AST::TuplePatternItemsNoRest (std::move (patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else if (t->get_id () == DOT_DOT) + { + // ranged tuple pattern + lexer.skip_token (); + + // parse upper patterns + std::vector> upper_patterns; + t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + break; + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in tuple pattern"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + std::unique_ptr items ( + new AST::TuplePatternItemsHasRest (std::move (patterns), + std::move (upper_patterns))); + return std::unique_ptr ( + new AST::TuplePattern (std::move (items), paren_locus)); + } + else + { + // some kind of error + Error error (t->get_locus (), + "failed to parse tuple pattern (probably) or maybe " 
+ "grouped pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in grouped or tuple pattern " + "after first pattern", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a slice pattern that can match arrays or slices. Parses the square + * brackets too. */ +template +std::unique_ptr +Parser::parse_slice_pattern () +{ + location_t square_locus = lexer.peek_token ()->get_locus (); + std::vector> patterns; + tl::optional>> upper_patterns + = tl::nullopt; + + // lambda function to determine which vector to push new patterns into + auto get_pattern_ref + = [&] () -> std::vector> & { + return upper_patterns.has_value () ? upper_patterns.value () : patterns; + }; + + skip_token (LEFT_SQUARE); + + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + { + skip_token (RIGHT_SQUARE); + std::unique_ptr items ( + new AST::SlicePatternItemsNoRest (std::move (patterns))); + return std::unique_ptr ( + new AST::SlicePattern (std::move (items), square_locus)); + } + + // parse initial pattern (required) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + upper_patterns = std::vector> (); + } + else + { + // Not a rest pattern `..`, parse normally + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse initial pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + + patterns.push_back (std::move (initial_pattern)); + } + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + lexer.skip_token (); + + // break if end bracket + if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) + break; + + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + if (upper_patterns.has_value ()) + { + // DOT_DOT has been parsed before + Error error (lexer.peek_token ()->get_locus (), "%s", + "`..` 
can only be used once per slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + upper_patterns = std::vector> (); + lexer.skip_token (); + t = lexer.peek_token (); + continue; + } + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in slice pattern"); + add_error (std::move (error)); + + return nullptr; + } + get_pattern_ref ().push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_SQUARE)) + { + return nullptr; + } + + if (upper_patterns.has_value ()) + { + // Slice pattern with rest + std::unique_ptr items ( + new AST::SlicePatternItemsHasRest ( + std::move (patterns), std::move (upper_patterns.value ()))); + return std::unique_ptr ( + new AST::SlicePattern (std::move (items), square_locus)); + } + + // Rest-less slice pattern + std::unique_ptr items ( + new AST::SlicePatternItemsNoRest (std::move (patterns))); + return std::unique_ptr ( + new AST::SlicePattern (std::move (items), square_locus)); +} + +/* Parses an identifier pattern (pattern that binds a value matched to a + * variable). */ +template +std::unique_ptr +Parser::parse_identifier_pattern () +{ + location_t locus = lexer.peek_token ()->get_locus (); + + bool has_ref = false; + if (lexer.peek_token ()->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + + // DEBUG + rust_debug ("parsed ref in identifier pattern"); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + // parse identifier (required) + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + // skip somewhere? 
+ return nullptr; + } + Identifier ident{ident_tok}; + + // DEBUG + rust_debug ("parsed identifier in identifier pattern"); + + // parse optional pattern binding thing + std::unique_ptr bind_pattern = nullptr; + if (lexer.peek_token ()->get_id () == PATTERN_BIND) + { + lexer.skip_token (); + + // parse required pattern to bind + bind_pattern = parse_pattern_no_alt (); + if (bind_pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern to bind in identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + } + + // DEBUG + rust_debug ("about to return identifier pattern"); + + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (ident), locus, has_ref, has_mut, + std::move (bind_pattern))); +} + +/* Parses a pattern that opens with an identifier. This includes identifier + * patterns, path patterns (and derivatives such as struct patterns, tuple + * struct patterns, and macro invocations), and ranges. */ +template +std::unique_ptr +Parser::parse_ident_leading_pattern () +{ + // ensure first token is actually identifier + const_TokenPtr initial_tok = lexer.peek_token (); + if (initial_tok->get_id () != IDENTIFIER) + { + return nullptr; + } + + // save initial identifier as it may be useful (but don't skip) + std::string initial_ident = initial_tok->get_str (); + + // parse next tokens as a PathInExpression + AST::PathInExpression path = parse_path_in_expression (); + + // branch on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + return parse_macro_invocation_partial (std::move (path), AST::AttrVec ()); + case LEFT_PAREN: + { + // tuple struct + lexer.skip_token (); + + // DEBUG + rust_debug ("parsing tuple struct pattern"); + + // parse items + std::unique_ptr items + = parse_tuple_struct_items (); + if (items == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple struct items"); + add_error (std::move 
(error)); + + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct items"); + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed tuple struct pattern"); + + return std::unique_ptr ( + new AST::TupleStructPattern (std::move (path), std::move (items))); + } + case LEFT_CURLY: + { + // struct + lexer.skip_token (); + + // parse elements (optional) + AST::StructPatternElements elems = parse_struct_pattern_elems (); + + if (!skip_token (RIGHT_CURLY)) + { + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed struct pattern"); + + return std::unique_ptr ( + new AST::StructPattern (std::move (path), initial_tok->get_locus (), + std::move (elems))); + } + case DOT_DOT_EQ: + case DOT_DOT: + case ELLIPSIS: + { + // range + AST::RangeKind kind + = AST::tokenid_to_rangekind (lexer.peek_token ()->get_id ()); + + lexer.skip_token (); + + std::unique_ptr lower_bound ( + new AST::RangePatternBoundPath (std::move (path))); + std::unique_ptr upper_bound + = parse_range_pattern_bound (); + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower_bound), + std::move (upper_bound), kind, + t->get_locus ())); + } + case PATTERN_BIND: + { + // only allow on single-segment paths + if (path.is_single_segment ()) + { + // identifier with pattern bind + lexer.skip_token (); + + std::unique_ptr bind_pattern + = parse_pattern_no_alt (); + if (bind_pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern to bind to identifier pattern"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus (), false, + false, std::move (bind_pattern))); + } + Error error ( + t->get_locus (), + "failed to parse pattern bind to a path, not an identifier"); + add_error (std::move (error)); + + return nullptr; + } + default: + // assume identifier if single segment + 
if (path.is_single_segment ()) + { + return std::unique_ptr ( + new AST::IdentifierPattern (std::move (initial_ident), + initial_tok->get_locus ())); + } + // return path otherwise + return std::unique_ptr ( + new AST::PathInExpression (std::move (path))); + } +} + +// Parses struct pattern elements if they exist. +template +AST::StructPatternElements +Parser::parse_struct_pattern_elems () +{ + std::vector> fields; + + AST::AttrVec etc_attrs; + bool has_rest = false; + + // try parsing struct pattern fields + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse etc (must be last in struct pattern, so breaks) + if (lexer.peek_token ()->get_id () == DOT_DOT) + { + lexer.skip_token (); + etc_attrs = std::move (outer_attrs); + has_rest = true; + break; + } + + std::unique_ptr field + = parse_struct_pattern_field_partial (std::move (outer_attrs)); + if (field == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct pattern field"); + add_error (std::move (error)); + + // skip after somewhere? + return AST::StructPatternElements::create_empty (); + } + fields.push_back (std::move (field)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip comma + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (has_rest) + return AST::StructPatternElements (std::move (fields), + std::move (etc_attrs)); + else + return AST::StructPatternElements (std::move (fields)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier). 
*/ +template +std::unique_ptr +Parser::parse_struct_pattern_field () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + return parse_struct_pattern_field_partial (std::move (outer_attrs)); +} + +/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or + * identifier), with outer attributes passed in. */ +template +std::unique_ptr +Parser::parse_struct_pattern_field_partial ( + AST::AttrVec outer_attrs) +{ + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case INT_LITERAL: + { + // tuple index + std::string index_str = t->get_str (); + int index = atoi (index_str.c_str ()); + + lexer.skip_token (); + + if (!skip_token (COLON)) + { + return nullptr; + } + + // parse required pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error ( + t->get_locus (), + "failed to parse pattern in tuple index struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldTuplePat (index, std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case IDENTIFIER: + // identifier-pattern OR only identifier + // branch on next token + switch (lexer.peek_token (1)->get_id ()) + { + case COLON: + { + // identifier-pattern + Identifier ident{t}; + lexer.skip_token (); + + skip_token (COLON); + + // parse required pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (t->get_locus (), + "failed to parse pattern in struct pattern field"); + add_error (std::move (error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::StructPatternFieldIdentPat (std::move (ident), + std::move (pattern), + std::move (outer_attrs), + t->get_locus ())); + } + case COMMA: + case RIGHT_CURLY: + { + // identifier only + Identifier ident = {t}; + lexer.skip_token (); + + return std::unique_ptr ( + 
new AST::StructPatternFieldIdent (std::move (ident), false, false, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // error + add_error (Error (t->get_locus (), + "unrecognised token %qs in struct pattern field", + t->get_token_description ())); + + return nullptr; + } + case REF: + case MUT: + { + // only identifier + bool has_ref = false; + if (t->get_id () == REF) + { + has_ref = true; + lexer.skip_token (); + } + + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier ident{ident_tok}; + + return std::unique_ptr ( + new AST::StructPatternFieldIdent (std::move (ident), has_ref, has_mut, + std::move (outer_attrs), + t->get_locus ())); + } + default: + // not necessarily an error + return nullptr; + } +} + +/* Parses a literal pattern or range pattern. Assumes that literals passed in + * are valid range pattern bounds. Do not pass in paths in expressions, for + * instance. 
*/ +template +std::unique_ptr +Parser::parse_literal_or_range_pattern () +{ + const_TokenPtr range_lower = lexer.peek_token (); + AST::Literal::LitType type = AST::Literal::STRING; + bool has_minus = false; + + // get lit type + switch (range_lower->get_id ()) + { + case CHAR_LITERAL: + type = AST::Literal::CHAR; + lexer.skip_token (); + break; + case BYTE_CHAR_LITERAL: + type = AST::Literal::BYTE; + lexer.skip_token (); + break; + case INT_LITERAL: + type = AST::Literal::INT; + lexer.skip_token (); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + lexer.skip_token (); + break; + case MINUS: + // branch on next token + range_lower = lexer.peek_token (1); + switch (range_lower->get_id ()) + { + case INT_LITERAL: + type = AST::Literal::INT; + has_minus = true; + lexer.skip_token (1); + break; + case FLOAT_LITERAL: + type = AST::Literal::FLOAT; + has_minus = true; + lexer.skip_token (1); + break; + default: + add_error (Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern " + "bound or literal after minus symbol", + range_lower->get_token_description ())); + + return nullptr; + } + break; + default: + add_error ( + Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern bound", + range_lower->get_token_description ())); + + return nullptr; + } + + const_TokenPtr next = lexer.peek_token (); + if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS + || next->get_id () == DOT_DOT) + { + AST::RangeKind kind = AST::tokenid_to_rangekind (next->get_id ()); + // range pattern + lexer.skip_token (); + std::unique_ptr lower ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), type, + PrimitiveCoreType::CORETYPE_UNKNOWN), + range_lower->get_locus (), has_minus)); + + std::unique_ptr upper + = parse_range_pattern_bound (); + if (upper == nullptr) + { + Error error (next->get_locus (), + "failed to parse range pattern bound in range pattern"); + add_error (std::move 
(error)); + + return nullptr; + } + + return std::unique_ptr ( + new AST::RangePattern (std::move (lower), std::move (upper), kind, + range_lower->get_locus ())); + } + else + { + // literal pattern + return std::unique_ptr ( + new AST::LiteralPattern (range_lower->get_str (), type, + range_lower->get_locus (), + range_lower->get_type_hint (), has_minus)); + } +} + +// Parses a range pattern bound (value only). +template +std::unique_ptr +Parser::parse_range_pattern_bound () +{ + const_TokenPtr range_lower = lexer.peek_token (); + location_t range_lower_locus = range_lower->get_locus (); + + // get lit type + switch (range_lower->get_id ()) + { + case CHAR_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::CHAR, + range_lower->get_type_hint ()), + range_lower_locus)); + case BYTE_CHAR_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::BYTE, + range_lower->get_type_hint ()), + range_lower_locus)); + case INT_LITERAL: + lexer.skip_token (); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::INT, + range_lower->get_type_hint ()), + range_lower_locus)); + case FLOAT_LITERAL: + lexer.skip_token (); + rust_debug ("warning: used deprecated float range pattern bound"); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, + range_lower->get_type_hint ()), + range_lower_locus)); + case MINUS: + // branch on next token + range_lower = lexer.peek_token (1); + switch (range_lower->get_id ()) + { + case INT_LITERAL: + lexer.skip_token (1); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::INT, + range_lower->get_type_hint ()), + range_lower_locus, true)); + case 
FLOAT_LITERAL: + lexer.skip_token (1); + rust_debug ("warning: used deprecated float range pattern bound"); + return std::unique_ptr ( + new AST::RangePatternBoundLiteral ( + AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, + range_lower->get_type_hint ()), + range_lower_locus, true)); + default: + add_error (Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern " + "bound after minus symbol", + range_lower->get_token_description ())); + + return nullptr; + } + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case SCOPE_RESOLUTION: + case DOLLAR_SIGN: + { + // path in expression + AST::PathInExpression path = parse_path_in_expression (); + if (path.is_error ()) + { + Error error ( + range_lower->get_locus (), + "failed to parse path in expression range pattern bound"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::RangePatternBoundPath (std::move (path))); + } + case LEFT_SHIFT: + case LEFT_ANGLE: + { + // qualified path in expression + AST::QualifiedPathInExpression path + = parse_qualified_path_in_expression (); + if (path.is_error ()) + { + Error error (range_lower->get_locus (), + "failed to parse qualified path in expression range " + "pattern bound"); + add_error (std::move (error)); + + return nullptr; + } + return std::unique_ptr ( + new AST::RangePatternBoundQualPath (std::move (path))); + } + default: + add_error ( + Error (range_lower->get_locus (), + "token type %qs cannot be parsed as range pattern bound", + range_lower->get_token_description ())); + + return nullptr; + } +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-proc-macro.cc b/gcc/rust/parse/rust-parse-impl-proc-macro.cc index 965c9146d1d..4960e06a3c6 100644 --- a/gcc/rust/parse/rust-parse-impl-proc-macro.cc +++ b/gcc/rust/parse/rust-parse-impl-proc-macro.cc @@ -16,7 +16,7 @@ // along with GCC; see the file COPYING3. If not see // . 
-#include "rust-parse-impl.h" +#include "rust-parse-impl.hxx" #include "rust-proc-macro-invoc-lexer.h" namespace Rust { diff --git a/gcc/rust/parse/rust-parse-impl-ttree.hxx b/gcc/rust/parse/rust-parse-impl-ttree.hxx new file mode 100644 index 00000000000..bb8d9aa1851 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-ttree.hxx @@ -0,0 +1,158 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.hxx + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" +#include "rust-parse-error.h" +#include "expected.h" + +namespace Rust { + +/* Parses a TokenTree syntactical production. This is either a delimited token + * tree or a non-delimiter token. 
+ */ +template +tl::expected, Parse::Error::TokenTree> +Parser::parse_token_tree () +{ + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case LEFT_PAREN: + case LEFT_SQUARE: + case LEFT_CURLY: + { + // Parse delimited token tree + auto delim_token_tree = parse_delim_token_tree (); + if (!delim_token_tree) + return Parse::Error::TokenTree:: + make_malformed_delimited_token_tree (); + + // TODO: use move rather than copy constructor + return std::unique_ptr ( + new AST::DelimTokenTree (delim_token_tree.value ())); + } + case RIGHT_PAREN: + case RIGHT_SQUARE: + case RIGHT_CURLY: + // error - should not be called on a closing delimiter token + add_error (Error (t->get_locus (), "unexpected closing delimiter %qs", + t->get_token_description ())); + + add_error (Error (Error::Kind::Hint, t->get_locus (), + "token tree requires either paired delimiters or " + "non-delimiter tokens")); + + lexer.skip_token (); + return Parse::Error::TokenTree::make_malformed (); + default: + // parse token itself as TokenTree + lexer.skip_token (); + return std::unique_ptr (new AST::Token (std::move (t))); + } +} + +// Parses a delimited token tree +template +tl::expected +Parser::parse_delim_token_tree () +{ + const_TokenPtr t = lexer.peek_token (); + lexer.skip_token (); + location_t initial_loc = t->get_locus (); + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "delimited token tree)", + t->get_token_description ())); + + return Parse::Error::DelimTokenTree::make_expected_delimiter (); + } + + // parse actual token tree vector - 0 or more + std::vector> token_trees_in_tree; + auto delim_open + = 
std::unique_ptr (new AST::Token (std::move (t))); + token_trees_in_tree.push_back (std::move (delim_open)); + + // repeat loop until finding the matching delimiter + t = lexer.peek_token (); + while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type) + && t->get_id () != END_OF_FILE) + { + auto tok_tree = parse_token_tree (); + if (!tok_tree) + return Parse::Error::DelimTokenTree::make_invalid_token_tree (); + + token_trees_in_tree.push_back (std::move (tok_tree.value ())); + + // lexer.skip_token(); + t = lexer.peek_token (); + } + auto delim_close + = std::unique_ptr (new AST::Token (std::move (t))); + token_trees_in_tree.push_back (std::move (delim_close)); + + AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree), + initial_loc); + + // parse end delimiters + t = lexer.peek_token (); + + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + return token_tree; + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a delimited token tree)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? "]" : "}"))); + add_error (std::move (error)); + + return Parse::Error::DelimTokenTree::make_mismatched_delimiters (); + } +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl-utils.hxx b/gcc/rust/parse/rust-parse-impl-utils.hxx new file mode 100644 index 00000000000..0edcf227dbc --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl-utils.hxx @@ -0,0 +1,252 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. 
+ +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl.hxx + * This is also the reason why there are no include guards. */ + +#include "rust-parse.h" + +namespace Rust { + +// "Unexpected token" panic mode - flags gcc error at unexpected token +// TODO: seems to be unused, remove? +template +void +Parser::unexpected_token (const_TokenPtr t) +{ + Error error (t->get_locus (), "unexpected token %qs", + t->get_token_description ()); + add_error (std::move (error)); +} + +/* Crappy "error recovery" performed after error by skipping tokens until a + * semi-colon is found */ +template +void +Parser::skip_after_semicolon () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != SEMICOLON) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == SEMICOLON) + lexer.skip_token (); +} + +/* Skips the current token */ +template +void +Parser::skip_token () +{ + lexer.skip_token (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * diagnoses an error and returns false otherwise. */ +template +bool +Parser::skip_token (TokenId token_id) +{ + return expect_token (token_id) != const_TokenPtr (); +} + +/* Checks if current token is similar to inputted token - skips it and returns + * true if so, diagnoses an error and returns false otherwise. 
*/ +template +bool +Parser::skip_token (const_TokenPtr token) +{ + return expect_token (token) != const_TokenPtr (); +} + +/* Checks if current token has inputted id - skips it and returns true if so, + * returns false otherwise without diagnosing an error */ +template +bool +Parser::maybe_skip_token (TokenId token_id) +{ + if (lexer.peek_token ()->get_id () != token_id) + return false; + else + return skip_token (token_id); +} + +/* Checks the current token - if id is same as expected, skips and returns it, + * otherwise diagnoses error and returns null. */ +template +const_TokenPtr +Parser::expect_token (TokenId token_id) +{ + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == token_id) + { + lexer.skip_token (); + return t; + } + else + { + Error error (t->get_locus (), "expecting %qs but %qs found", + get_token_description (token_id), + t->get_token_description ()); + add_error (std::move (error)); + + return const_TokenPtr (); + } +} + +/* Checks the current token - if same as expected, skips and returns it, + * otherwise diagnoses error and returns null. */ +template +const_TokenPtr +Parser::expect_token (const_TokenPtr token_expect) +{ + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == token_expect->get_id () + && (!t->should_have_str () || t->get_str () == token_expect->get_str ())) + { + lexer.skip_token (); + return t; + } + else + { + Error error (t->get_locus (), "expecting %qs but %qs found", + token_expect->get_token_description (), + t->get_token_description ()); + add_error (std::move (error)); + + return const_TokenPtr (); + } +} + +// Skips all tokens until EOF or }. Don't use. 
+template +void +Parser::skip_after_end () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != END_OF_FILE && t->get_id () != RIGHT_CURLY) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + if (t->get_id () == RIGHT_CURLY) + { + lexer.skip_token (); + } +} + +/* A slightly more aware error-handler that skips all tokens until it reaches + * the end of the block scope (i.e. when left curly brackets = right curly + * brackets). Note: assumes currently in the middle of a block. Use + * skip_after_next_block to skip based on the assumption that the block + * has not been entered yet. */ +template +void +Parser::skip_after_end_block () +{ + const_TokenPtr t = lexer.peek_token (); + int curly_count = 1; + + while (curly_count > 0 && t->get_id () != END_OF_FILE) + { + switch (t->get_id ()) + { + case LEFT_CURLY: + curly_count++; + break; + case RIGHT_CURLY: + curly_count--; + break; + default: + break; + } + lexer.skip_token (); + t = lexer.peek_token (); + } +} + +/* Skips tokens until the end of the next block. i.e. assumes that the block + * has not been entered yet. 
+ */ +template +void +Parser::skip_after_next_block () +{ + const_TokenPtr t = lexer.peek_token (); + + // initial loop - skip until EOF if no left curlies encountered + while (t->get_id () != END_OF_FILE && t->get_id () != LEFT_CURLY) + { + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // if next token is a left curly, skip it and then skip after the block ends + if (t->get_id () == LEFT_CURLY) + { + lexer.skip_token (); + + skip_after_end_block (); + } + // otherwise, do nothing as EOF +} + +/* Skips all tokens until ] (the end of an attribute) - does not skip the ] + * (as designed for attribute body use) */ +template +void +Parser::skip_after_end_attribute () +{ + const_TokenPtr t = lexer.peek_token (); + + while (t->get_id () != RIGHT_SQUARE && t->get_id () != END_OF_FILE) + { + lexer.skip_token (); + t = lexer.peek_token (); + } + + // Don't skip the RIGHT_SQUARE token +} + +// Returns true if the next token is a right curly bracket, ELSE, or EOF. +template +bool +Parser::done_end_or_else () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == ELSE + || t->get_id () == END_OF_FILE); +} + +// Returns true if the next token is a right curly bracket or EOF. +template +bool +Parser::done_end () +{ + const_TokenPtr t = lexer.peek_token (); + return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); +} + +} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h deleted file mode 100644 index 56717709d2e..00000000000 --- a/gcc/rust/parse/rust-parse-impl.h +++ /dev/null @@ -1,14618 +0,0 @@ -// Copyright (C) 2020-2025 Free Software Foundation, Inc. - -// This file is part of GCC. - -// GCC is free software; you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3, or (at your option) any later -// version. 
- -// GCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. - -// You should have received a copy of the GNU General Public License -// along with GCC; see the file COPYING3. If not see -// . - -/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before - * Parser was template). Separated from rust-parse.h for readability. */ - -/* DO NOT INCLUDE ANYWHERE - this is automatically included - * by rust-parse-impl-*.cc - * This is also the reason why there are no include guards. */ - -#include "expected.h" -#include "rust-ast.h" -#include "rust-common.h" -#include "rust-expr.h" -#include "rust-item.h" -#include "rust-common.h" -#include "rust-parse.h" -#include "rust-token.h" -#define INCLUDE_ALGORITHM -#include "rust-diagnostics.h" -#include "rust-dir-owner.h" -#include "rust-attribute-values.h" -#include "rust-keyword-values.h" -#include "rust-edition.h" -#include "rust-parse-error.h" - -#include "optional.h" - -namespace Rust { -// Left binding powers of operations. 
-enum binding_powers -{ - // Highest priority - LBP_HIGHEST = 100, - - LBP_PATH = 95, - - LBP_METHOD_CALL = 90, - - LBP_FIELD_EXPR = 85, - - LBP_FUNCTION_CALL = 80, - LBP_ARRAY_REF = LBP_FUNCTION_CALL, - - LBP_QUESTION_MARK = 75, // unary postfix - counts as left - - LBP_UNARY_PLUS = 70, // Used only when the null denotation is + - LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is - - LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix - LBP_UNARY_EXCLAM = LBP_UNARY_PLUS, - LBP_UNARY_AMP = LBP_UNARY_PLUS, - LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS, - - LBP_AS = 65, - - LBP_MUL = 60, - LBP_DIV = LBP_MUL, - LBP_MOD = LBP_MUL, - - LBP_PLUS = 55, - LBP_MINUS = LBP_PLUS, - - LBP_L_SHIFT = 50, - LBP_R_SHIFT = LBP_L_SHIFT, - - LBP_AMP = 45, - - LBP_CARET = 40, - - LBP_PIPE = 35, - - LBP_EQUAL = 30, - LBP_NOT_EQUAL = LBP_EQUAL, - LBP_SMALLER_THAN = LBP_EQUAL, - LBP_SMALLER_EQUAL = LBP_EQUAL, - LBP_GREATER_THAN = LBP_EQUAL, - LBP_GREATER_EQUAL = LBP_EQUAL, - - LBP_LOGICAL_AND = 25, - - LBP_LOGICAL_OR = 20, - - LBP_DOT_DOT = 15, - LBP_DOT_DOT_EQ = LBP_DOT_DOT, - - // TODO: note all these assig operators are RIGHT associative! - LBP_ASSIG = 10, - LBP_PLUS_ASSIG = LBP_ASSIG, - LBP_MINUS_ASSIG = LBP_ASSIG, - LBP_MULT_ASSIG = LBP_ASSIG, - LBP_DIV_ASSIG = LBP_ASSIG, - LBP_MOD_ASSIG = LBP_ASSIG, - LBP_AMP_ASSIG = LBP_ASSIG, - LBP_PIPE_ASSIG = LBP_ASSIG, - LBP_CARET_ASSIG = LBP_ASSIG, - LBP_L_SHIFT_ASSIG = LBP_ASSIG, - LBP_R_SHIFT_ASSIG = LBP_ASSIG, - - // return, break, and closures as lowest priority? 
- LBP_RETURN = 5, - LBP_BREAK = LBP_RETURN, - LBP_CLOSURE = LBP_RETURN, // unary prefix operators - -#if 0 - // rust precedences - // used for closures - PREC_CLOSURE = -40, - // used for break, continue, return, and yield - PREC_JUMP = -30, - // used for range (although weird comment in rustc about this) - PREC_RANGE = -10, - // used for binary operators mentioned below - also cast, colon (type), - // assign, assign_op - PREC_BINOP = FROM_ASSOC_OP, - // used for box, address_of, let, unary (again, weird comment on let) - PREC_PREFIX = 50, - // used for await, call, method call, field, index, try, - // inline asm, macro invocation - PREC_POSTFIX = 60, - // used for array, repeat, tuple, literal, path, paren, if, - // while, for, 'loop', match, block, try block, async, struct - PREC_PAREN = 99, - PREC_FORCE_PAREN = 100, -#endif - - // lowest priority - LBP_LOWEST = 0 -}; - -/* Returns whether the token can start a type (i.e. there is a valid type - * beginning with the token). */ -inline bool -can_tok_start_type (TokenId id) -{ - switch (id) - { - case EXCLAM: - case LEFT_SQUARE: - case LEFT_ANGLE: - case UNDERSCORE: - case ASTERISK: - case AMP: - case LIFETIME: - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - case LEFT_PAREN: - case FOR: - case ASYNC: - case CONST: - case UNSAFE: - case EXTERN_KW: - case FN_KW: - case IMPL: - case DYN: - case QUESTION_MARK: - return true; - default: - return false; - } -} - -/* Returns whether the token id is (or is likely to be) a right angle bracket. - * i.e. '>', '>>', '>=' and '>>=' tokens. */ -inline bool -is_right_angle_tok (TokenId id) -{ - switch (id) - { - case RIGHT_ANGLE: - case RIGHT_SHIFT: - case GREATER_OR_EQUAL: - case RIGHT_SHIFT_EQ: - return true; - default: - return false; - } -} - -/* HACK-y special handling for skipping a right angle token at the end of - * generic arguments. 
- * Currently, this replaces the "current token" with one that is identical - * except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad - * for several reasons - it modifies the token stream to something that - * actually doesn't make syntactic sense, it may not worked if the token - * has already been skipped, etc. It was done because it would not - * actually require inserting new items into the token stream (which I - * thought would take more work to not mess up) and because I wasn't sure - * if the "already seen right angle" flag in the parser would work - * correctly. - * Those two other approaches listed are in my opinion actually better - * long-term - insertion is probably best as it reflects syntactically - * what occurs. On the other hand, I need to do a code audit to make sure - * that insertion doesn't mess anything up. So that's a FIXME. */ -template -bool -Parser::skip_generics_right_angle () -{ - /* OK, new great idea. Have a lexer method called - * "split_current_token(TokenType newLeft, TokenType newRight)", which is - * called here with whatever arguments are appropriate. That lexer method - * handles "replacing" the current token with the "newLeft" and "inserting" - * the next token with the "newRight" (and creating a location, etc. 
for it) - */ - - /* HACK: special handling for right shift '>>', greater or equal '>=', and - * right shift assig */ - // '>>=' - const_TokenPtr tok = lexer.peek_token (); - switch (tok->get_id ()) - { - case RIGHT_ANGLE: - // this is good - skip token - lexer.skip_token (); - return true; - case RIGHT_SHIFT: - { - // new implementation that should be better - lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE); - lexer.skip_token (); - return true; - } - case GREATER_OR_EQUAL: - { - // new implementation that should be better - lexer.split_current_token (RIGHT_ANGLE, EQUAL); - lexer.skip_token (); - return true; - } - case RIGHT_SHIFT_EQ: - { - // new implementation that should be better - lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL); - lexer.skip_token (); - return true; - } - default: - add_error (Error (tok->get_locus (), - "expected %<>%> at end of generic argument - found %qs", - tok->get_token_description ())); - return false; - } -} - -/* Gets left binding power for specified token. - * Not suitable for use at the moment or possibly ever because binding power - * cannot be purely determined from operator token with Rust grammar - e.g. - * method call and field access have - * different left binding powers but the same operator token. */ -template -int -Parser::left_binding_power (const_TokenPtr token) -{ - // HACK: called with "peek_token()", so lookahead is "peek_token(1)" - switch (token->get_id ()) - { - /* TODO: issue here - distinguish between method calls and field access - * somehow? Also would have to distinguish between paths and function - * calls (:: operator), maybe more stuff. */ - /* Current plan for tackling LBP - don't do it based on token, use - * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr - * and handle other expressions without it. 
rustc only considers - * arithmetic, logical/relational, 'as', - * '?=', ranges, colons, and assignment to have operator precedence and - * associativity rules applicable. It then has - * a separate "ExprPrecedence" that also includes binary operators. */ - - // TODO: handle operator overloading - have a function replace the - // operator? - - /*case DOT: - return LBP_DOT;*/ - - case SCOPE_RESOLUTION: - rust_debug ( - "possible error - looked up LBP of scope resolution operator. should " - "be handled elsewhere."); - return LBP_PATH; - - /* Resolved by lookahead HACK that should work with current code. If next - * token is identifier and token after that isn't parenthesised expression - * list, it is a field reference. */ - case DOT: - if (lexer.peek_token (1)->get_id () == IDENTIFIER - && lexer.peek_token (2)->get_id () != LEFT_PAREN) - { - return LBP_FIELD_EXPR; - } - return LBP_METHOD_CALL; - - case LEFT_PAREN: - return LBP_FUNCTION_CALL; - - case LEFT_SQUARE: - return LBP_ARRAY_REF; - - // postfix question mark (i.e. 
error propagation expression) - case QUESTION_MARK: - return LBP_QUESTION_MARK; - - case AS: - return LBP_AS; - - case ASTERISK: - return LBP_MUL; - case DIV: - return LBP_DIV; - case PERCENT: - return LBP_MOD; - - case PLUS: - return LBP_PLUS; - case MINUS: - return LBP_MINUS; - - case LEFT_SHIFT: - return LBP_L_SHIFT; - case RIGHT_SHIFT: - return LBP_R_SHIFT; - - // binary & operator - case AMP: - return LBP_AMP; - - // binary ^ operator - case CARET: - return LBP_CARET; - - // binary | operator - case PIPE: - return LBP_PIPE; - - case EQUAL_EQUAL: - return LBP_EQUAL; - case NOT_EQUAL: - return LBP_NOT_EQUAL; - case RIGHT_ANGLE: - return LBP_GREATER_THAN; - case GREATER_OR_EQUAL: - return LBP_GREATER_EQUAL; - case LEFT_ANGLE: - return LBP_SMALLER_THAN; - case LESS_OR_EQUAL: - return LBP_SMALLER_EQUAL; - - case LOGICAL_AND: - return LBP_LOGICAL_AND; - - case OR: - return LBP_LOGICAL_OR; - - case DOT_DOT: - return LBP_DOT_DOT; - - case DOT_DOT_EQ: - return LBP_DOT_DOT_EQ; - - case EQUAL: - return LBP_ASSIG; - case PLUS_EQ: - return LBP_PLUS_ASSIG; - case MINUS_EQ: - return LBP_MINUS_ASSIG; - case ASTERISK_EQ: - return LBP_MULT_ASSIG; - case DIV_EQ: - return LBP_DIV_ASSIG; - case PERCENT_EQ: - return LBP_MOD_ASSIG; - case AMP_EQ: - return LBP_AMP_ASSIG; - case PIPE_EQ: - return LBP_PIPE_ASSIG; - case CARET_EQ: - return LBP_CARET_ASSIG; - case LEFT_SHIFT_EQ: - return LBP_L_SHIFT_ASSIG; - case RIGHT_SHIFT_EQ: - return LBP_R_SHIFT_ASSIG; - - /* HACK: float literal due to lexer misidentifying a dot then an integer as - * a float */ - case FLOAT_LITERAL: - return LBP_FIELD_EXPR; - // field expr is same as tuple expr in precedence, i imagine - // TODO: is this needed anymore? lexer shouldn't do that anymore - - // anything that can't appear in an infix position is given lowest priority - default: - return LBP_LOWEST; - } -} - -// Returns true when current token is EOF. 
-template -bool -Parser::done_end_of_file () -{ - return lexer.peek_token ()->get_id () == END_OF_FILE; -} - -// Parses a sequence of items within a module or the implicit top-level module -// in a crate -template -tl::expected>, Parse::Error::Items> -Parser::parse_items () -{ - std::vector> items; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != END_OF_FILE) - { - auto item = parse_item (false); - if (!item) - return Parse::Error::Items::make_malformed (std::move (items)); - - items.push_back (std::move (item.value ())); - - t = lexer.peek_token (); - } - - // GCC 5->7 bug doesn't threat lvalue as an rvalue for the overload -#if __GNUC__ <= 7 - return std::move (items); -#else - return items; -#endif -} - -// Parses a crate (compilation unit) - entry point -template -std::unique_ptr -Parser::parse_crate () -{ - // parse inner attributes - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse items - auto items - = parse_items ().value_or (std::vector>{}); - - // emit all errors - for (const auto &error : error_table) - error.emit (); - - return std::unique_ptr ( - new AST::Crate (std::move (items), std::move (inner_attrs))); -} - -// Parse a contiguous block of inner attributes. -template -AST::AttrVec -Parser::parse_inner_attributes () -{ - AST::AttrVec inner_attributes; - - auto has_valid_inner_attribute_prefix = [&] () { - auto id = lexer.peek_token ()->get_id (); - /* Outer attribute `#[` is not allowed, only accepts `#!` */ - return (id == HASH && lexer.peek_token (1)->get_id () == EXCLAM) - || id == INNER_DOC_COMMENT; - }; - - while (has_valid_inner_attribute_prefix ()) - { - auto inner_attr = parse_inner_attribute (); - - /* Ensure only valid inner attributes are added to the inner_attributes - * list */ - if (inner_attr) - { - inner_attributes.push_back (std::move (inner_attr.value ())); - } - else - { - /* If no more valid inner attributes, break out of loop (only - * contiguous inner attributes parsed). 
*/ - break; - } - } - - inner_attributes.shrink_to_fit (); - return inner_attributes; -} - -// Parse a inner or outer doc comment into an doc attribute -template -std::tuple, location_t> -Parser::parse_doc_comment () -{ - const_TokenPtr token = lexer.peek_token (); - location_t locus = token->get_locus (); - AST::SimplePathSegment segment (Values::Attributes::DOC, locus); - std::vector segments; - segments.push_back (std::move (segment)); - AST::SimplePath attr_path (std::move (segments), false, locus); - AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING, - PrimitiveCoreType::CORETYPE_STR, {}, locus); - std::unique_ptr attr_input ( - new AST::AttrInputLiteral (std::move (lit_expr))); - lexer.skip_token (); - return std::make_tuple (std::move (attr_path), std::move (attr_input), locus); -} - -// Parse a single inner attribute. -template -tl::expected -Parser::parse_inner_attribute () -{ - if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) - { - auto values = parse_doc_comment (); - auto path = std::move (std::get<0> (values)); - auto input = std::move (std::get<1> (values)); - auto loc = std::get<2> (values); - return AST::Attribute (std::move (path), std::move (input), loc, true); - } - - rust_assert (lexer.peek_token ()->get_id () == HASH); - - lexer.skip_token (); - - if (lexer.peek_token ()->get_id () != EXCLAM) - { - Error error (lexer.peek_token ()->get_locus (), - "expected % or %<[%> for inner attribute"); - add_error (std::move (error)); - - return Parse::Error::Attribute::make_malformed (); - } - lexer.skip_token (); - - if (!skip_token (LEFT_SQUARE)) - return Parse::Error::Attribute::make_malformed (); - - auto body_res = parse_attribute_body (); - if (!body_res) - return Parse::Error::Attribute::make_malformed (); - auto body = std::move (body_res.value ()); - - auto actual_attribute - = AST::Attribute (std::move (body.path), std::move (body.input), body.locus, - true); - - if (!skip_token (RIGHT_SQUARE)) - return 
Parse::Error::Attribute::make_malformed (); - - return actual_attribute; -} - -// Parses the body of an attribute (inner or outer). -template -tl::expected -Parser::parse_attribute_body () -{ - location_t locus = lexer.peek_token ()->get_locus (); - - auto attr_path = parse_simple_path (); - // ensure path is valid to parse attribute input - if (!attr_path) - { - Error error (lexer.peek_token ()->get_locus (), - "empty simple path in attribute"); - add_error (std::move (error)); - - // Skip past potential further info in attribute (i.e. attr_input) - skip_after_end_attribute (); - return Parse::Error::AttributeBody::make_invalid_path (); - } - - auto attr_input = parse_attr_input (); - // AttrInput is allowed to be null, so no checks here - if (attr_input) - return Parse::AttributeBody{std::move (attr_path.value ()), - std::move (attr_input.value ()), locus}; - else if (attr_input.error ().kind == Parse::Error::AttrInput::Kind::MISSING) - return Parse::AttributeBody{std::move (attr_path.value ()), nullptr, locus}; - else - return Parse::Error::AttributeBody::make_invalid_attrinput (); -} - -// Parses a SimplePath AST node, if it exists. Does nothing otherwise. -template -tl::expected -Parser::parse_simple_path () -{ - bool has_opening_scope_resolution = false; - location_t locus = UNKNOWN_LOCATION; - - using Parse::Utils::is_simple_path_segment; - - // don't parse anything if not a path upfront - if (!is_simple_path_segment (lexer.peek_token ()->get_id ()) - && !is_simple_path_segment (lexer.peek_token (1)->get_id ())) - return Parse::Error::SimplePath::make_malformed (); - - /* Checks for opening scope resolution (i.e. 
global scope fully-qualified - * path) */ - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) - { - has_opening_scope_resolution = true; - - locus = lexer.peek_token ()->get_locus (); - - lexer.skip_token (); - } - - // Parse single required simple path segment - auto segment = parse_simple_path_segment (); - - if (!segment) - return Parse::Error::SimplePath::make_malformed (); - - // get location if not gotten already - if (locus == UNKNOWN_LOCATION) - locus = segment->get_locus (); - - std::vector segments; - segments.push_back (std::move (segment.value ())); - - // Parse all other simple path segments - while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) - { - auto new_segment = parse_simple_path_segment (1); - - using Error = Parse::Error::SimplePathSegment::Kind; - // Return path as currently constructed if segment in error state. - if (!new_segment) - { - if (new_segment.error ().kind == Error::INVALID_SIMPLE_PATH_TOKEN) - break; /* Could be end of path */ - else /* Any other error is an hard error */ - return Parse::Error::SimplePath::make_malformed (); - } - - segments.push_back (std::move (new_segment.value ())); - } - - return AST::SimplePath (std::move (segments), has_opening_scope_resolution, - locus); - /* TODO: now that is_simple_path_segment exists, could probably start - * actually making errors upon parse failure of segments and whatever */ -} - -/* Parses a single SimplePathSegment (does not handle the scope resolution - * operators) - * Starts parsing at an offset of base_peek */ -template -tl::expected -Parser::parse_simple_path_segment (int base_peek) -{ - using namespace Values; - const_TokenPtr t = lexer.peek_token (base_peek); - switch (t->get_id ()) - { - case IDENTIFIER: - lexer.skip_token (base_peek); - - return AST::SimplePathSegment (t->get_str (), t->get_locus ()); - case SUPER: - lexer.skip_token (base_peek); - - return AST::SimplePathSegment (Keywords::SUPER, t->get_locus ()); - case SELF: - lexer.skip_token (base_peek); 
- - return AST::SimplePathSegment (Keywords::SELF, t->get_locus ()); - case CRATE: - lexer.skip_token (base_peek); - - return AST::SimplePathSegment (Keywords::CRATE, t->get_locus ()); - case DOLLAR_SIGN: - if (lexer.peek_token (base_peek + 1)->get_id () == CRATE) - { - lexer.skip_token (base_peek + 1); - - return AST::SimplePathSegment ("$crate", t->get_locus ()); - } - gcc_fallthrough (); - default: - // do nothing but inactivates warning from gcc when compiling - /* could put the rust_error_at thing here but fallthrough (from failing - * $crate condition) isn't completely obvious if it is. */ - - return Parse::Error::SimplePathSegment::make_invalid_token_or_path_end (); - } - rust_unreachable (); -} - -// Parses a PathIdentSegment - an identifier segment of a non-SimplePath path. -template -tl::expected -Parser::parse_path_ident_segment () -{ - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case IDENTIFIER: - lexer.skip_token (); - - return AST::PathIdentSegment (t->get_str (), t->get_locus ()); - case SUPER: - lexer.skip_token (); - - return AST::PathIdentSegment (Values::Keywords::SUPER, t->get_locus ()); - case SELF: - lexer.skip_token (); - - return AST::PathIdentSegment (Values::Keywords::SELF, t->get_locus ()); - case SELF_ALIAS: - lexer.skip_token (); - - return AST::PathIdentSegment (Values::Keywords::SELF_ALIAS, - t->get_locus ()); - case CRATE: - lexer.skip_token (); - - return AST::PathIdentSegment (Values::Keywords::CRATE, t->get_locus ()); - case DOLLAR_SIGN: - if (lexer.peek_token (1)->get_id () == CRATE) - { - lexer.skip_token (1); - - return AST::PathIdentSegment ("$crate", t->get_locus ()); - } - gcc_fallthrough (); - default: - /* do nothing but inactivates warning from gcc when compiling - * could put the error_at thing here but fallthrough (from failing $crate - * condition) isn't completely obvious if it is. 
*/ - - // test prevent error - return Parse::Error::PathIdentSegment::make_invalid_token (); - } - rust_unreachable (); -} - -// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract) -template -tl::expected, Parse::Error::AttrInput> -Parser::parse_attr_input () -{ - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_PAREN: - case LEFT_SQUARE: - case LEFT_CURLY: - { - auto dtoken_tree = parse_delim_token_tree (); - if (!dtoken_tree) - return Parse::Error::AttrInput::make_bad_token_tree (); - - // must be a delimited token tree, so parse that - std::unique_ptr input_tree ( - new AST::DelimTokenTree (dtoken_tree.value ())); - - return tl::expected, - Parse::Error::AttrInput>{std::move (input_tree)}; - } - case EQUAL: - { - // = LiteralExpr - lexer.skip_token (); - - t = lexer.peek_token (); - - // attempt to parse macro - // TODO: macros may/may not be allowed in attributes - // this is needed for "#[doc = include_str!(...)]" - if (Parse::Utils::is_simple_path_segment (t->get_id ())) - { - std::unique_ptr invoke - = parse_macro_invocation ({}); - - if (!invoke) - return Parse::Error::AttrInput::make_bad_macro_invocation (); - - return std::unique_ptr ( - new AST::AttrInputMacro (std::move (invoke))); - } - - /* Ensure token is a "literal expression" (literally only a literal - * token of any type) */ - if (!t->is_literal ()) - { - Error error ( - t->get_locus (), - "unknown token %qs in attribute body - literal expected", - t->get_token_description ()); - add_error (std::move (error)); - - skip_after_end_attribute (); - return Parse::Error::AttrInput::make_malformed (); - } - - AST::Literal::LitType lit_type = AST::Literal::STRING; - // Crappy mapping of token type to literal type - switch (t->get_id ()) - { - case INT_LITERAL: - lit_type = AST::Literal::INT; - break; - case FLOAT_LITERAL: - lit_type = AST::Literal::FLOAT; - break; - case CHAR_LITERAL: - lit_type = AST::Literal::CHAR; - break; - case BYTE_CHAR_LITERAL: - 
lit_type = AST::Literal::BYTE; - break; - case BYTE_STRING_LITERAL: - lit_type = AST::Literal::BYTE_STRING; - break; - case RAW_STRING_LITERAL: - lit_type = AST::Literal::RAW_STRING; - break; - case STRING_LITERAL: - default: - lit_type = AST::Literal::STRING; - break; // TODO: raw string? don't eliminate it from lexer? - } - - // create actual LiteralExpr - AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (), - {}, t->get_locus ()); - lexer.skip_token (); - - std::unique_ptr attr_input_lit ( - new AST::AttrInputLiteral (std::move (lit_expr))); - - // do checks or whatever? none required, really - - // FIXME: shouldn't a skip token be required here? - - return tl::expected, - Parse::Error::AttrInput>{ - std::move (attr_input_lit)}; - } - break; - case RIGHT_PAREN: - case RIGHT_SQUARE: - case RIGHT_CURLY: - case END_OF_FILE: - // means AttrInput is missing, which is allowed - return Parse::Error::AttrInput::make_missing_attrinput (); - default: - add_error ( - Error (t->get_locus (), - "unknown token %qs in attribute body - attribute input or " - "none expected", - t->get_token_description ())); - - skip_after_end_attribute (); - return Parse::Error::AttrInput::make_malformed (); - } - rust_unreachable (); - // TODO: find out how to stop gcc error on "no return value" -} - -// Parses a delimited token tree -template -tl::expected -Parser::parse_delim_token_tree () -{ - const_TokenPtr t = lexer.peek_token (); - lexer.skip_token (); - location_t initial_loc = t->get_locus (); - - // save delim type to ensure it is reused later - AST::DelimType delim_type = AST::PARENS; - - // Map tokens to DelimType - switch (t->get_id ()) - { - case LEFT_PAREN: - delim_type = AST::PARENS; - break; - case LEFT_SQUARE: - delim_type = AST::SQUARE; - break; - case LEFT_CURLY: - delim_type = AST::CURLY; - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs - expecting delimiters (for a " - "delimited token tree)", - t->get_token_description 
())); - - return Parse::Error::DelimTokenTree::make_expected_delimiter (); - } - - // parse actual token tree vector - 0 or more - std::vector> token_trees_in_tree; - auto delim_open - = std::unique_ptr (new AST::Token (std::move (t))); - token_trees_in_tree.push_back (std::move (delim_open)); - - // repeat loop until finding the matching delimiter - t = lexer.peek_token (); - while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type) - && t->get_id () != END_OF_FILE) - { - auto tok_tree = parse_token_tree (); - if (!tok_tree) - return Parse::Error::DelimTokenTree::make_invalid_token_tree (); - - token_trees_in_tree.push_back (std::move (tok_tree.value ())); - - // lexer.skip_token(); - t = lexer.peek_token (); - } - auto delim_close - = std::unique_ptr (new AST::Token (std::move (t))); - token_trees_in_tree.push_back (std::move (delim_close)); - - AST::DelimTokenTree token_tree (delim_type, std::move (token_trees_in_tree), - initial_loc); - - // parse end delimiters - t = lexer.peek_token (); - - if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) - { - // tokens match opening delimiter, so skip. - lexer.skip_token (); - return token_tree; - } - else - { - // tokens don't match opening delimiters, so produce error - Error error (t->get_locus (), - "unexpected token %qs - expecting closing delimiter %qs " - "(for a delimited token tree)", - t->get_token_description (), - (delim_type == AST::PARENS - ? ")" - : (delim_type == AST::SQUARE ? 
"]" : "}"))); - add_error (std::move (error)); - - return Parse::Error::DelimTokenTree::make_mismatched_delimiters (); - } -} - -// Parses an identifier/keyword as a Token -template -tl::expected, Parse::Error::Token> -Parser::parse_identifier_or_keyword_token () -{ - const_TokenPtr t = lexer.peek_token (); - - if (t->get_id () == IDENTIFIER || token_id_is_keyword (t->get_id ())) - { - lexer.skip_token (); - return std::unique_ptr (new AST::Token (std::move (t))); - } - else - { - add_error (Error (t->get_locus (), "expected keyword or identifier")); - return Parse::Error::Token::make_malformed (); - } -} - -/* Parses a TokenTree syntactical production. This is either a delimited token - * tree or a non-delimiter token. */ -template -tl::expected, Parse::Error::TokenTree> -Parser::parse_token_tree () -{ - const_TokenPtr t = lexer.peek_token (); - - switch (t->get_id ()) - { - case LEFT_PAREN: - case LEFT_SQUARE: - case LEFT_CURLY: - { - // Parse delimited token tree - auto delim_token_tree = parse_delim_token_tree (); - if (!delim_token_tree) - return Parse::Error::TokenTree:: - make_malformed_delimited_token_tree (); - - // TODO: use move rather than copy constructor - return std::unique_ptr ( - new AST::DelimTokenTree (delim_token_tree.value ())); - } - case RIGHT_PAREN: - case RIGHT_SQUARE: - case RIGHT_CURLY: - // error - should not be called when this a token - add_error (Error (t->get_locus (), "unexpected closing delimiter %qs", - t->get_token_description ())); - - add_error (Error (Error::Kind::Hint, t->get_locus (), - "token tree requires either paired delimiters or " - "non-delimiter tokens")); - - lexer.skip_token (); - return Parse::Error::TokenTree::make_malformed (); - default: - // parse token itself as TokenTree - lexer.skip_token (); - return std::unique_ptr (new AST::Token (std::move (t))); - } -} - -template -bool -Parser::is_macro_rules_def (const_TokenPtr t) -{ - auto macro_name = lexer.peek_token (2)->get_id (); - - bool allowed_macro_name = 
(macro_name == IDENTIFIER || macro_name == TRY); - - return t->get_str () == Values::WeakKeywords::MACRO_RULES - && lexer.peek_token (1)->get_id () == EXCLAM && allowed_macro_name; -} - -// Parses a single item -template -tl::expected, Parse::Error::Item> -Parser::parse_item (bool called_from_statement) -{ - // has a "called_from_statement" parameter for better error message handling - - // TODO: GCC 5 does not handle implicit return type correctly so we're forced - // to specify it almost every time until the baseline GCC gets bumped. - // Since this type is quite long and the code is dense we use an alias. - // - // When support for GCC 5 stops: remove this alias as well as the explicit - // ctor calls. - using RType = tl::expected, Parse::Error::Item>; - - // parse outer attributes for item - AST::AttrVec outer_attrs = parse_outer_attributes (); - const_TokenPtr t = lexer.peek_token (); - - switch (t->get_id ()) - { - case END_OF_FILE: - // not necessarily an error, unless we just read outer - // attributes which needs to be attached - if (!outer_attrs.empty ()) - { - Rust::AST::Attribute attr = outer_attrs.back (); - Error error (attr.get_locus (), - "expected item after outer attribute or doc comment"); - add_error (std::move (error)); - } - return Parse::Error::Item::make_end_of_file (); - - case ASYNC: - case PUB: - case MOD: - case EXTERN_KW: - case USE: - case FN_KW: - case TYPE: - case STRUCT_KW: - case ENUM_KW: - case CONST: - case STATIC_KW: - case AUTO: - case TRAIT: - case IMPL: - case MACRO: - /* TODO: implement union keyword but not really because of - * context-dependence crappy hack way to parse a union written below to - * separate it from the good code. 
*/ - // case UNION: - case UNSAFE: // maybe - unsafe traits are a thing - // if any of these (should be all possible VisItem prefixes), parse a - // VisItem - { - auto vis_item = parse_vis_item (std::move (outer_attrs)); - if (!vis_item) - return Parse::Error::Item::make_malformed (); - return RType{std::move (vis_item)}; - } - case SUPER: - case SELF: - case CRATE: - case DOLLAR_SIGN: - // almost certainly macro invocation semi - { - auto macro_invoc_semi - = parse_macro_invocation_semi (std::move (outer_attrs)); - if (!macro_invoc_semi) - return Parse::Error::Item::make_malformed (); - return RType{std::move (macro_invoc_semi)}; - } - // crappy hack to do union "keyword" - case IDENTIFIER: - // TODO: ensure std::string and literal comparison works - if (t->get_str () == Values::WeakKeywords::UNION - && lexer.peek_token (1)->get_id () == IDENTIFIER) - { - auto vis_item = parse_vis_item (std::move (outer_attrs)); - if (!vis_item) - return Parse::Error::Item::make_malformed (); - return RType{std::move (vis_item)}; - // or should this go straight to parsing union? - } - else if (t->get_str () == Values::WeakKeywords::DEFAULT - && lexer.peek_token (1)->get_id () != EXCLAM) - { - add_error (Error (t->get_locus (), - "%qs is only allowed on items within %qs blocks", - "default", "impl")); - return Parse::Error::Item::make_malformed (); - } - else if (is_macro_rules_def (t)) - { - // macro_rules! 
macro item - auto macro_rule_def = parse_macro_rules_def (std::move (outer_attrs)); - if (!macro_rule_def) - return Parse::Error::Item::make_malformed (); - return RType{std::move (macro_rule_def)}; - } - else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION - || lexer.peek_token (1)->get_id () == EXCLAM) - { - /* path (probably) or macro invocation, so probably a macro invocation - * semi */ - auto macro_invocation_semi - = parse_macro_invocation_semi (std::move (outer_attrs)); - if (!macro_invocation_semi) - return Parse::Error::Item::make_malformed (); - return RType{std::move (macro_invocation_semi)}; - } - gcc_fallthrough (); - default: - // otherwise unrecognised - add_error (Error (t->get_locus (), - "unrecognised token %qs for start of %s", - t->get_token_description (), - called_from_statement ? "statement" : "item")); - - // skip somewhere? - return Parse::Error::Item::make_malformed (); - break; - } -} - -// Parses a contiguous block of outer attributes. -template -AST::AttrVec -Parser::parse_outer_attributes () -{ - AST::AttrVec outer_attributes; - - auto has_valid_attribute_prefix = [&] () { - auto id = lexer.peek_token ()->get_id (); - /* We allow inner attributes `#!` and catch the error later */ - return id == HASH || id == OUTER_DOC_COMMENT || id == INNER_DOC_COMMENT; - }; - - while (has_valid_attribute_prefix ()) /* For error handling. */ - { - auto outer_attr = parse_outer_attribute (); - - /* Ensure only valid outer attributes are added to the outer_attributes - * list */ - if (outer_attr) - { - outer_attributes.push_back (std::move (outer_attr.value ())); - } - else - { - /* If no more valid outer attributes, break out of loop (only - * contiguous outer attributes parsed). */ - break; - } - } - - outer_attributes.shrink_to_fit (); - return outer_attributes; - - /* TODO: this shares basically all code with parse_inner_attributes except - * function call - find way of making it more modular? function pointer? 
*/ -} - -// Parse a single outer attribute. -template -tl::expected -Parser::parse_outer_attribute () -{ - if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT) - { - auto values = parse_doc_comment (); - auto path = std::move (std::get<0> (values)); - auto input = std::move (std::get<1> (values)); - auto loc = std::get<2> (values); - return AST::Attribute (std::move (path), std::move (input), loc, false); - } - - if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) - { - Error error ( - lexer.peek_token ()->get_locus (), ErrorCode::E0753, - "expected outer doc comment, inner doc (% or %) only " - "allowed at start of item " - "and before any outer attribute or doc (%<#[%>, % or %)"); - add_error (std::move (error)); - lexer.skip_token (); - return Parse::Error::Attribute::make_unexpected_inner (); - } - - /* OuterAttribute -> '#' '[' Attr ']' */ - - if (lexer.peek_token ()->get_id () != HASH) - return Parse::Error::Attribute::make_malformed (); - - lexer.skip_token (); - - TokenId id = lexer.peek_token ()->get_id (); - if (id != LEFT_SQUARE) - { - if (id == EXCLAM) - { - // this is inner attribute syntax, so throw error - // inner attributes were either already parsed or not allowed here. - Error error ( - lexer.peek_token ()->get_locus (), - "token % found, indicating inner attribute definition. 
Inner " - "attributes are not possible at this location"); - add_error (std::move (error)); - } - return Parse::Error::Attribute::make_unexpected_inner (); - } - - lexer.skip_token (); - - auto body_res = parse_attribute_body (); - if (!body_res) - return Parse::Error::Attribute::make_malformed_body (); - auto body = std::move (body_res.value ()); - - auto actual_attribute - = AST::Attribute (std::move (body.path), std::move (body.input), body.locus, - false); - - if (lexer.peek_token ()->get_id () != RIGHT_SQUARE) - return Parse::Error::Attribute::make_malformed (); - - lexer.skip_token (); - - return actual_attribute; -} - -// Parses a VisItem (item that can have non-default visibility). -template -std::unique_ptr -Parser::parse_vis_item (AST::AttrVec outer_attrs) -{ - // parse visibility, which may or may not exist - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - auto vis = vis_res.value (); - - // select VisItem to create depending on keyword - const_TokenPtr t = lexer.peek_token (); - - switch (t->get_id ()) - { - case MOD: - return parse_module (std::move (vis), std::move (outer_attrs)); - case EXTERN_KW: - // lookahead to resolve syntactical production - t = lexer.peek_token (1); - - switch (t->get_id ()) - { - case CRATE: - return parse_extern_crate (std::move (vis), std::move (outer_attrs)); - case FN_KW: // extern function - return parse_function (std::move (vis), std::move (outer_attrs)); - case LEFT_CURLY: // extern block - return parse_extern_block (std::move (vis), std::move (outer_attrs)); - case STRING_LITERAL: // for specifying extern ABI - // could be extern block or extern function, so more lookahead - t = lexer.peek_token (2); - - switch (t->get_id ()) - { - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - case LEFT_CURLY: - return parse_extern_block (std::move (vis), - std::move (outer_attrs)); - default: - add_error ( - Error (t->get_locus (), - "unexpected token %qs in some sort of 
extern production", - t->get_token_description ())); - - lexer.skip_token (2); // TODO: is this right thing to do? - return nullptr; - } - default: - add_error ( - Error (t->get_locus (), - "unexpected token %qs in some sort of extern production", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? - return nullptr; - } - case USE: - return parse_use_decl (std::move (vis), std::move (outer_attrs)); - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - case TYPE: - return parse_type_alias (std::move (vis), std::move (outer_attrs)); - case STRUCT_KW: - return parse_struct (std::move (vis), std::move (outer_attrs)); - case ENUM_KW: - return parse_enum (std::move (vis), std::move (outer_attrs)); - // TODO: implement union keyword but not really because of - // context-dependence case UNION: crappy hack to do union "keyword" - case IDENTIFIER: - if (t->get_str () == Values::WeakKeywords::UNION - && lexer.peek_token (1)->get_id () == IDENTIFIER) - { - return parse_union (std::move (vis), std::move (outer_attrs)); - // or should item switch go straight to parsing union? - } - else - { - break; - } - case CONST: - // lookahead to resolve syntactical production - t = lexer.peek_token (1); - - switch (t->get_id ()) - { - case IDENTIFIER: - case UNDERSCORE: - return parse_const_item (std::move (vis), std::move (outer_attrs)); - case ASYNC: - return parse_async_item (std::move (vis), std::move (outer_attrs)); - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - default: - add_error ( - Error (t->get_locus (), - "unexpected token %qs in some sort of const production", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? 
- return nullptr; - } - // for async functions - case ASYNC: - return parse_async_item (std::move (vis), std::move (outer_attrs)); - - case STATIC_KW: - return parse_static_item (std::move (vis), std::move (outer_attrs)); - case AUTO: - case TRAIT: - return parse_trait (std::move (vis), std::move (outer_attrs)); - case IMPL: - return parse_impl (std::move (vis), std::move (outer_attrs)); - case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls), - // lookahead to resolve syntactical production - t = lexer.peek_token (1); - - switch (t->get_id ()) - { - case AUTO: - case TRAIT: - return parse_trait (std::move (vis), std::move (outer_attrs)); - case EXTERN_KW: - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - case IMPL: - return parse_impl (std::move (vis), std::move (outer_attrs)); - case MOD: - return parse_module (std::move (vis), std::move (outer_attrs)); - default: - add_error ( - Error (t->get_locus (), - "unexpected token %qs in some sort of unsafe production", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? - return nullptr; - } - case MACRO: - return parse_decl_macro_def (std::move (vis), std::move (outer_attrs)); - default: - // otherwise vis item clearly doesn't exist, which is not an error - // has a catch-all post-switch return to allow other breaks to occur - break; - } - return nullptr; -} - -template -std::unique_ptr -Parser::parse_async_item (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - auto offset = (lexer.peek_token ()->get_id () == CONST) ? 
1 : 0; - const_TokenPtr t = lexer.peek_token (offset); - - if (get_rust_edition () == Edition::E2015) - { - add_error (Error (t->get_locus (), ErrorCode::E0670, - "% is not permitted in Rust 2015")); - add_error ( - Error::Hint (t->get_locus (), - "to use %, switch to Rust 2018 or later")); - } - - t = lexer.peek_token (offset + 1); - - switch (t->get_id ()) - { - case UNSAFE: - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - - default: - add_error ( - Error (t->get_locus (), "expected item, found keyword %")); - - lexer.skip_token (1); - return nullptr; - } -} - -// Parses a macro rules definition syntax extension whatever thing. -template -std::unique_ptr -Parser::parse_macro_rules_def (AST::AttrVec outer_attrs) -{ - // ensure that first token is identifier saying "macro_rules" - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () != IDENTIFIER - || t->get_str () != Values::WeakKeywords::MACRO_RULES) - { - Error error ( - t->get_locus (), - "macro rules definition does not start with %"); - add_error (std::move (error)); - - // skip after somewhere? - return nullptr; - } - lexer.skip_token (); - location_t macro_locus = t->get_locus (); - - if (!skip_token (EXCLAM)) - { - // skip after somewhere? 
- return nullptr; - } - - // parse macro name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - return nullptr; - } - Identifier rule_name{ident_tok}; - - // DEBUG - rust_debug ("in macro rules def, about to parse parens."); - - // save delim type to ensure it is reused later - AST::DelimType delim_type = AST::PARENS; - - // Map tokens to DelimType - t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_PAREN: - delim_type = AST::PARENS; - break; - case LEFT_SQUARE: - delim_type = AST::SQUARE; - break; - case LEFT_CURLY: - delim_type = AST::CURLY; - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs - expecting delimiters (for a " - "macro rules definition)", - t->get_token_description ())); - - return nullptr; - } - lexer.skip_token (); - - // parse actual macro rules - std::vector macro_rules; - - // must be at least one macro rule, so parse it - AST::MacroRule initial_rule = parse_macro_rule (); - if (initial_rule.is_error ()) - { - Error error (lexer.peek_token ()->get_locus (), - "required first macro rule in macro rules definition " - "could not be parsed"); - add_error (std::move (error)); - - // skip after somewhere? 
- return nullptr; - } - macro_rules.push_back (std::move (initial_rule)); - - // DEBUG - rust_debug ("successfully pushed back initial macro rule"); - - t = lexer.peek_token (); - // parse macro rules - while (t->get_id () == SEMICOLON) - { - // skip semicolon - lexer.skip_token (); - - // don't parse if end of macro rules - if (Parse::Utils::token_id_matches_delims (lexer.peek_token ()->get_id (), - delim_type)) - { - // DEBUG - rust_debug ( - "broke out of parsing macro rules loop due to finding delim"); - - break; - } - - // try to parse next rule - AST::MacroRule rule = parse_macro_rule (); - if (rule.is_error ()) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse macro rule in macro rules definition"); - add_error (std::move (error)); - - return nullptr; - } - - macro_rules.push_back (std::move (rule)); - - // DEBUG - rust_debug ("successfully pushed back another macro rule"); - - t = lexer.peek_token (); - } - - // parse end delimiters - t = lexer.peek_token (); - if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) - { - // tokens match opening delimiter, so skip. - lexer.skip_token (); - - if (delim_type != AST::CURLY) - { - // skip semicolon at end of non-curly macro definitions - if (!skip_token (SEMICOLON)) - { - // as this is the end, allow recovery (probably) - may change - return std::unique_ptr ( - AST::MacroRulesDefinition::mbe ( - std::move (rule_name), delim_type, std::move (macro_rules), - std::move (outer_attrs), macro_locus)); - } - } - - return std::unique_ptr ( - AST::MacroRulesDefinition::mbe (std::move (rule_name), delim_type, - std::move (macro_rules), - std::move (outer_attrs), macro_locus)); - } - else - { - // tokens don't match opening delimiters, so produce error - Error error (t->get_locus (), - "unexpected token %qs - expecting closing delimiter %qs " - "(for a macro rules definition)", - t->get_token_description (), - (delim_type == AST::PARENS - ? ")" - : (delim_type == AST::SQUARE ? 
"]" : "}"))); - add_error (std::move (error)); - - /* return empty macro definiton despite possibly parsing mostly valid one - * - TODO is this a good idea? */ - return nullptr; - } -} - -// Parses a declarative macro 2.0 definition. -template -std::unique_ptr -Parser::parse_decl_macro_def (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - // ensure that first token is identifier saying "macro" - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () != MACRO) - { - Error error ( - t->get_locus (), - "declarative macro definition does not start with %"); - add_error (std::move (error)); - - // skip after somewhere? - return nullptr; - } - lexer.skip_token (); - location_t macro_locus = t->get_locus (); - - // parse macro name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - return nullptr; - } - Identifier rule_name{ident_tok}; - - t = lexer.peek_token (); - if (t->get_id () == LEFT_PAREN) - { - // single definiton of macro rule - // e.g. 
`macro foo($e:expr) {}` - - // parse macro matcher - location_t locus = lexer.peek_token ()->get_locus (); - AST::MacroMatcher matcher = parse_macro_matcher (); - if (matcher.is_error ()) - return nullptr; - - // check delimiter of macro matcher - if (matcher.get_delim_type () != AST::DelimType::PARENS) - { - Error error (locus, "only parenthesis can be used for a macro " - "matcher in declarative macro definition"); - add_error (std::move (error)); - return nullptr; - } - - location_t transcriber_loc = lexer.peek_token ()->get_locus (); - auto delim_tok_tree = parse_delim_token_tree (); - if (!delim_tok_tree) - return nullptr; - - AST::MacroTranscriber transcriber (delim_tok_tree.value (), - transcriber_loc); - - if (transcriber.get_token_tree ().get_delim_type () - != AST::DelimType::CURLY) - { - Error error (transcriber_loc, - "only braces can be used for a macro transcriber " - "in declarative macro definition"); - add_error (std::move (error)); - return nullptr; - } - - std::vector macro_rules; - macro_rules.emplace_back (std::move (matcher), std::move (transcriber), - locus); - - return std::unique_ptr ( - AST::MacroRulesDefinition::decl_macro (std::move (rule_name), - macro_rules, - std::move (outer_attrs), - macro_locus, vis)); - } - else if (t->get_id () == LEFT_CURLY) - { - // multiple definitions of macro rule separated by comma - // e.g. `macro foo { () => {}, ($e:expr) => {}, }` - - // parse left curly - const_TokenPtr left_curly = expect_token (LEFT_CURLY); - if (left_curly == nullptr) - { - return nullptr; - } - - // parse actual macro rules - std::vector macro_rules; - - // must be at least one macro rule, so parse it - AST::MacroRule initial_rule = parse_macro_rule (); - if (initial_rule.is_error ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "required first macro rule in declarative macro definition " - "could not be parsed"); - add_error (std::move (error)); - - // skip after somewhere? 
- return nullptr; - } - macro_rules.push_back (std::move (initial_rule)); - - t = lexer.peek_token (); - // parse macro rules - while (t->get_id () == COMMA) - { - // skip comma - lexer.skip_token (); - - // don't parse if end of macro rules - if (Parse::Utils::token_id_matches_delims ( - lexer.peek_token ()->get_id (), AST::CURLY)) - { - break; - } - - // try to parse next rule - AST::MacroRule rule = parse_macro_rule (); - if (rule.is_error ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse macro rule in declarative macro definition"); - add_error (std::move (error)); - - return nullptr; - } - - macro_rules.push_back (std::move (rule)); - - t = lexer.peek_token (); - } - - // parse right curly - const_TokenPtr right_curly = expect_token (RIGHT_CURLY); - if (right_curly == nullptr) - { - return nullptr; - } - - return std::unique_ptr ( - AST::MacroRulesDefinition::decl_macro (std::move (rule_name), - std::move (macro_rules), - std::move (outer_attrs), - macro_locus, vis)); - } - else - { - add_error (Error (t->get_locus (), - "unexpected token %qs - expecting delimiters " - "(for a declarative macro definiton)", - t->get_token_description ())); - return nullptr; - } -} - -// Parses a semi-coloned (except for full block) macro invocation item. -template -std::unique_ptr -Parser::parse_macro_invocation_semi ( - AST::AttrVec outer_attrs) -{ - location_t macro_locus = lexer.peek_token ()->get_locus (); - auto path = parse_simple_path (); - if (!path) - return nullptr; - - if (!skip_token (EXCLAM)) - { - // skip after somewhere? 
- return nullptr; - } - - // save delim type to ensure it is reused later - AST::DelimType delim_type = AST::PARENS; - - // Map tokens to DelimType - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_PAREN: - delim_type = AST::PARENS; - break; - case LEFT_SQUARE: - delim_type = AST::SQUARE; - break; - case LEFT_CURLY: - delim_type = AST::CURLY; - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs - expecting delimiters (for a " - "macro invocation semi body)", - t->get_token_description ())); - - return nullptr; - } - location_t tok_tree_locus = t->get_locus (); - lexer.skip_token (); - - // parse actual token trees - std::vector> token_trees; - auto delim_open - = std::unique_ptr (new AST::Token (std::move (t))); - token_trees.push_back (std::move (delim_open)); - - t = lexer.peek_token (); - // parse token trees until the initial delimiter token is found again - while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type) - && t->get_id () != END_OF_FILE) - { - auto tree = parse_token_tree (); - if (!tree) - return nullptr; - - token_trees.push_back (std::move (tree.value ())); - - t = lexer.peek_token (); - } - auto delim_close - = std::unique_ptr (new AST::Token (std::move (t))); - token_trees.push_back (std::move (delim_close)); - - AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees), - tok_tree_locus); - AST::MacroInvocData invoc_data (std::move (path.value ()), - std::move (delim_tok_tree)); - - // parse end delimiters - t = lexer.peek_token (); - if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) - { - // tokens match opening delimiter, so skip. 
- lexer.skip_token (); - - if (delim_type != AST::CURLY) - { - // skip semicolon at end of non-curly macro invocation semis - if (!skip_token (SEMICOLON)) - { - // as this is the end, allow recovery (probably) - may change - - return AST::MacroInvocation::Regular (std::move (invoc_data), - std::move (outer_attrs), - macro_locus, true); - } - } - - // DEBUG: - rust_debug ("skipped token is '%s', next token (current peek) is '%s'", - t->get_token_description (), - lexer.peek_token ()->get_token_description ()); - - return AST::MacroInvocation::Regular (std::move (invoc_data), - std::move (outer_attrs), - macro_locus, true); - } - else - { - // tokens don't match opening delimiters, so produce error - Error error (t->get_locus (), - "unexpected token %qs - expecting closing delimiter %qs " - "(for a macro invocation semi)", - t->get_token_description (), - (delim_type == AST::PARENS - ? ")" - : (delim_type == AST::SQUARE ? "]" : "}"))); - add_error (std::move (error)); - - /* return empty macro invocation despite possibly parsing mostly valid one - * - TODO is this a good idea? */ - return nullptr; - } -} - -// Parses a non-semicoloned macro invocation (i.e. as pattern or expression). -template -std::unique_ptr -Parser::parse_macro_invocation (AST::AttrVec outer_attrs) -{ - // parse macro path - auto macro_path = parse_simple_path (); - if (!macro_path) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse macro invocation path"); - add_error (std::move (error)); - - // skip? - return nullptr; - } - - if (!skip_token (EXCLAM)) - { - // skip after somewhere? 
- return nullptr; - } - - // parse internal delim token tree - auto delim_tok_tree = parse_delim_token_tree (); - if (!delim_tok_tree) - return nullptr; - - location_t macro_locus = macro_path->get_locus (); - - return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (macro_path.value ()), - std::move (delim_tok_tree.value ())), - std::move (outer_attrs), macro_locus); -} - -// Parses a macro rule definition - does not parse semicolons. -template -AST::MacroRule -Parser::parse_macro_rule () -{ - location_t locus = lexer.peek_token ()->get_locus (); - - // parse macro matcher - AST::MacroMatcher matcher = parse_macro_matcher (); - - if (matcher.is_error ()) - return AST::MacroRule::create_error (locus); - - if (!skip_token (MATCH_ARROW)) - { - // skip after somewhere? - return AST::MacroRule::create_error (locus); - } - - // parse transcriber (this is just a delim token tree) - location_t token_tree_loc = lexer.peek_token ()->get_locus (); - auto delim_token_tree = parse_delim_token_tree (); - if (!delim_token_tree) - return AST::MacroRule::create_error (token_tree_loc); - - AST::MacroTranscriber transcriber (delim_token_tree.value (), token_tree_loc); - - return AST::MacroRule (std::move (matcher), std::move (transcriber), locus); -} - -// Parses a macro matcher (part of a macro rule definition). 
-template -AST::MacroMatcher -Parser::parse_macro_matcher () -{ - // save delim type to ensure it is reused later - AST::DelimType delim_type = AST::PARENS; - - // DEBUG - rust_debug ("begun parsing macro matcher"); - - // Map tokens to DelimType - const_TokenPtr t = lexer.peek_token (); - location_t locus = t->get_locus (); - switch (t->get_id ()) - { - case LEFT_PAREN: - delim_type = AST::PARENS; - break; - case LEFT_SQUARE: - delim_type = AST::SQUARE; - break; - case LEFT_CURLY: - delim_type = AST::CURLY; - break; - default: - add_error (Error ( - t->get_locus (), - "unexpected token %qs - expecting delimiters (for a macro matcher)", - t->get_token_description ())); - - return AST::MacroMatcher::create_error (t->get_locus ()); - } - lexer.skip_token (); - - // parse actual macro matches - std::vector> matches; - // Set of possible preceding macro matches to make sure follow-set - // restrictions are respected. - // TODO: Consider using std::reference_wrapper instead of raw pointers? - std::vector last_matches; - - t = lexer.peek_token (); - // parse token trees until the initial delimiter token is found again - while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) - { - std::unique_ptr match = parse_macro_match (); - - if (match == nullptr) - { - Error error ( - t->get_locus (), - "failed to parse macro match for macro matcher - found %qs", - t->get_token_description ()); - add_error (std::move (error)); - - return AST::MacroMatcher::create_error (t->get_locus ()); - } - - if (matches.size () > 0) - { - const auto *last_match = matches.back ().get (); - - // We want to check if we are dealing with a zeroable repetition - bool zeroable = false; - if (last_match->get_macro_match_type () - == AST::MacroMatch::MacroMatchType::Repetition) - { - auto repetition - = static_cast (last_match); - - if (repetition->get_op () - != AST::MacroMatchRepetition::MacroRepOp::ONE_OR_MORE) - zeroable = true; - } - - if (!zeroable) - last_matches.clear (); - - 
last_matches.emplace_back (last_match); - - for (auto last : last_matches) - if (!is_match_compatible (*last, *match)) - return AST::MacroMatcher::create_error ( - match->get_match_locus ()); - } - - matches.push_back (std::move (match)); - - // DEBUG - rust_debug ("pushed back a match in macro matcher"); - - t = lexer.peek_token (); - } - - // parse end delimiters - t = lexer.peek_token (); - if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) - { - // tokens match opening delimiter, so skip. - lexer.skip_token (); - - return AST::MacroMatcher (delim_type, std::move (matches), locus); - } - else - { - // tokens don't match opening delimiters, so produce error - Error error (t->get_locus (), - "unexpected token %qs - expecting closing delimiter %qs " - "(for a macro matcher)", - t->get_token_description (), - (delim_type == AST::PARENS - ? ")" - : (delim_type == AST::SQUARE ? "]" : "}"))); - add_error (std::move (error)); - - /* return error macro matcher despite possibly parsing mostly correct one? - * TODO is this the best idea? */ - return AST::MacroMatcher::create_error (t->get_locus ()); - } -} - -// Parses a macro match (syntax match inside a matcher in a macro rule). 
-template -std::unique_ptr -Parser::parse_macro_match () -{ - // branch based on token available - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_PAREN: - case LEFT_SQUARE: - case LEFT_CURLY: - { - // must be macro matcher as delimited - AST::MacroMatcher matcher = parse_macro_matcher (); - if (matcher.is_error ()) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse macro matcher in macro match"); - add_error (std::move (error)); - - return nullptr; - } - return std::unique_ptr ( - new AST::MacroMatcher (std::move (matcher))); - } - case DOLLAR_SIGN: - { - // have to do more lookahead to determine if fragment or repetition - const_TokenPtr t2 = lexer.peek_token (1); - switch (t2->get_id ()) - { - case IDENTIFIER: - case UNDERSCORE: - // macro fragment - return parse_macro_match_fragment (); - case LEFT_PAREN: - // macro repetition - return parse_macro_match_repetition (); - default: - if (token_id_is_keyword (t2->get_id ()) && t2->get_id () != CRATE) - { - // keyword as macro fragment - return parse_macro_match_fragment (); - } - else - { - // error: unrecognised - add_error (Error ( - t2->get_locus (), - "unrecognised token combination %<$%s%> at start of " - "macro match - did you mean %<$identifier%> or %<$(%>?", - t2->get_token_description ())); - - // skip somewhere? - return nullptr; - } - } - } - case RIGHT_PAREN: - case RIGHT_SQUARE: - case RIGHT_CURLY: - // not allowed - add_error (Error ( - t->get_locus (), - "closing delimiters like %qs are not allowed at the start of a macro " - "match", - t->get_token_description ())); - - // skip somewhere? - return nullptr; - default: - // just the token - lexer.skip_token (); - return std::unique_ptr (new AST::Token (std::move (t))); - } -} - -// Parses a fragment macro match. 
-template -std::unique_ptr -Parser::parse_macro_match_fragment () -{ - location_t fragment_locus = lexer.peek_token ()->get_locus (); - skip_token (DOLLAR_SIGN); - - Identifier ident; - auto identifier = lexer.peek_token (); - if (identifier->get_id () == UNDERSCORE) - ident = {Values::Keywords::UNDERSCORE, identifier->get_locus ()}; - else - ident = {identifier}; - - if (ident.empty ()) - { - Error error (lexer.peek_token ()->get_locus (), - "missing identifier in macro match fragment"); - add_error (std::move (error)); - - return nullptr; - } - skip_token (identifier->get_id ()); - - if (!skip_token (COLON)) - { - // skip after somewhere? - return nullptr; - } - - // get MacroFragSpec for macro - const_TokenPtr t = expect_token (IDENTIFIER); - if (t == nullptr) - return nullptr; - - AST::MacroFragSpec frag - = AST::MacroFragSpec::get_frag_spec_from_str (t->get_str ()); - if (frag.is_error ()) - { - Error error (t->get_locus (), - "invalid fragment specifier %qs in fragment macro match", - t->get_str ().c_str ()); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::MacroMatchFragment (std::move (ident), frag, fragment_locus)); -} - -// Parses a repetition macro match. -template -std::unique_ptr -Parser::parse_macro_match_repetition () -{ - skip_token (DOLLAR_SIGN); - skip_token (LEFT_PAREN); - - std::vector> matches; - - // parse required first macro match - std::unique_ptr initial_match = parse_macro_match (); - if (initial_match == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "could not parse required first macro match in macro match repetition"); - add_error (std::move (error)); - - // skip after somewhere? 
- return nullptr; - } - matches.push_back (std::move (initial_match)); - - // parse optional later macro matches - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN) - { - std::unique_ptr match = parse_macro_match (); - - if (match == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse macro match in macro match repetition"); - add_error (std::move (error)); - - return nullptr; - } - - matches.push_back (std::move (match)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - // skip after somewhere? - return nullptr; - } - - t = lexer.peek_token (); - // see if separator token exists - std::unique_ptr separator = nullptr; - switch (t->get_id ()) - { - // repetition operators - case ASTERISK: - case PLUS: - case QUESTION_MARK: - // delimiters - case LEFT_PAREN: - case LEFT_CURLY: - case LEFT_SQUARE: - case RIGHT_PAREN: - case RIGHT_CURLY: - case RIGHT_SQUARE: - // separator does not exist, so still null and don't skip token - break; - default: - // separator does exist - separator = std::unique_ptr (new AST::Token (std::move (t))); - lexer.skip_token (); - break; - } - - // parse repetition operator - t = lexer.peek_token (); - AST::MacroMatchRepetition::MacroRepOp op = AST::MacroMatchRepetition::NONE; - switch (t->get_id ()) - { - case ASTERISK: - op = AST::MacroMatchRepetition::ANY; - lexer.skip_token (); - break; - case PLUS: - op = AST::MacroMatchRepetition::ONE_OR_MORE; - lexer.skip_token (); - break; - case QUESTION_MARK: - op = AST::MacroMatchRepetition::ZERO_OR_ONE; - lexer.skip_token (); - - if (separator != nullptr) - { - add_error ( - Error (separator->get_locus (), - "the % macro repetition operator does not take a " - "separator")); - separator = nullptr; - } - - break; - default: - add_error ( - Error (t->get_locus (), - "expected macro repetition operator (%<*%>, %<+%>, or %) in " - "macro match - found %qs", - t->get_token_description ())); - - // skip after somewhere? 
- return nullptr; - } - - return std::unique_ptr ( - new AST::MacroMatchRepetition (std::move (matches), op, - std::move (separator), t->get_locus ())); -} - -/* Parses a visibility syntactical production (i.e. creating a non-default - * visibility) */ -template -tl::expected -Parser::parse_visibility () -{ - // check for no visibility - if (lexer.peek_token ()->get_id () != PUB) - { - return AST::Visibility::create_private (); - } - - auto vis_loc = lexer.peek_token ()->get_locus (); - lexer.skip_token (); - - // create simple pub visibility if - // - found no parentheses - // - found unit type `()` - if (lexer.peek_token ()->get_id () != LEFT_PAREN - || lexer.peek_token (1)->get_id () == RIGHT_PAREN) - { - return AST::Visibility::create_public (vis_loc); - // or whatever - } - - lexer.skip_token (); - - const_TokenPtr t = lexer.peek_token (); - auto path_loc = t->get_locus (); - - switch (t->get_id ()) - { - case CRATE: - lexer.skip_token (); - - skip_token (RIGHT_PAREN); - - return AST::Visibility::create_crate (path_loc, vis_loc); - case SELF: - lexer.skip_token (); - - skip_token (RIGHT_PAREN); - - return AST::Visibility::create_self (path_loc, vis_loc); - case SUPER: - lexer.skip_token (); - - skip_token (RIGHT_PAREN); - - return AST::Visibility::create_super (path_loc, vis_loc); - case IN: - { - lexer.skip_token (); - - // parse the "in" path as well - auto path = parse_simple_path (); - if (!path) - { - Error error (lexer.peek_token ()->get_locus (), - "missing path in pub(in path) visibility"); - add_error (std::move (error)); - - // skip after somewhere? 
- return Parse::Error::Visibility::make_missing_path (); - } - - skip_token (RIGHT_PAREN); - - return AST::Visibility::create_in_path (std::move (path.value ()), - vis_loc); - } - default: - add_error (Error (t->get_locus (), "unexpected token %qs in visibility", - t->get_token_description ())); - - lexer.skip_token (); - return Parse::Error::Visibility::make_malformed (); - } -} - -// Parses a module - either a bodied module or a module defined in another file. -template -std::unique_ptr -Parser::parse_module (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - - Unsafety safety = Unsafety::Normal; - if (lexer.peek_token ()->get_id () == UNSAFE) - { - safety = Unsafety::Unsafe; - skip_token (UNSAFE); - } - - skip_token (MOD); - - const_TokenPtr module_name = expect_token (IDENTIFIER); - if (module_name == nullptr) - { - return nullptr; - } - Identifier name{module_name}; - - const_TokenPtr t = lexer.peek_token (); - - switch (t->get_id ()) - { - case SEMICOLON: - lexer.skip_token (); - - // Construct an external module - return std::unique_ptr ( - new AST::Module (std::move (name), std::move (vis), - std::move (outer_attrs), locus, safety, - lexer.get_filename (), inline_module_stack)); - case LEFT_CURLY: - { - lexer.skip_token (); - - // parse inner attributes - AST::AttrVec inner_attrs = parse_inner_attributes (); - - std::string default_path = name.as_string (); - - if (inline_module_stack.empty ()) - { - std::string filename = lexer.get_filename (); - auto slash_idx = filename.rfind (file_separator); - if (slash_idx == std::string::npos) - slash_idx = 0; - else - slash_idx++; - filename = filename.substr (slash_idx); - - std::string subdir; - if (get_file_subdir (filename, subdir)) - default_path = subdir + file_separator + name.as_string (); - } - - std::string module_path_name - = extract_module_path (inner_attrs, outer_attrs, default_path); - InlineModuleStackScope scope (*this, std::move 
(module_path_name)); - - // parse items - std::vector> items; - const_TokenPtr tok = lexer.peek_token (); - while (tok->get_id () != RIGHT_CURLY) - { - auto item = parse_item (false); - if (!item) - { - Error error (tok->get_locus (), - "failed to parse item in module"); - add_error (std::move (error)); - - return nullptr; - } - - items.push_back (std::move (item.value ())); - - tok = lexer.peek_token (); - } - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::Module (std::move (name), locus, std::move (items), - std::move (vis), safety, std::move (inner_attrs), - std::move (outer_attrs))); // module name? - } - default: - add_error ( - Error (t->get_locus (), - "unexpected token %qs in module declaration/definition item", - t->get_token_description ())); - - lexer.skip_token (); - return nullptr; - } -} - -// Parses an extern crate declaration (dependency on external crate) -template -std::unique_ptr -Parser::parse_extern_crate (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - if (!skip_token (EXTERN_KW)) - { - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (CRATE)) - { - skip_after_semicolon (); - return nullptr; - } - - /* parse crate reference name - this has its own syntactical rule in reference - * but seems to not be used elsewhere, so i'm putting it here */ - const_TokenPtr crate_name_tok = lexer.peek_token (); - std::string crate_name; - - switch (crate_name_tok->get_id ()) - { - case IDENTIFIER: - crate_name = crate_name_tok->get_str (); - lexer.skip_token (); - break; - case SELF: - crate_name = Values::Keywords::SELF; - lexer.skip_token (); - break; - default: - add_error ( - Error (crate_name_tok->get_locus (), - "expecting crate name (identifier or %), found %qs", - crate_name_tok->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } - - // don't parse as clause if it doesn't 
exist - if (lexer.peek_token ()->get_id () == SEMICOLON) - { - lexer.skip_token (); - - return std::unique_ptr ( - new AST::ExternCrate (std::move (crate_name), std::move (vis), - std::move (outer_attrs), locus)); - } - - /* parse as clause - this also has its own syntactical rule in reference and - * also seems to not be used elsewhere, so including here again. */ - if (!skip_token (AS)) - { - skip_after_semicolon (); - return nullptr; - } - - const_TokenPtr as_name_tok = lexer.peek_token (); - std::string as_name; - - switch (as_name_tok->get_id ()) - { - case IDENTIFIER: - as_name = as_name_tok->get_str (); - lexer.skip_token (); - break; - case UNDERSCORE: - as_name = Values::Keywords::UNDERSCORE; - lexer.skip_token (); - break; - default: - add_error ( - Error (as_name_tok->get_locus (), - "expecting as clause name (identifier or %<_%>), found %qs", - as_name_tok->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (SEMICOLON)) - { - skip_after_semicolon (); - return nullptr; - } - - return std::unique_ptr ( - new AST::ExternCrate (std::move (crate_name), std::move (vis), - std::move (outer_attrs), locus, std::move (as_name))); -} - -// Parses a use declaration. 
-template -std::unique_ptr -Parser::parse_use_decl (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - if (!skip_token (USE)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse use tree, which is required - std::unique_ptr use_tree = parse_use_tree (); - if (use_tree == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse use tree in use declaration"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (SEMICOLON)) - { - skip_after_semicolon (); - return nullptr; - } - - return std::unique_ptr ( - new AST::UseDeclaration (std::move (use_tree), std::move (vis), - std::move (outer_attrs), locus)); -} - -// Parses a use tree (which can be recursive and is actually a base class). -template -std::unique_ptr -Parser::parse_use_tree () -{ - /* potential syntax definitions in attempt to get algorithm: - * Glob: - * <- SimplePath :: * - * <- :: * - * <- * - * Nested tree thing: - * <- SimplePath :: { COMPLICATED_INNER_TREE_THING } - * <- :: COMPLICATED_INNER_TREE_THING } - * <- { COMPLICATED_INNER_TREE_THING } - * Rebind thing: - * <- SimplePath as IDENTIFIER - * <- SimplePath as _ - * <- SimplePath - */ - - /* current plan of attack: try to parse SimplePath first - if fails, one of - * top two then try parse :: - if fails, one of top two. Next is deciding - * character for top two. 
*/ - - /* Thus, parsing smaller parts of use tree may require feeding into function - * via parameters (or could handle all in this single function because other - * use tree types aren't recognised as separate in the spec) */ - - // TODO: I think this function is too complex, probably should split it - - location_t locus = lexer.peek_token ()->get_locus (); - - // bool has_path = false; - auto path = parse_simple_path (); - - if (!path) - { - // has no path, so must be glob or nested tree UseTree type - - bool is_global = false; - - // check for global scope resolution operator - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) - { - lexer.skip_token (); - is_global = true; - } - - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case ASTERISK: - // glob UseTree type - lexer.skip_token (); - - if (is_global) - return std::unique_ptr ( - new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL, - AST::SimplePath::create_empty (), locus)); - else - return std::unique_ptr ( - new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH, - AST::SimplePath::create_empty (), locus)); - case LEFT_CURLY: - { - // nested tree UseTree type - lexer.skip_token (); - - std::vector> use_trees; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr use_tree = parse_use_tree (); - if (use_tree == nullptr) - { - break; - } - - use_trees.push_back (std::move (use_tree)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - // skip end curly delimiter - if (!skip_token (RIGHT_CURLY)) - { - // skip after somewhere? 
- return nullptr; - } - - if (is_global) - return std::unique_ptr ( - new AST::UseTreeList (AST::UseTreeList::GLOBAL, - AST::SimplePath::create_empty (), - std::move (use_trees), locus)); - else - return std::unique_ptr ( - new AST::UseTreeList (AST::UseTreeList::NO_PATH, - AST::SimplePath::create_empty (), - std::move (use_trees), locus)); - } - case AS: - // this is not allowed - add_error (Error ( - t->get_locus (), - "use declaration with rebind % requires a valid simple path - " - "none found")); - - skip_after_semicolon (); - return nullptr; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in use tree with " - "no valid simple path (i.e. list" - " or glob use tree)", - t->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } - } - else - { - const_TokenPtr t = lexer.peek_token (); - - switch (t->get_id ()) - { - case AS: - { - // rebind UseTree type - lexer.skip_token (); - - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case IDENTIFIER: - // skip lexer token - lexer.skip_token (); - - return std::unique_ptr ( - new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER, - std::move (path.value ()), locus, t)); - case UNDERSCORE: - // skip lexer token - lexer.skip_token (); - - return std::unique_ptr ( - new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD, - std::move (path.value ()), locus, - {Values::Keywords::UNDERSCORE, - t->get_locus ()})); - default: - add_error (Error ( - t->get_locus (), - "unexpected token %qs in use tree with as clause - expected " - "identifier or %<_%>", - t->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } - } - case SEMICOLON: - // rebind UseTree type without rebinding - path only - - // don't skip semicolon - handled in parse_use_tree - // lexer.skip_token(); - case COMMA: - case RIGHT_CURLY: - // this may occur in recursive calls - assume it is ok and ignore it - return std::unique_ptr ( - new AST::UseTreeRebind 
(AST::UseTreeRebind::NONE, - std::move (path.value ()), locus)); - case SCOPE_RESOLUTION: - // keep going - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in use tree with valid path", - t->get_token_description ())); - return nullptr; - } - - skip_token (); - t = lexer.peek_token (); - - switch (t->get_id ()) - { - case ASTERISK: - // glob UseTree type - lexer.skip_token (); - - return std::unique_ptr ( - new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED, - std::move (path.value ()), locus)); - case LEFT_CURLY: - { - // nested tree UseTree type - lexer.skip_token (); - - std::vector> use_trees; - - // TODO: think of better control structure - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr use_tree = parse_use_tree (); - if (use_tree == nullptr) - { - break; - } - - use_trees.push_back (std::move (use_tree)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - // skip end curly delimiter - if (!skip_token (RIGHT_CURLY)) - { - // skip after somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED, - std::move (path.value ()), - std::move (use_trees), locus)); - } - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in use tree with valid path", - t->get_token_description ())); - - // skip_after_semicolon(); - return nullptr; - } - } -} - -// Parses a function (not a method). 
-template -std::unique_ptr -Parser::parse_function (AST::Visibility vis, - AST::AttrVec outer_attrs, - bool is_external) -{ - location_t locus = lexer.peek_token ()->get_locus (); - // Get qualifiers for function if they exist - AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); - - skip_token (FN_KW); - - // Save function name token - const_TokenPtr function_name_tok = expect_token (IDENTIFIER); - if (function_name_tok == nullptr) - { - skip_after_next_block (); - return nullptr; - } - Identifier function_name{function_name_tok}; - - // parse generic params - if exist - std::vector> generic_params - = parse_generic_params_in_angles (); - - if (!skip_token (LEFT_PAREN)) - { - Error error (lexer.peek_token ()->get_locus (), - "function declaration missing opening parentheses before " - "parameter list"); - add_error (std::move (error)); - - skip_after_next_block (); - return nullptr; - } - - auto initial_param = parse_self_param (); - - if (!initial_param.has_value () - && initial_param.error () != ParseSelfError::NOT_SELF) - return nullptr; - - if (initial_param.has_value () && lexer.peek_token ()->get_id () == COMMA) - skip_token (); - - // parse function parameters (only if next token isn't right paren) - std::vector> function_params; - - if (lexer.peek_token ()->get_id () != RIGHT_PAREN) - function_params - = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); - - if (initial_param.has_value ()) - function_params.insert (function_params.begin (), - std::move (*initial_param)); - - if (!skip_token (RIGHT_PAREN)) - { - Error error (lexer.peek_token ()->get_locus (), - "function declaration missing closing parentheses after " - "parameter list"); - add_error (std::move (error)); - - skip_after_next_block (); - return nullptr; - } - - // parse function return type - if exists - std::unique_ptr return_type = parse_function_return_type (); - - // parse where clause - if exists - AST::WhereClause where_clause = parse_where_clause (); 
- - tl::optional> body = tl::nullopt; - if (lexer.peek_token ()->get_id () == SEMICOLON) - lexer.skip_token (); - else - { - std::unique_ptr block_expr = parse_block_expr (); - if (block_expr == nullptr) - return nullptr; - body = std::move (block_expr); - } - - return std::unique_ptr ( - new AST::Function (std::move (function_name), std::move (qualifiers), - std::move (generic_params), std::move (function_params), - std::move (return_type), std::move (where_clause), - std::move (body), std::move (vis), - std::move (outer_attrs), locus, false, is_external)); -} - -// Parses function or method qualifiers (i.e. const, unsafe, and extern). -template -AST::FunctionQualifiers -Parser::parse_function_qualifiers () -{ - Async async_status = Async::No; - Const const_status = Const::No; - Unsafety unsafe_status = Unsafety::Normal; - bool has_extern = false; - std::string abi; - - const_TokenPtr t; - location_t locus; - // Check in order of const, unsafe, then extern - for (int i = 0; i < 2; i++) - { - t = lexer.peek_token (); - locus = t->get_locus (); - - switch (t->get_id ()) - { - case CONST: - lexer.skip_token (); - const_status = Const::Yes; - break; - case ASYNC: - lexer.skip_token (); - async_status = Async::Yes; - break; - default: - // const status is still none - break; - } - } - - if (lexer.peek_token ()->get_id () == UNSAFE) - { - lexer.skip_token (); - unsafe_status = Unsafety::Unsafe; - } - - if (lexer.peek_token ()->get_id () == EXTERN_KW) - { - lexer.skip_token (); - has_extern = true; - - // detect optional abi name - const_TokenPtr next_tok = lexer.peek_token (); - if (next_tok->get_id () == STRING_LITERAL) - { - lexer.skip_token (); - abi = next_tok->get_str (); - } - } - - return AST::FunctionQualifiers (locus, async_status, const_status, - unsafe_status, has_extern, std::move (abi)); -} - -// Parses generic (lifetime or type) params inside angle brackets (optional). 
-template -std::vector> -Parser::parse_generic_params_in_angles () -{ - if (lexer.peek_token ()->get_id () != LEFT_ANGLE) - { - // seems to be no generic params, so exit with empty vector - return std::vector> (); - } - lexer.skip_token (); - - // DEBUG: - rust_debug ("skipped left angle in generic param"); - - std::vector> generic_params - = parse_generic_params (is_right_angle_tok); - - // DEBUG: - rust_debug ("finished parsing actual generic params (i.e. inside angles)"); - - if (!skip_generics_right_angle ()) - { - // DEBUG - rust_debug ("failed to skip generics right angle - returning empty " - "generic params"); - - return std::vector> (); - } - - return generic_params; -} - -template -template -std::unique_ptr -Parser::parse_generic_param (EndTokenPred is_end_token) -{ - auto outer_attrs = parse_outer_attributes (); - std::unique_ptr param; - auto token = lexer.peek_token (); - - switch (token->get_id ()) - { - case LIFETIME: - { - auto lifetime = parse_lifetime (false); - if (!lifetime) - { - rust_error_at ( - token->get_locus (), - "failed to parse lifetime in generic parameter list"); - return nullptr; - } - - std::vector lifetime_bounds; - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - // parse required bounds - lifetime_bounds - = parse_lifetime_bounds ([is_end_token] (TokenId id) { - return is_end_token (id) || id == COMMA; - }); - } - - param = std::unique_ptr (new AST::LifetimeParam ( - std::move (lifetime.value ()), std::move (lifetime_bounds), - std::move (outer_attrs), token->get_locus ())); - break; - } - case IDENTIFIER: - { - auto type_ident = token->get_str (); - lexer.skip_token (); - - std::vector> type_param_bounds; - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - // parse optional type param bounds - type_param_bounds = parse_type_param_bounds (); - } - - std::unique_ptr type = nullptr; - if (lexer.peek_token ()->get_id () == EQUAL) - { - lexer.skip_token (); - - // parse required 
type - type = parse_type (); - if (!type) - { - rust_error_at ( - lexer.peek_token ()->get_locus (), - "failed to parse type in type param in generic params"); - return nullptr; - } - } - - param = std::unique_ptr ( - new AST::TypeParam (std::move (type_ident), token->get_locus (), - std::move (type_param_bounds), std::move (type), - std::move (outer_attrs))); - break; - } - case CONST: - { - lexer.skip_token (); - - auto name_token = expect_token (IDENTIFIER); - - if (!name_token || !expect_token (COLON)) - return nullptr; - - auto type = parse_type (); - if (!type) - return nullptr; - - // optional default value - tl::optional default_expr = tl::nullopt; - if (lexer.peek_token ()->get_id () == EQUAL) - { - lexer.skip_token (); - auto tok = lexer.peek_token (); - default_expr = parse_generic_arg (); - - if (!default_expr) - { - rust_error_at (tok->get_locus (), - "invalid token for start of default value for " - "const generic parameter: expected %, " - "% or %, got %qs", - token_id_to_str (tok->get_id ())); - return nullptr; - } - - // At this point, we *know* that we are parsing a const - // expression - if (default_expr.value ().get_kind () - == AST::GenericArg::Kind::Either) - default_expr = default_expr.value ().disambiguate_to_const (); - } - - param = std::unique_ptr ( - new AST::ConstGenericParam (name_token->get_str (), std::move (type), - default_expr, std::move (outer_attrs), - token->get_locus ())); - - break; - } - default: - // FIXME: Can we clean this last call with a method call? - rust_error_at (token->get_locus (), - "unexpected token when parsing generic parameters: %qs", - token->as_string ().c_str ()); - return nullptr; - } - - return param; -} - -/* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost - * always parse_generic_params_in_angles is what is wanted. 
*/ -template -template -std::vector> -Parser::parse_generic_params (EndTokenPred is_end_token) -{ - std::vector> generic_params; - - /* can't parse lifetime and type params separately due to lookahead issues - * thus, parse them all here */ - - /* HACK: used to retain attribute data if a lifetime param is tentatively - * parsed but it turns out to be type param */ - AST::Attribute parsed_outer_attr = AST::Attribute::create_empty (); - - // Did we parse a generic type param yet - auto type_seen = false; - // Did we parse a const param with a default value yet - auto const_with_default_seen = false; - // Did the user write a lifetime parameter after a type one - auto order_error = false; - // Did the user write a const param with a default value after a type one - auto const_with_default_order_error = false; - - // parse lifetime params - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - auto param = parse_generic_param (is_end_token); - if (param) - { - if (param->get_kind () == AST::GenericParam::Kind::Type) - { - type_seen = true; - if (const_with_default_seen) - const_with_default_order_error = true; - } - else if (param->get_kind () == AST::GenericParam::Kind::Lifetime - && type_seen) - { - order_error = true; - if (const_with_default_seen) - const_with_default_order_error = true; - } - else if (param->get_kind () == AST::GenericParam::Kind::Const) - { - type_seen = true; - AST::ConstGenericParam *const_param - = static_cast (param.get ()); - if (const_param->has_default_value ()) - const_with_default_seen = true; - else if (const_with_default_seen) - const_with_default_order_error = true; - } - - generic_params.emplace_back (std::move (param)); - maybe_skip_token (COMMA); - } - else - break; - } - - // FIXME: Add reordering hint - if (order_error) - { - Error error (generic_params.front ()->get_locus (), - "invalid order for generic parameters: lifetime parameters " - "must be declared prior to type and const parameters"); - add_error (std::move 
(error)); - } - if (const_with_default_order_error) - { - Error error (generic_params.front ()->get_locus (), - "invalid order for generic parameters: generic parameters " - "with a default must be trailing"); - add_error (std::move (error)); - } - - generic_params.shrink_to_fit (); - return generic_params; -} - -/* Parses lifetime generic parameters (pointers). Will also consume any - * trailing comma. No extra checks for end token. */ -template -std::vector> -Parser::parse_lifetime_params () -{ - std::vector> lifetime_params; - - while (lexer.peek_token ()->get_id () != END_OF_FILE) - { - auto lifetime_param = parse_lifetime_param (); - - if (!lifetime_param) - { - // can't treat as error as only way to get out with trailing comma - break; - } - - lifetime_params.emplace_back ( - new AST::LifetimeParam (std::move (lifetime_param.value ()))); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - lifetime_params.shrink_to_fit (); - - return lifetime_params; -} - -/* Parses lifetime generic parameters (pointers). Will also consume any - * trailing comma. Has extra is_end_token predicate checking. */ -template -template -std::vector> -Parser::parse_lifetime_params (EndTokenPred is_end_token) -{ - std::vector> lifetime_params; - - // if end_token is not specified, it defaults to EOF, so should work fine - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - auto lifetime_param = parse_lifetime_param (); - - if (!lifetime_param) - { - /* TODO: is it worth throwing away all lifetime params just because - * one failed? 
*/ - Error error (lexer.peek_token ()->get_locus (), - "failed to parse lifetime param in lifetime params"); - add_error (std::move (error)); - - return {}; - } - - lifetime_params.emplace_back ( - new AST::LifetimeParam (std::move (lifetime_param))); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - lifetime_params.shrink_to_fit (); - - return lifetime_params; -} - -/* Parses lifetime generic parameters (objects). Will also consume any - * trailing comma. No extra checks for end token. - * TODO: is this best solution? implements most of the same algorithm. - * TODO: seems to be unused, remove? */ -template -std::vector -Parser::parse_lifetime_params_objs () -{ - std::vector lifetime_params; - - // bad control structure as end token cannot be guaranteed - while (true) - { - auto lifetime_param = parse_lifetime_param (); - - if (!lifetime_param) - { - // not an error as only way to exit if trailing comma - break; - } - - lifetime_params.push_back (std::move (lifetime_param.value ())); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - lifetime_params.shrink_to_fit (); - - return lifetime_params; -} - -/* Parses lifetime generic parameters (objects). Will also consume any - * trailing comma. Has extra is_end_token predicate checking. - * TODO: is this best solution? implements most of the same algorithm. */ -template -template -std::vector -Parser::parse_lifetime_params_objs ( - EndTokenPred is_end_token) -{ - std::vector lifetime_params; - - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - auto lifetime_param = parse_lifetime_param (); - - if (!lifetime_param) - { - /* TODO: is it worth throwing away all lifetime params just because - * one failed? 
*/ - Error error (lexer.peek_token ()->get_locus (), - "failed to parse lifetime param in lifetime params"); - add_error (std::move (error)); - - return {}; - } - - lifetime_params.push_back (std::move (lifetime_param.value ())); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - lifetime_params.shrink_to_fit (); - - return lifetime_params; -} - -/* Parses a sequence of a certain grammar rule in object form (not pointer or - * smart pointer), delimited by commas and ending when 'is_end_token' is - * satisfied (templated). Will also consume any trailing comma. - * FIXME: this cannot be used due to member function pointer problems (i.e. - * parsing_function cannot be specified properly) */ -template -template -auto -Parser::parse_non_ptr_sequence ( - ParseFunction parsing_function, EndTokenPred is_end_token, - std::string error_msg) -> std::vector -{ - std::vector params; - - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - auto param = parsing_function (); - - if (param.is_error ()) - { - // TODO: is it worth throwing away all params just because one - // failed? - Error error (lexer.peek_token ()->get_locus (), - std::move (error_msg)); - add_error (std::move (error)); - - return {}; - } - - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - params.shrink_to_fit (); - - return params; -} - -/* Parses a single lifetime generic parameter (not including comma). 
*/ -template -tl::expected -Parser::parse_lifetime_param () -{ - // parse outer attributes, which are optional and may not exist - auto outer_attrs = parse_outer_attributes (); - - // save lifetime token - required - const_TokenPtr lifetime_tok = lexer.peek_token (); - if (lifetime_tok->get_id () != LIFETIME) - { - // if lifetime is missing, must not be a lifetime param, so return error - return tl::make_unexpected ({}); - } - lexer.skip_token (); - AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (), - lifetime_tok->get_locus ()); - - // parse lifetime bounds, if it exists - std::vector lifetime_bounds; - if (lexer.peek_token ()->get_id () == COLON) - { - // parse lifetime bounds - lifetime_bounds = parse_lifetime_bounds (); - // TODO: have end token passed in? - } - - return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds), - std::move (outer_attrs), - lifetime_tok->get_locus ()); -} - -// Parses type generic parameters. Will also consume any trailing comma. -template -std::vector> -Parser::parse_type_params () -{ - std::vector> type_params; - - // infinite loop with break on failure as no info on ending token - while (true) - { - std::unique_ptr type_param = parse_type_param (); - - if (type_param == nullptr) - { - // break if fails to parse - break; - } - - type_params.push_back (std::move (type_param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - type_params.shrink_to_fit (); - return type_params; -} - -// Parses type generic parameters. Will also consume any trailing comma. 
-template -template -std::vector> -Parser::parse_type_params (EndTokenPred is_end_token) -{ - std::vector> type_params; - - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - std::unique_ptr type_param = parse_type_param (); - - if (type_param == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type param in type params"); - add_error (std::move (error)); - - return {}; - } - - type_params.push_back (std::move (type_param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip commas, including trailing commas - lexer.skip_token (); - } - - type_params.shrink_to_fit (); - return type_params; - /* TODO: this shares most code with parse_lifetime_params - good place to - * use template (i.e. parse_non_ptr_sequence if doable) */ -} - -/* Parses a single type (generic) parameter, not including commas. May change - * to return value. */ -template -std::unique_ptr -Parser::parse_type_param () -{ - // parse outer attributes, which are optional and may not exist - auto outer_attrs = parse_outer_attributes (); - - const_TokenPtr identifier_tok = lexer.peek_token (); - if (identifier_tok->get_id () != IDENTIFIER) - { - // return null as type param can't exist without this required - // identifier - return nullptr; - } - Identifier ident{identifier_tok}; - lexer.skip_token (); - - // parse type param bounds (if they exist) - std::vector> type_param_bounds; - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - // parse type param bounds, which may or may not exist - type_param_bounds = parse_type_param_bounds (); - } - - // parse type (if it exists) - std::unique_ptr type = nullptr; - if (lexer.peek_token ()->get_id () == EQUAL) - { - lexer.skip_token (); - - // parse type (now required) - type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type in type param"); - add_error (std::move (error)); - - return nullptr; - } - } - - 
return std::unique_ptr ( - new AST::TypeParam (std::move (ident), identifier_tok->get_locus (), - std::move (type_param_bounds), std::move (type), - std::move (outer_attrs))); -} - -/* Parses regular (i.e. non-generic) parameters in functions or methods. Also - * has end token handling. */ -template -template -std::vector> -Parser::parse_function_params (EndTokenPred is_end_token) -{ - std::vector> params; - - if (is_end_token (lexer.peek_token ()->get_id ())) - return params; - - auto initial_param = parse_function_param (); - - // Return empty parameter list if no parameter there - if (initial_param == nullptr) - { - // TODO: is this an error? - return params; - } - - params.push_back (std::move (initial_param)); - - // maybe think of a better control structure here - do-while with an initial - // error state? basically, loop through parameter list until can't find any - // more params - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - // skip comma if applies - lexer.skip_token (); - - // TODO: strictly speaking, shouldn't there be no trailing comma? - if (is_end_token (lexer.peek_token ()->get_id ())) - break; - - // now, as right paren would break, function param is required - auto param = parse_function_param (); - if (param == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse function param (in function params)"); - add_error (std::move (error)); - - // skip somewhere? - return std::vector> (); - } - - params.push_back (std::move (param)); - - t = lexer.peek_token (); - } - - params.shrink_to_fit (); - return params; -} - -/* Parses a single regular (i.e. non-generic) parameter in a function or - * method, i.e. the "name: type" bit. Also handles it not existing. 
*/ -template -std::unique_ptr -Parser::parse_function_param () -{ - // parse outer attributes if they exist - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // TODO: should saved location be at start of outer attributes or pattern? - location_t locus = lexer.peek_token ()->get_locus (); - - if (lexer.peek_token ()->get_id () == ELLIPSIS) // Unnamed variadic - { - lexer.skip_token (); // Skip ellipsis - return std::make_unique ( - AST::VariadicParam (std::move (outer_attrs), locus)); - } - - std::unique_ptr param_pattern = parse_pattern (); - - // create error function param if it doesn't exist - if (param_pattern == nullptr) - { - // skip after something - return nullptr; - } - - if (!skip_token (COLON)) - { - // skip after something - return nullptr; - } - - if (lexer.peek_token ()->get_id () == ELLIPSIS) // Named variadic - { - lexer.skip_token (); // Skip ellipsis - return std::make_unique ( - AST::VariadicParam (std::move (param_pattern), std::move (outer_attrs), - locus)); - } - else - { - std::unique_ptr param_type = parse_type (); - if (param_type == nullptr) - { - return nullptr; - } - return std::make_unique ( - AST::FunctionParam (std::move (param_pattern), std::move (param_type), - std::move (outer_attrs), locus)); - } -} - -/* Parses a function or method return type syntactical construction. Also - * handles a function return type not existing. */ -template -std::unique_ptr -Parser::parse_function_return_type () -{ - if (lexer.peek_token ()->get_id () != RETURN_TYPE) - return nullptr; - - // skip return type, as it now obviously exists - lexer.skip_token (); - - std::unique_ptr type = parse_type (); - - return type; -} - -/* Parses a "where clause" (in a function, struct, method, etc.). Also handles - * a where clause not existing, in which it will return - * WhereClause::create_empty(), which can be checked via - * WhereClause::is_empty(). 
*/ -template -AST::WhereClause -Parser::parse_where_clause () -{ - const_TokenPtr where_tok = lexer.peek_token (); - if (where_tok->get_id () != WHERE) - { - // where clause doesn't exist, so create empty one - return AST::WhereClause::create_empty (); - } - - lexer.skip_token (); - - /* parse where clause items - this is not a separate rule in the reference - * so won't be here */ - std::vector> where_clause_items; - - std::vector for_lifetimes; - if (lexer.peek_token ()->get_id () == FOR) - for_lifetimes = parse_for_lifetimes (); - - /* HACK: where clauses end with a right curly or semicolon or equals in all - * uses currently */ - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON - && t->get_id () != EQUAL) - { - std::unique_ptr where_clause_item - = parse_where_clause_item (for_lifetimes); - - if (where_clause_item == nullptr) - { - Error error (t->get_locus (), "failed to parse where clause item"); - add_error (std::move (error)); - - return AST::WhereClause::create_empty (); - } - - where_clause_items.push_back (std::move (where_clause_item)); - - // also skip comma if it exists - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - where_clause_items.shrink_to_fit (); - return AST::WhereClause (std::move (where_clause_items)); -} - -/* Parses a where clause item (lifetime or type bound). Does not parse any - * commas. */ -template -std::unique_ptr -Parser::parse_where_clause_item ( - const std::vector &outer_for_lifetimes) -{ - // shitty cheat way of determining lifetime or type bound - test for - // lifetime - const_TokenPtr t = lexer.peek_token (); - - if (t->get_id () == LIFETIME) - return parse_lifetime_where_clause_item (); - else - return parse_type_bound_where_clause_item (outer_for_lifetimes); -} - -// Parses a lifetime where clause item. 
-template -std::unique_ptr -Parser::parse_lifetime_where_clause_item () -{ - auto parsed_lifetime = parse_lifetime (false); - if (!parsed_lifetime) - { - // TODO: error here? - return nullptr; - } - auto lifetime = parsed_lifetime.value (); - - if (!skip_token (COLON)) - { - // TODO: skip after somewhere - return nullptr; - } - - std::vector lifetime_bounds = parse_lifetime_bounds (); - // TODO: have end token passed in? - - location_t locus = lifetime.get_locus (); - - return std::unique_ptr ( - new AST::LifetimeWhereClauseItem (std::move (lifetime), - std::move (lifetime_bounds), locus)); -} - -// Parses a type bound where clause item. -template -std::unique_ptr -Parser::parse_type_bound_where_clause_item ( - const std::vector &outer_for_lifetimes) -{ - std::vector for_lifetimes = outer_for_lifetimes; - - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - return nullptr; - } - - if (!skip_token (COLON)) - { - // TODO: skip after somewhere - return nullptr; - } - - if (lexer.peek_token ()->get_id () == FOR) - { - auto for_lifetimes_inner = parse_for_lifetimes (); - for_lifetimes.insert (for_lifetimes.end (), for_lifetimes_inner.begin (), - for_lifetimes_inner.end ()); - } - - // parse type param bounds if they exist - std::vector> type_param_bounds - = parse_type_param_bounds (); - - location_t locus = lexer.peek_token ()->get_locus (); - - return std::unique_ptr ( - new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes), - std::move (type), - std::move (type_param_bounds), locus)); -} - -// Parses a for lifetimes clause, including the for keyword and angle -// brackets. -template -std::vector -Parser::parse_for_lifetimes () -{ - std::vector params; - - if (!skip_token (FOR)) - { - // skip after somewhere? - return params; - } - - if (!skip_token (LEFT_ANGLE)) - { - // skip after somewhere? 
- return params; - } - - /* cannot specify end token due to parsing problems with '>' tokens being - * nested */ - params = parse_lifetime_params_objs (is_right_angle_tok); - - if (!skip_generics_right_angle ()) - { - // DEBUG - rust_debug ("failed to skip generics right angle after (supposedly) " - "finished parsing where clause items"); - // ok, well this gets called. - - // skip after somewhere? - return params; - } - - return params; -} - -// Parses type parameter bounds in where clause or generic arguments. -template -std::vector> -Parser::parse_type_param_bounds () -{ - std::vector> type_param_bounds; - - std::unique_ptr initial_bound - = parse_type_param_bound (); - - // quick exit if null - if (initial_bound == nullptr) - { - /* error? type param bounds must have at least one term, but are bounds - * optional? */ - return type_param_bounds; - } - type_param_bounds.push_back (std::move (initial_bound)); - - while (lexer.peek_token ()->get_id () == PLUS) - { - lexer.skip_token (); - - std::unique_ptr bound = parse_type_param_bound (); - if (bound == nullptr) - { - /* not an error: bound is allowed to be null as trailing plus is - * allowed */ - return type_param_bounds; - } - - type_param_bounds.push_back (std::move (bound)); - } - - type_param_bounds.shrink_to_fit (); - return type_param_bounds; -} - -/* Parses type parameter bounds in where clause or generic arguments, with end - * token handling. */ -template -template -std::vector> -Parser::parse_type_param_bounds (EndTokenPred is_end_token) -{ - std::vector> type_param_bounds; - - std::unique_ptr initial_bound - = parse_type_param_bound (); - - // quick exit if null - if (initial_bound == nullptr) - { - /* error? type param bounds must have at least one term, but are bounds - * optional? 
*/ - return type_param_bounds; - } - type_param_bounds.push_back (std::move (initial_bound)); - - while (lexer.peek_token ()->get_id () == PLUS) - { - lexer.skip_token (); - - // break if end token character - if (is_end_token (lexer.peek_token ()->get_id ())) - break; - - std::unique_ptr bound = parse_type_param_bound (); - if (bound == nullptr) - { - // TODO how wise is it to ditch all bounds if only one failed? - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type param bound in type param bounds"); - add_error (std::move (error)); - - return {}; - } - - type_param_bounds.push_back (std::move (bound)); - } - - type_param_bounds.shrink_to_fit (); - return type_param_bounds; -} - -/* Parses a single type parameter bound in a where clause or generic argument. - * Does not parse the '+' between arguments. */ -template -std::unique_ptr -Parser::parse_type_param_bound () -{ - // shitty cheat way of determining lifetime or trait bound - test for - // lifetime - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LIFETIME: - return std::unique_ptr ( - new AST::Lifetime (parse_lifetime (false).value ())); - case LEFT_PAREN: - case QUESTION_MARK: - case FOR: - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - return parse_trait_bound (); - default: - // don't error - assume this is fine TODO - return nullptr; - } -} - -// Parses a trait bound type param bound. -template -std::unique_ptr -Parser::parse_trait_bound () -{ - bool has_parens = false; - bool has_question_mark = false; - - location_t locus = lexer.peek_token ()->get_locus (); - - /* parse optional `for lifetimes`. 
*/ - std::vector for_lifetimes; - if (lexer.peek_token ()->get_id () == FOR) - for_lifetimes = parse_for_lifetimes (); - - // handle trait bound being in parentheses - if (lexer.peek_token ()->get_id () == LEFT_PAREN) - { - has_parens = true; - lexer.skip_token (); - } - - // handle having question mark (optional) - if (lexer.peek_token ()->get_id () == QUESTION_MARK) - { - has_question_mark = true; - lexer.skip_token (); - } - - // handle TypePath - AST::TypePath type_path = parse_type_path (); - - // handle closing parentheses - if (has_parens) - { - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - } - - return std::unique_ptr ( - new AST::TraitBound (std::move (type_path), locus, has_parens, - has_question_mark, std::move (for_lifetimes))); -} - -// Parses lifetime bounds. -template -std::vector -Parser::parse_lifetime_bounds () -{ - std::vector lifetime_bounds; - - while (true) - { - auto lifetime = parse_lifetime (false); - - // quick exit for parsing failure - if (!lifetime) - break; - - lifetime_bounds.push_back (std::move (lifetime.value ())); - - /* plus is maybe not allowed at end - spec defines it weirdly, so - * assuming allowed at end */ - if (lexer.peek_token ()->get_id () != PLUS) - break; - - lexer.skip_token (); - } - - lifetime_bounds.shrink_to_fit (); - return lifetime_bounds; -} - -// Parses lifetime bounds, with added check for ending token. -template -template -std::vector -Parser::parse_lifetime_bounds (EndTokenPred is_end_token) -{ - std::vector lifetime_bounds; - - while (!is_end_token (lexer.peek_token ()->get_id ())) - { - auto lifetime = parse_lifetime (false); - - if (!lifetime) - { - /* TODO: is it worth throwing away all lifetime bound info just - * because one failed? 
*/ - Error error (lexer.peek_token ()->get_locus (), - "failed to parse lifetime in lifetime bounds"); - add_error (std::move (error)); - - return {}; - } - - lifetime_bounds.push_back (std::move (lifetime.value ())); - - /* plus is maybe not allowed at end - spec defines it weirdly, so - * assuming allowed at end */ - if (lexer.peek_token ()->get_id () != PLUS) - break; - - lexer.skip_token (); - } - - lifetime_bounds.shrink_to_fit (); - return lifetime_bounds; -} - -/* Parses a lifetime token (named, 'static, or '_). Also handles lifetime not - * existing. */ -template -tl::expected -Parser::parse_lifetime (bool allow_elided) -{ - const_TokenPtr lifetime_tok = lexer.peek_token (); - if (lifetime_tok->get_id () != LIFETIME) - { - if (allow_elided) - { - return AST::Lifetime::elided (); - } - else - { - return tl::make_unexpected ({}); - } - } - lexer.skip_token (); - - return lifetime_from_token (lifetime_tok); -} - -template -AST::Lifetime -Parser::lifetime_from_token (const_TokenPtr tok) -{ - location_t locus = tok->get_locus (); - std::string lifetime_ident = tok->get_str (); - - if (lifetime_ident == "static") - { - return AST::Lifetime (AST::Lifetime::STATIC, "", locus); - } - else if (lifetime_ident == "_") - { - // Explicitly and implicitly elided lifetimes follow the same rules. 
- return AST::Lifetime (AST::Lifetime::WILDCARD, "", locus); - } - else - { - return AST::Lifetime (AST::Lifetime::NAMED, std::move (lifetime_ident), - locus); - } -} - -template -std::unique_ptr -Parser::parse_external_type_item (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (TYPE); - - const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); - if (alias_name_tok == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse identifier in external opaque type"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (SEMICOLON)) - return nullptr; - - return std::unique_ptr ( - new AST::ExternalTypeItem (alias_name_tok->get_str (), std::move (vis), - std::move (outer_attrs), std::move (locus))); -} - -// Parses a "type alias" (typedef) item. -template -std::unique_ptr -Parser::parse_type_alias (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (TYPE); - - // TODO: use this token for identifier when finished that - const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); - if (alias_name_tok == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse identifier in type alias"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - Identifier alias_name{alias_name_tok}; - - // parse generic params, which may not exist - std::vector> generic_params - = parse_generic_params_in_angles (); - - // parse where clause, which may not exist - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (EQUAL)) - { - skip_after_semicolon (); - return nullptr; - } - - std::unique_ptr type_to_alias = parse_type (); - - if (!skip_token (SEMICOLON)) - { - // should be skipping past this, not the next line - return nullptr; - } - - return std::unique_ptr ( - new AST::TypeAlias (std::move 
(alias_name), std::move (generic_params), - std::move (where_clause), std::move (type_to_alias), - std::move (vis), std::move (outer_attrs), locus)); -} - -// Parse a struct item AST node. -template -std::unique_ptr -Parser::parse_struct (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - /* TODO: determine best way to parse the proper struct vs tuple struct - - * share most of initial constructs so lookahead might be impossible, and if - * not probably too expensive. Best way is probably unified parsing for the - * initial parts and then pass them in as params to more derived functions. - * Alternatively, just parse everything in this one function - do this if - * function not too long. */ - - /* Proper struct <- 'struct' IDENTIFIER generic_params? where_clause? ( '{' - * struct_fields? '}' | ';' ) */ - /* Tuple struct <- 'struct' IDENTIFIER generic_params? '(' tuple_fields? ')' - * where_clause? ';' */ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (STRUCT_KW); - - // parse struct name - const_TokenPtr name_tok = expect_token (IDENTIFIER); - if (name_tok == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse struct or tuple struct identifier"); - add_error (std::move (error)); - - // skip after somewhere? - return nullptr; - } - Identifier struct_name{name_tok}; - - // parse generic params, which may or may not exist - std::vector> generic_params - = parse_generic_params_in_angles (); - - // branch on next token - determines whether proper struct or tuple struct - if (lexer.peek_token ()->get_id () == LEFT_PAREN) - { - // tuple struct - - // skip left parenthesis - lexer.skip_token (); - - // parse tuple fields - std::vector tuple_fields; - // Might be empty tuple for unit tuple struct. 
- if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - tuple_fields = std::vector (); - else - tuple_fields = parse_tuple_fields (); - - // tuple parameters must have closing parenthesis - if (!skip_token (RIGHT_PAREN)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse where clause, which is optional - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (SEMICOLON)) - { - // can't skip after semicolon because it's meant to be here - return nullptr; - } - - return std::unique_ptr ( - new AST::TupleStruct (std::move (tuple_fields), std::move (struct_name), - std::move (generic_params), - std::move (where_clause), std::move (vis), - std::move (outer_attrs), locus)); - } - - // assume it is a proper struct being parsed and continue outside of switch - // - label only here to suppress warning - - // parse where clause, which is optional - AST::WhereClause where_clause = parse_where_clause (); - - // branch on next token - determines whether struct is a unit struct - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_CURLY: - { - // struct with body - - // skip curly bracket - lexer.skip_token (); - - // parse struct fields, if any - std::vector struct_fields - = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere? 
- return nullptr; - } - - return std::unique_ptr (new AST::StructStruct ( - std::move (struct_fields), std::move (struct_name), - std::move (generic_params), std::move (where_clause), false, - std::move (vis), std::move (outer_attrs), locus)); - } - case SEMICOLON: - // unit struct declaration - - lexer.skip_token (); - - return std::unique_ptr ( - new AST::StructStruct (std::move (struct_name), - std::move (generic_params), - std::move (where_clause), std::move (vis), - std::move (outer_attrs), locus)); - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in struct declaration", - t->get_token_description ())); - - // skip somewhere? - return nullptr; - } -} - -// Parses struct fields in struct declarations. -template -std::vector -Parser::parse_struct_fields () -{ - std::vector fields; - - AST::StructField initial_field = parse_struct_field (); - - // Return empty field list if no field there - if (initial_field.is_error ()) - return fields; - - fields.push_back (std::move (initial_field)); - - while (lexer.peek_token ()->get_id () == COMMA) - { - lexer.skip_token (); - - AST::StructField field = parse_struct_field (); - - if (field.is_error ()) - { - // would occur with trailing comma, so allowed - break; - } - - fields.push_back (std::move (field)); - } - - fields.shrink_to_fit (); - return fields; - // TODO: template if possible (parse_non_ptr_seq) -} - -// Parses struct fields in struct declarations. 
-template -template -std::vector -Parser::parse_struct_fields (EndTokenPred is_end_tok) -{ - std::vector fields; - - AST::StructField initial_field = parse_struct_field (); - - // Return empty field list if no field there - if (initial_field.is_error ()) - return fields; - - fields.push_back (std::move (initial_field)); - - while (lexer.peek_token ()->get_id () == COMMA) - { - lexer.skip_token (); - - if (is_end_tok (lexer.peek_token ()->get_id ())) - break; - - AST::StructField field = parse_struct_field (); - if (field.is_error ()) - { - /* TODO: should every field be ditched just because one couldn't be - * parsed? */ - Error error (lexer.peek_token ()->get_locus (), - "failed to parse struct field in struct fields"); - add_error (std::move (error)); - - return {}; - } - - fields.push_back (std::move (field)); - } - - fields.shrink_to_fit (); - return fields; - // TODO: template if possible (parse_non_ptr_seq) -} - -// Parses a single struct field (in a struct definition). Does not parse -// commas. -template -AST::StructField -Parser::parse_struct_field () -{ - // parse outer attributes, if they exist - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parse visibility, if it exists - auto vis = parse_visibility (); - if (!vis) - return AST::StructField::create_error (); - - location_t locus = lexer.peek_token ()->get_locus (); - - // parse field name - const_TokenPtr field_name_tok = lexer.peek_token (); - if (field_name_tok->get_id () != IDENTIFIER) - { - // if not identifier, assumes there is no struct field and exits - not - // necessarily error - return AST::StructField::create_error (); - } - Identifier field_name{field_name_tok}; - lexer.skip_token (); - - if (!skip_token (COLON)) - { - // skip after somewhere? 
- return AST::StructField::create_error (); - } - - // parse field type - this is required - std::unique_ptr field_type = parse_type (); - if (field_type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in struct field definition"); - add_error (std::move (error)); - - // skip after somewhere - return AST::StructField::create_error (); - } - - return AST::StructField (std::move (field_name), std::move (field_type), - std::move (vis.value ()), locus, - std::move (outer_attrs)); -} - -// Parses tuple fields in tuple/tuple struct declarations. -template -std::vector -Parser::parse_tuple_fields () -{ - std::vector fields; - - AST::TupleField initial_field = parse_tuple_field (); - - // Return empty field list if no field there - if (initial_field.is_error ()) - { - return fields; - } - - fields.push_back (std::move (initial_field)); - - // maybe think of a better control structure here - do-while with an initial - // error state? basically, loop through field list until can't find any more - // params HACK: all current syntax uses of tuple fields have them ending - // with a right paren token - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - // skip comma if applies - e.g. trailing comma - lexer.skip_token (); - - // break out due to right paren if it exists - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - { - break; - } - - AST::TupleField field = parse_tuple_field (); - if (field.is_error ()) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse tuple field in tuple fields"); - add_error (std::move (error)); - - return std::vector (); - } - - fields.push_back (std::move (field)); - - t = lexer.peek_token (); - } - - fields.shrink_to_fit (); - return fields; - - // TODO: this shares basically all code with function params and struct - // fields - // - templates? -} - -/* Parses a single tuple struct field in a tuple struct definition. Does not - * parse commas. 
*/ -template -AST::TupleField -Parser::parse_tuple_field () -{ - // parse outer attributes if they exist - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parse visibility if it exists - auto visibility = parse_visibility (); - if (!visibility) - return AST::TupleField::create_error (); - - location_t locus = lexer.peek_token ()->get_locus (); - - // parse type, which is required - std::unique_ptr field_type = parse_type (); - if (field_type == nullptr) - { - // error if null - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in tuple struct field"); - add_error (std::move (error)); - - // skip after something - return AST::TupleField::create_error (); - } - - return AST::TupleField (std::move (field_type), - std::move (visibility.value ()), locus, - std::move (outer_attrs)); -} - -// Parses a Rust "enum" tagged union item definition. -template -std::unique_ptr -Parser::parse_enum (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (ENUM_KW); - - // parse enum name - const_TokenPtr enum_name_tok = expect_token (IDENTIFIER); - if (enum_name_tok == nullptr) - return nullptr; - - Identifier enum_name = {enum_name_tok}; - - // parse generic params (of enum container, not enum variants) if they exist - std::vector> generic_params - = parse_generic_params_in_angles (); - - // parse where clause if it exists - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (LEFT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - - // parse actual enum variant definitions - std::vector> enum_items - = parse_enum_items ([] (TokenId id) { return id == RIGHT_CURLY; }); - - if (!skip_token (RIGHT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - - return std::unique_ptr ( - new AST::Enum (std::move (enum_name), std::move (vis), - std::move (generic_params), std::move (where_clause), - std::move (enum_items), std::move (outer_attrs), 
locus)); -} - -// Parses the enum variants inside an enum definiton. -template -std::vector> -Parser::parse_enum_items () -{ - std::vector> items; - - std::unique_ptr initial_item = parse_enum_item (); - - // Return empty item list if no field there - if (initial_item == nullptr) - return items; - - items.push_back (std::move (initial_item)); - - while (lexer.peek_token ()->get_id () == COMMA) - { - lexer.skip_token (); - - std::unique_ptr item = parse_enum_item (); - if (item == nullptr) - { - // this would occur with a trailing comma, which is allowed - break; - } - - items.push_back (std::move (item)); - } - - items.shrink_to_fit (); - return items; - - /* TODO: use template if doable (parse_non_ptr_sequence) */ -} - -// Parses the enum variants inside an enum definiton. -template -template -std::vector> -Parser::parse_enum_items (EndTokenPred is_end_tok) -{ - std::vector> items; - - std::unique_ptr initial_item = parse_enum_item (); - - // Return empty item list if no field there - if (initial_item == nullptr) - return items; - - items.push_back (std::move (initial_item)); - - while (lexer.peek_token ()->get_id () == COMMA) - { - lexer.skip_token (); - - if (is_end_tok (lexer.peek_token ()->get_id ())) - break; - - std::unique_ptr item = parse_enum_item (); - if (item == nullptr) - { - /* TODO should this ignore all successfully parsed enum items just - * because one failed? */ - Error error (lexer.peek_token ()->get_locus (), - "failed to parse enum item in enum items"); - add_error (std::move (error)); - - return {}; - } - - items.push_back (std::move (item)); - } - - items.shrink_to_fit (); - return items; - - /* TODO: use template if doable (parse_non_ptr_sequence) */ -} - -/* Parses a single enum variant item in an enum definition. Does not parse - * commas. 
*/ -template -std::unique_ptr -Parser::parse_enum_item () -{ - // parse outer attributes if they exist - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parse visibility, which may or may not exist - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - auto vis = vis_res.value (); - - // parse name for enum item, which is required - const_TokenPtr item_name_tok = lexer.peek_token (); - if (item_name_tok->get_id () != IDENTIFIER) - { - // this may not be an error but it means there is no enum item here - return nullptr; - } - lexer.skip_token (); - Identifier item_name{item_name_tok}; - - // branch based on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_PAREN: - { - // tuple enum item - lexer.skip_token (); - - std::vector tuple_fields; - // Might be empty tuple for unit tuple enum variant. - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - tuple_fields = std::vector (); - else - tuple_fields = parse_tuple_fields (); - - if (!skip_token (RIGHT_PAREN)) - { - // skip after somewhere - return nullptr; - } - - return std::unique_ptr (new AST::EnumItemTuple ( - std::move (item_name), std::move (vis), std::move (tuple_fields), - std::move (outer_attrs), item_name_tok->get_locus ())); - } - case LEFT_CURLY: - { - // struct enum item - lexer.skip_token (); - - std::vector struct_fields - = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); - - if (!skip_token (RIGHT_CURLY)) - { - // skip after somewhere - return nullptr; - } - - return std::unique_ptr (new AST::EnumItemStruct ( - std::move (item_name), std::move (vis), std::move (struct_fields), - std::move (outer_attrs), item_name_tok->get_locus ())); - } - case EQUAL: - { - // discriminant enum item - lexer.skip_token (); - - std::unique_ptr discriminant_expr = parse_expr (); - - return std::unique_ptr ( - new AST::EnumItemDiscriminant (std::move (item_name), std::move (vis), - std::move (discriminant_expr), - std::move 
(outer_attrs), - item_name_tok->get_locus ())); - } - default: - // regular enum with just an identifier - return std::unique_ptr ( - new AST::EnumItem (std::move (item_name), std::move (vis), - std::move (outer_attrs), - item_name_tok->get_locus ())); - } -} - -// Parses a C-style (and C-compat) untagged union declaration. -template -std::unique_ptr -Parser::parse_union (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - /* hack - "weak keyword" by finding identifier called "union" (lookahead in - * item switch) */ - const_TokenPtr union_keyword = expect_token (IDENTIFIER); - rust_assert (union_keyword->get_str () == Values::WeakKeywords::UNION); - location_t locus = union_keyword->get_locus (); - - // parse actual union name - const_TokenPtr union_name_tok = expect_token (IDENTIFIER); - if (union_name_tok == nullptr) - { - skip_after_next_block (); - return nullptr; - } - Identifier union_name{union_name_tok}; - - // parse optional generic parameters - std::vector> generic_params - = parse_generic_params_in_angles (); - - // parse optional where clause - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (LEFT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - - /* parse union inner items as "struct fields" because hey, syntax reuse. - * Spec said so. */ - std::vector union_fields - = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); - - if (!skip_token (RIGHT_CURLY)) - { - // skip after somewhere - return nullptr; - } - - return std::unique_ptr ( - new AST::Union (std::move (union_name), std::move (vis), - std::move (generic_params), std::move (where_clause), - std::move (union_fields), std::move (outer_attrs), locus)); -} - -/* Parses a "constant item" (compile-time constant to maybe "inline" - * throughout the program - like constexpr). 
*/ -template -std::unique_ptr -Parser::parse_const_item (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (CONST); - - /* get constant identifier - this is either a proper identifier or the _ - * wildcard */ - const_TokenPtr ident_tok = lexer.peek_token (); - // make default identifier the underscore wildcard one - std::string ident (Values::Keywords::UNDERSCORE); - switch (ident_tok->get_id ()) - { - case IDENTIFIER: - ident = ident_tok->get_str (); - lexer.skip_token (); - break; - case UNDERSCORE: - // do nothing - identifier is already "_" - lexer.skip_token (); - break; - default: - add_error ( - Error (ident_tok->get_locus (), - "expected item name (identifier or %<_%>) in constant item " - "declaration - found %qs", - ident_tok->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (COLON)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse constant type (required) - std::unique_ptr type = parse_type (); - - // A const with no given expression value - if (lexer.peek_token ()->get_id () == SEMICOLON) - { - lexer.skip_token (); - return std::unique_ptr ( - new AST::ConstantItem (std::move (ident), std::move (vis), - std::move (type), std::move (outer_attrs), - locus)); - } - - if (!skip_token (EQUAL)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse constant expression (required) - std::unique_ptr expr = parse_expr (); - - if (!skip_token (SEMICOLON)) - { - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ConstantItem (std::move (ident), std::move (vis), std::move (type), - std::move (expr), std::move (outer_attrs), locus)); -} - -// Parses a "static item" (static storage item, with 'static lifetime). 
-template -std::unique_ptr -Parser::parse_static_item (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (STATIC_KW); - - // determine whether static item is mutable - bool is_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - is_mut = true; - lexer.skip_token (); - } - - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - if (!skip_token (COLON)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse static item type (required) - std::unique_ptr type = parse_type (); - - if (!skip_token (EQUAL)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse static item expression (required) - std::unique_ptr expr = parse_expr (); - - if (!skip_token (SEMICOLON)) - { - // skip after somewhere - return nullptr; - } - - return std::unique_ptr ( - new AST::StaticItem (std::move (ident), is_mut, std::move (type), - std::move (expr), std::move (vis), - std::move (outer_attrs), locus)); -} - -// Parses a trait definition item, including unsafe ones. 
-template -std::unique_ptr -Parser::parse_trait (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - bool is_unsafe = false; - bool is_auto_trait = false; - - if (lexer.peek_token ()->get_id () == UNSAFE) - { - is_unsafe = true; - lexer.skip_token (); - } - - if (lexer.peek_token ()->get_id () == AUTO) - { - is_auto_trait = true; - lexer.skip_token (); - } - - skip_token (TRAIT); - - // parse trait name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - // parse generic parameters (if they exist) - std::vector> generic_params - = parse_generic_params_in_angles (); - - // create placeholder type param bounds in case they don't exist - std::vector> type_param_bounds; - - // parse type param bounds (if they exist) - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - type_param_bounds = parse_type_param_bounds ( - [] (TokenId id) { return id == WHERE || id == LEFT_CURLY; }); - // type_param_bounds = parse_type_param_bounds (); - } - - // parse where clause (if it exists) - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (LEFT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - - // parse inner attrs (if they exist) - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse trait items - std::vector> trait_items; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr trait_item = parse_trait_item (); - - if (trait_item == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse trait item in trait"); - add_error (std::move (error)); - - return nullptr; - } - trait_items.push_back (std::move (trait_item)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_CURLY)) - { - // skip after something - return nullptr; - } - - trait_items.shrink_to_fit (); - return std::unique_ptr ( 
- new AST::Trait (std::move (ident), is_unsafe, is_auto_trait, - std::move (generic_params), std::move (type_param_bounds), - std::move (where_clause), std::move (trait_items), - std::move (vis), std::move (outer_attrs), - std::move (inner_attrs), locus)); -} - -// Parses a trait item used inside traits (not trait, the Item). -template -std::unique_ptr -Parser::parse_trait_item () -{ - // parse outer attributes (if they exist) - AST::AttrVec outer_attrs = parse_outer_attributes (); - - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - - auto vis = vis_res.value (); - - // lookahead to determine what type of trait item to parse - const_TokenPtr tok = lexer.peek_token (); - switch (tok->get_id ()) - { - case SUPER: - case SELF: - case CRATE: - case DOLLAR_SIGN: - // these seem to be SimplePath tokens, so this is a macro invocation - // semi - return parse_macro_invocation_semi (std::move (outer_attrs)); - case IDENTIFIER: - if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) - return parse_function (std::move (vis), std::move (outer_attrs)); - else - return parse_macro_invocation_semi (std::move (outer_attrs)); - case TYPE: - return parse_trait_type (std::move (outer_attrs), vis); - case CONST: - // disambiguate with function qualifier - if (lexer.peek_token (1)->get_id () == IDENTIFIER) - { - return parse_trait_const (std::move (outer_attrs)); - } - // else, fallthrough to function - // TODO: find out how to disable gcc "implicit fallthrough" error - gcc_fallthrough (); - case ASYNC: - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs)); - default: - break; - } - add_error (Error (tok->get_locus (), - "unrecognised token %qs for item in trait", - tok->get_token_description ())); - // skip? - return nullptr; -} - -// Parse a typedef trait item. 
-template -std::unique_ptr -Parser::parse_trait_type (AST::AttrVec outer_attrs, - AST::Visibility vis) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (TYPE); - - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - // Parse optional generic parameters for GATs (Generic Associated Types) - std::vector> generic_params; - if (lexer.peek_token ()->get_id () == LEFT_ANGLE) - { - generic_params = parse_generic_params_in_angles (); - } - - std::vector> bounds; - - // parse optional colon - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - // parse optional type param bounds - bounds - = parse_type_param_bounds ([] (TokenId id) { return id == SEMICOLON; }); - // bounds = parse_type_param_bounds (); - } - - if (!skip_token (SEMICOLON)) - { - // skip? - return nullptr; - } - - return std::unique_ptr ( - new AST::TraitItemType (std::move (ident), std::move (generic_params), - std::move (bounds), std::move (outer_attrs), vis, - locus)); -} - -// Parses a constant trait item. -template -std::unique_ptr -Parser::parse_trait_const (AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (CONST); - - // parse constant item name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - if (!skip_token (COLON)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse constant trait item type - std::unique_ptr type = parse_type (); - - // parse constant trait body expression, if it exists - std::unique_ptr const_body = nullptr; - if (lexer.peek_token ()->get_id () == EQUAL) - { - lexer.skip_token (); - - // expression must exist, so parse it - const_body = parse_expr (); - } - - if (!skip_token (SEMICOLON)) - { - // skip after something? 
- return nullptr; - } - - return std::unique_ptr (new AST::ConstantItem ( - std::move (ident), AST::Visibility::create_private (), std::move (type), - std::move (const_body), std::move (outer_attrs), locus)); -} - -/* Parses a struct "impl" item (both inherent impl and trait impl can be - * parsed here), */ -template -std::unique_ptr -Parser::parse_impl (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - /* Note that only trait impls are allowed to be unsafe. So if unsafe, it - * must be a trait impl. However, this isn't enough for full disambiguation, - * so don't branch here. */ - location_t locus = lexer.peek_token ()->get_locus (); - bool is_unsafe = false; - if (lexer.peek_token ()->get_id () == UNSAFE) - { - lexer.skip_token (); - is_unsafe = true; - } - - if (!skip_token (IMPL)) - { - skip_after_next_block (); - return nullptr; - } - - // parse generic params (shared by trait and inherent impls) - std::vector> generic_params - = parse_generic_params_in_angles (); - - // Again, trait impl-only feature, but optional one, so can be used for - // branching yet. - bool has_exclam = false; - if (lexer.peek_token ()->get_id () == EXCLAM) - { - lexer.skip_token (); - has_exclam = true; - } - - /* FIXME: code that doesn't look shit for TypePath. Also, make sure this - * doesn't parse too much and not work. 
*/ - AST::TypePath type_path = parse_type_path (); - if (type_path.is_error () || lexer.peek_token ()->get_id () != FOR) - { - /* cannot parse type path (or not for token next, at least), so must be - * inherent impl */ - - // hacky conversion of TypePath stack object to Type pointer - std::unique_ptr type = nullptr; - if (!type_path.is_error ()) - type = std::unique_ptr ( - new AST::TypePath (std::move (type_path))); - else - type = parse_type (); - - // Type is required, so error if null - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in inherent impl"); - add_error (std::move (error)); - - skip_after_next_block (); - return nullptr; - } - - // parse optional where clause - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (LEFT_CURLY)) - { - // TODO: does this still skip properly? - skip_after_end_block (); - return nullptr; - } - - // parse inner attributes (optional) - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse inherent impl items - std::vector> impl_items; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr impl_item - = parse_inherent_impl_item (); - - if (impl_item == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse inherent impl item in inherent impl"); - add_error (std::move (error)); - - return nullptr; - } - - impl_items.push_back (std::move (impl_item)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere - return nullptr; - } - - // DEBUG - rust_debug ("successfully parsed inherent impl"); - - impl_items.shrink_to_fit (); - - return std::unique_ptr (new AST::InherentImpl ( - std::move (impl_items), std::move (generic_params), std::move (type), - std::move (where_clause), std::move (vis), std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else - { - // type path must both be valid and next token is for, so 
trait impl - if (!skip_token (FOR)) - { - skip_after_next_block (); - return nullptr; - } - - // parse type - std::unique_ptr type = parse_type (); - // ensure type is included as it is required - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in trait impl"); - add_error (std::move (error)); - - skip_after_next_block (); - return nullptr; - } - - // parse optional where clause - AST::WhereClause where_clause = parse_where_clause (); - - if (!skip_token (LEFT_CURLY)) - { - // TODO: does this still skip properly? - skip_after_end_block (); - return nullptr; - } - - // parse inner attributes (optional) - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse trait impl items - std::vector> impl_items; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr impl_item - = parse_trait_impl_item (); - - if (impl_item == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse trait impl item in trait impl"); - add_error (std::move (error)); - - return nullptr; - } - - impl_items.push_back (std::move (impl_item)); - - t = lexer.peek_token (); - - // DEBUG - rust_debug ("successfully parsed a trait impl item"); - } - // DEBUG - rust_debug ("successfully finished trait impl items"); - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere - return nullptr; - } - - // DEBUG - rust_debug ("successfully parsed trait impl"); - - impl_items.shrink_to_fit (); - - return std::unique_ptr ( - new AST::TraitImpl (std::move (type_path), is_unsafe, has_exclam, - std::move (impl_items), std::move (generic_params), - std::move (type), std::move (where_clause), - std::move (vis), std::move (inner_attrs), - std::move (outer_attrs), locus)); - } -} - -// Parses a single inherent impl item (item inside an inherent impl block). 
-template -std::unique_ptr -Parser::parse_inherent_impl_item () -{ - // parse outer attributes (if they exist) - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // TODO: cleanup - currently an unreadable mess - - // branch on next token: - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case IDENTIFIER: - // FIXME: Arthur: Do we need to some lookahead here? - return parse_macro_invocation_semi (outer_attrs); - case SUPER: - case SELF: - case CRATE: - case PUB: - { - // visibility, so not a macro invocation semi - must be constant, - // function, or method - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - auto vis = vis_res.value (); - - // TODO: is a recursive call to parse_inherent_impl_item better? - switch (lexer.peek_token ()->get_id ()) - { - case EXTERN_KW: - case UNSAFE: - case FN_KW: - // function or method - return parse_inherent_impl_function_or_method (std::move (vis), - std::move ( - outer_attrs)); - case CONST: - // lookahead to resolve production - could be function/method or - // const item - t = lexer.peek_token (1); - - switch (t->get_id ()) - { - case IDENTIFIER: - case UNDERSCORE: - return parse_const_item (std::move (vis), - std::move (outer_attrs)); - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_inherent_impl_function_or_method (std::move (vis), - std::move ( - outer_attrs)); - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in some sort of const " - "item in inherent impl", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? - return nullptr; - } - default: - add_error ( - Error (t->get_locus (), - "unrecognised token %qs for item in inherent impl", - t->get_token_description ())); - // skip? 
- return nullptr; - } - } - case ASYNC: - case EXTERN_KW: - case UNSAFE: - case FN_KW: - // function or method - return parse_inherent_impl_function_or_method ( - AST::Visibility::create_private (), std::move (outer_attrs)); - case CONST: - /* lookahead to resolve production - could be function/method or const - * item */ - t = lexer.peek_token (1); - - switch (t->get_id ()) - { - case IDENTIFIER: - case UNDERSCORE: - return parse_const_item (AST::Visibility::create_private (), - std::move (outer_attrs)); - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_inherent_impl_function_or_method ( - AST::Visibility::create_private (), std::move (outer_attrs)); - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in some sort of const item " - "in inherent impl", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? - return nullptr; - } - rust_unreachable (); - default: - add_error (Error (t->get_locus (), - "unrecognised token %qs for item in inherent impl", - t->get_token_description ())); - - // skip? - return nullptr; - } -} - -/* For internal use only by parse_inherent_impl_item() - splits giant method - * into smaller ones and prevents duplication of logic. Strictly, this parses - * a function or method item inside an inherent impl item block. */ -// TODO: make this a templated function with "return type" as type param - -// InherentImplItem is this specialisation of the template while TraitImplItem -// will be the other. 
-template -std::unique_ptr -Parser::parse_inherent_impl_function_or_method ( - AST::Visibility vis, AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - // parse function or method qualifiers - AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); - - skip_token (FN_KW); - - // parse function or method name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - // parse generic params - std::vector> generic_params - = parse_generic_params_in_angles (); - - if (!skip_token (LEFT_PAREN)) - { - // skip after somewhere? - return nullptr; - } - - // now for function vs method disambiguation - method has opening "self" - // param - auto initial_param = parse_self_param (); - - if (!initial_param.has_value () - && initial_param.error () != ParseSelfError::NOT_SELF) - return nullptr; - - /* FIXME: ensure that self param doesn't accidently consume tokens for a - * function one idea is to lookahead up to 4 tokens to see whether self is - * one of them */ - bool is_method = false; - if (initial_param.has_value ()) - { - if ((*initial_param)->is_self ()) - is_method = true; - - /* skip comma so function and method regular params can be parsed in - * same way */ - if (lexer.peek_token ()->get_id () == COMMA) - lexer.skip_token (); - } - - // parse trait function params - std::vector> function_params - = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); - - if (initial_param.has_value ()) - function_params.insert (function_params.begin (), - std::move (*initial_param)); - - if (!skip_token (RIGHT_PAREN)) - { - skip_after_end_block (); - return nullptr; - } - - // parse return type (optional) - std::unique_ptr return_type = parse_function_return_type (); - - // parse where clause (optional) - AST::WhereClause where_clause = parse_where_clause (); - - tl::optional> body = tl::nullopt; - if (lexer.peek_token ()->get_id () == 
SEMICOLON) - lexer.skip_token (); - else - { - auto result = parse_block_expr (); - - if (result == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "could not parse definition in inherent impl %s definition", - is_method ? "method" : "function"); - add_error (std::move (error)); - - skip_after_end_block (); - return nullptr; - } - body = std::move (result); - } - - return std::unique_ptr ( - new AST::Function (std::move (ident), std::move (qualifiers), - std::move (generic_params), std::move (function_params), - std::move (return_type), std::move (where_clause), - std::move (body), std::move (vis), - std::move (outer_attrs), locus)); -} - -// Parses a single trait impl item (item inside a trait impl block). -template -std::unique_ptr -Parser::parse_trait_impl_item () -{ - // parse outer attributes (if they exist) - AST::AttrVec outer_attrs = parse_outer_attributes (); - - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - auto visibility = vis_res.value (); - - // branch on next token: - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case SUPER: - case SELF: - case CRATE: - case DOLLAR_SIGN: - // these seem to be SimplePath tokens, so this is a macro invocation - // semi - return parse_macro_invocation_semi (std::move (outer_attrs)); - case IDENTIFIER: - if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) - return parse_trait_impl_function_or_method (visibility, - std::move (outer_attrs)); - else - return parse_macro_invocation_semi (std::move (outer_attrs)); - case TYPE: - return parse_type_alias (visibility, std::move (outer_attrs)); - case EXTERN_KW: - case UNSAFE: - case FN_KW: - // function or method - return parse_trait_impl_function_or_method (visibility, - std::move (outer_attrs)); - case ASYNC: - return parse_async_item (visibility, std::move (outer_attrs)); - case CONST: - // lookahead to resolve production - could be function/method or const - // item - t = lexer.peek_token 
(1); - - switch (t->get_id ()) - { - case IDENTIFIER: - case UNDERSCORE: - return parse_const_item (visibility, std::move (outer_attrs)); - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_trait_impl_function_or_method (visibility, - std::move (outer_attrs)); - default: - add_error (Error ( - t->get_locus (), - "unexpected token %qs in some sort of const item in trait impl", - t->get_token_description ())); - - lexer.skip_token (1); // TODO: is this right thing to do? - return nullptr; - } - rust_unreachable (); - default: - break; - } - add_error (Error (t->get_locus (), - "unrecognised token %qs for item in trait impl", - t->get_token_description ())); - - // skip? - return nullptr; -} - -/* For internal use only by parse_trait_impl_item() - splits giant method into - * smaller ones and prevents duplication of logic. Strictly, this parses a - * function or method item inside a trait impl item block. */ -template -std::unique_ptr -Parser::parse_trait_impl_function_or_method ( - AST::Visibility vis, AST::AttrVec outer_attrs) -{ - // this shares virtually all logic with - // parse_inherent_impl_function_or_method - // - template? 
- location_t locus = lexer.peek_token ()->get_locus (); - - auto is_default = false; - auto t = lexer.peek_token (); - if (t->get_id () == IDENTIFIER - && t->get_str () == Values::WeakKeywords::DEFAULT) - { - is_default = true; - lexer.skip_token (); - } - - // parse function or method qualifiers - AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); - - skip_token (FN_KW); - - // parse function or method name - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - return nullptr; - } - Identifier ident{ident_tok}; - - // DEBUG: - rust_debug ( - "about to start parsing generic params in trait impl function or method"); - - // parse generic params - std::vector> generic_params - = parse_generic_params_in_angles (); - - // DEBUG: - rust_debug ( - "finished parsing generic params in trait impl function or method"); - - if (!skip_token (LEFT_PAREN)) - { - // skip after somewhere? - return nullptr; - } - - // now for function vs method disambiguation - method has opening "self" - // param - auto initial_param = parse_self_param (); - - if (!initial_param.has_value () - && initial_param.error () != ParseSelfError::NOT_SELF) - return nullptr; - - // FIXME: ensure that self param doesn't accidently consume tokens for a - // function - bool is_method = false; - if (initial_param.has_value ()) - { - if ((*initial_param)->is_self ()) - is_method = true; - - // skip comma so function and method regular params can be parsed in - // same way - if (lexer.peek_token ()->get_id () == COMMA) - { - lexer.skip_token (); - } - - // DEBUG - rust_debug ("successfully parsed self param in method trait impl item"); - } - - // DEBUG - rust_debug ( - "started to parse function params in function or method trait impl item"); - - // parse trait function params (only if next token isn't right paren) - std::vector> function_params; - if (lexer.peek_token ()->get_id () != RIGHT_PAREN) - { - function_params - = parse_function_params ([] (TokenId id) 
{ return id == RIGHT_PAREN; }); - - if (function_params.empty ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse function params in trait impl %s definition", - is_method ? "method" : "function"); - add_error (std::move (error)); - - skip_after_next_block (); - return nullptr; - } - } - - if (initial_param.has_value ()) - function_params.insert (function_params.begin (), - std::move (*initial_param)); - - // DEBUG - rust_debug ("successfully parsed function params in function or method " - "trait impl item"); - - if (!skip_token (RIGHT_PAREN)) - { - skip_after_next_block (); - return nullptr; - } - - // parse return type (optional) - std::unique_ptr return_type = parse_function_return_type (); - - // DEBUG - rust_debug ( - "successfully parsed return type in function or method trait impl item"); - - // parse where clause (optional) - AST::WhereClause where_clause = parse_where_clause (); - - // DEBUG - rust_debug ( - "successfully parsed where clause in function or method trait impl item"); - - // parse function definition (in block) - semicolon not allowed - tl::optional> body = tl::nullopt; - - if (lexer.peek_token ()->get_id () == SEMICOLON) - lexer.skip_token (); - else - { - auto result = parse_block_expr (); - if (result == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse definition in trait impl %s definition", - is_method ? "method" : "function"); - add_error (std::move (error)); - - skip_after_end_block (); - return nullptr; - } - body = std::move (result); - } - - return std::unique_ptr ( - new AST::Function (std::move (ident), std::move (qualifiers), - std::move (generic_params), std::move (function_params), - std::move (return_type), std::move (where_clause), - std::move (body), std::move (vis), - std::move (outer_attrs), locus, is_default)); -} - -// Parses an extern block of declarations. 
-template -std::unique_ptr -Parser::parse_extern_block (AST::Visibility vis, - AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (EXTERN_KW); - - // detect optional abi name - std::string abi; - const_TokenPtr next_tok = lexer.peek_token (); - if (next_tok->get_id () == STRING_LITERAL) - { - lexer.skip_token (); - abi = next_tok->get_str (); - } - - if (!skip_token (LEFT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse declarations inside extern block - std::vector> extern_items; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - std::unique_ptr extern_item = parse_external_item (); - - if (extern_item == nullptr) - { - Error error (t->get_locus (), - "failed to parse external item despite not reaching " - "end of extern block"); - add_error (std::move (error)); - - return nullptr; - } - - extern_items.push_back (std::move (extern_item)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere - return nullptr; - } - - extern_items.shrink_to_fit (); - - return std::unique_ptr ( - new AST::ExternBlock (std::move (abi), std::move (extern_items), - std::move (vis), std::move (inner_attrs), - std::move (outer_attrs), locus)); -} - -// Parses a single extern block item (static or function declaration). 
-template -std::unique_ptr -Parser::parse_external_item () -{ - // parse optional outer attributes - AST::AttrVec outer_attrs = parse_outer_attributes (); - - location_t locus = lexer.peek_token ()->get_locus (); - - // parse optional visibility - auto vis_res = parse_visibility (); - if (!vis_res) - return nullptr; - auto vis = vis_res.value (); - - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case IDENTIFIER: - return parse_macro_invocation_semi (outer_attrs); - case STATIC_KW: - { - // parse extern static item - lexer.skip_token (); - - // parse mut (optional) - bool has_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - lexer.skip_token (); - has_mut = true; - } - - // parse identifier - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - skip_after_semicolon (); - return nullptr; - } - Identifier ident{ident_tok}; - - if (!skip_token (COLON)) - { - skip_after_semicolon (); - return nullptr; - } - - // parse type (required) - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type in external static item"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - - if (!skip_token (SEMICOLON)) - { - // skip after somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ExternalStaticItem (std::move (ident), std::move (type), - has_mut, std::move (vis), - std::move (outer_attrs), locus)); - } - case FN_KW: - return parse_function (std::move (vis), std::move (outer_attrs), true); - - case TYPE: - return parse_external_type_item (std::move (vis), - std::move (outer_attrs)); - default: - // error - add_error ( - Error (t->get_locus (), - "unrecognised token %qs in extern block item declaration", - t->get_token_description ())); - - skip_after_semicolon (); - return nullptr; - } -} - -// Parses a statement (will further disambiguate any statement). 
-template -std::unique_ptr -Parser::parse_stmt (ParseRestrictions restrictions) -{ - // quick exit for empty statement - // FIXME: Can we have empty statements without semicolons? Just nothing? - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () == SEMICOLON) - { - lexer.skip_token (); - return std::unique_ptr ( - new AST::EmptyStmt (t->get_locus ())); - } - - // parse outer attributes - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parsing this will be annoying because of the many different possibilities - /* best may be just to copy paste in parse_item switch, and failing that try - * to parse outer attributes, and then pass them in to either a let - * statement or (fallback) expression statement. */ - // FIXME: think of a way to do this without such a large switch? - t = lexer.peek_token (); - switch (t->get_id ()) - { - case LET: - // let statement - return parse_let_stmt (std::move (outer_attrs), restrictions); - case PUB: - case MOD: - case EXTERN_KW: - case USE: - case FN_KW: - case TYPE: - case STRUCT_KW: - case ENUM_KW: - case CONST: - case STATIC_KW: - case AUTO: - case TRAIT: - case IMPL: - case MACRO: - /* TODO: implement union keyword but not really because of - * context-dependence crappy hack way to parse a union written below to - * separate it from the good code. 
*/ - // case UNION: - case UNSAFE: // maybe - unsafe traits are a thing - /* if any of these (should be all possible VisItem prefixes), parse a - * VisItem can't parse item because would require reparsing outer - * attributes */ - // may also be unsafe block - if (lexer.peek_token (1)->get_id () == LEFT_CURLY) - { - return parse_expr_stmt (std::move (outer_attrs), restrictions); - } - else - { - return parse_vis_item (std::move (outer_attrs)); - } - break; - // crappy hack to do union "keyword" - case IDENTIFIER: - if (t->get_str () == Values::WeakKeywords::UNION - && lexer.peek_token (1)->get_id () == IDENTIFIER) - { - return parse_vis_item (std::move (outer_attrs)); - // or should this go straight to parsing union? - } - else if (is_macro_rules_def (t)) - { - // macro_rules! macro item - return parse_macro_rules_def (std::move (outer_attrs)); - } - gcc_fallthrough (); - // TODO: find out how to disable gcc "implicit fallthrough" warning - default: - // fallback: expression statement - return parse_expr_stmt (std::move (outer_attrs), restrictions); - break; - } -} - -// Parses a let statement. 
-template -std::unique_ptr -Parser::parse_let_stmt (AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (LET); - - // parse pattern (required) - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in let statement"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - - // parse type declaration (optional) - std::unique_ptr type = nullptr; - if (lexer.peek_token ()->get_id () == COLON) - { - // must have a type declaration - lexer.skip_token (); - - type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type in let statement"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - } - - // parse expression to set variable to (optional) - std::unique_ptr expr = nullptr; - if (lexer.peek_token ()->get_id () == EQUAL) - { - // must have an expression - lexer.skip_token (); - - expr = parse_expr (); - if (expr == nullptr) - { - skip_after_semicolon (); - return nullptr; - } - } - - tl::optional> else_expr = tl::nullopt; - if (maybe_skip_token (ELSE)) - else_expr = parse_block_expr (); - - if (restrictions.consume_semi) - { - // `stmt` macro variables are parsed without a semicolon, but should be - // parsed as a full statement when interpolated. This should be handled - // by having the interpolated statement be distinguishable from normal - // tokens, e.g. by NT tokens. - if (restrictions.allow_close_after_expr_stmt) - maybe_skip_token (SEMICOLON); - else if (!skip_token (SEMICOLON)) - return nullptr; - } - - return std::unique_ptr ( - new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type), - std::move (else_expr), std::move (outer_attrs), locus)); -} - -// Parses a type path. 
-template -AST::TypePath -Parser::parse_type_path () -{ - bool has_opening_scope_resolution = false; - location_t locus = lexer.peek_token ()->get_locus (); - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) - { - has_opening_scope_resolution = true; - lexer.skip_token (); - } - - // create segment vector - std::vector> segments; - - // parse required initial segment - std::unique_ptr initial_segment - = parse_type_path_segment (); - if (initial_segment == nullptr) - { - // skip after somewhere? - // don't necessarily throw error but yeah - return AST::TypePath::create_error (); - } - segments.push_back (std::move (initial_segment)); - - // parse optional segments (as long as scope resolution operator exists) - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == SCOPE_RESOLUTION) - { - // skip scope resolution operator - lexer.skip_token (); - - // parse the actual segment - it is an error if it doesn't exist now - std::unique_ptr segment - = parse_type_path_segment (); - if (segment == nullptr) - { - // skip after somewhere? - Error error (t->get_locus (), "could not parse type path segment"); - add_error (std::move (error)); - - return AST::TypePath::create_error (); - } - - segments.push_back (std::move (segment)); - - t = lexer.peek_token (); - } - - segments.shrink_to_fit (); - - return AST::TypePath (std::move (segments), locus, - has_opening_scope_resolution); -} - -template -tl::optional -Parser::parse_generic_arg () -{ - auto tok = lexer.peek_token (); - std::unique_ptr expr = nullptr; - - switch (tok->get_id ()) - { - case IDENTIFIER: - { - // This is a bit of a weird situation: With an identifier token, we - // could either have a valid type or a macro (FIXME: anything else?). 
So - // we need one bit of lookahead to differentiate if this is really - auto next_tok = lexer.peek_token (1); - if (next_tok->get_id () == LEFT_ANGLE - || next_tok->get_id () == SCOPE_RESOLUTION - || next_tok->get_id () == EXCLAM) - { - auto type = parse_type (); - if (type) - return AST::GenericArg::create_type (std::move (type)); - else - return tl::nullopt; - } - else if (next_tok->get_id () == COLON) - { - lexer.skip_token (); // skip ident - lexer.skip_token (); // skip colon - - auto tok = lexer.peek_token (); - std::vector> bounds - = parse_type_param_bounds (); - - auto type = std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), tok->get_locus (), - false)); - if (type) - return AST::GenericArg::create_type (std::move (type)); - else - return tl::nullopt; - } - lexer.skip_token (); - return AST::GenericArg::create_ambiguous (tok->get_str (), - tok->get_locus ()); - } - case LEFT_CURLY: - expr = parse_block_expr (); - break; - case MINUS: - case STRING_LITERAL: - case CHAR_LITERAL: - case INT_LITERAL: - case FLOAT_LITERAL: - case TRUE_LITERAL: - case FALSE_LITERAL: - expr = parse_literal_expr (); - break; - // FIXME: Because of this, error reporting is garbage for const generic - // parameter's default values - default: - { - auto type = parse_type (); - // FIXME: Find a better way to do this? - if (type) - return AST::GenericArg::create_type (std::move (type)); - else - return tl::nullopt; - } - } - - if (!expr) - return tl::nullopt; - - return AST::GenericArg::create_const (std::move (expr)); -} - -// Parses the generic arguments in each path segment. -template -AST::GenericArgs -Parser::parse_path_generic_args () -{ - if (lexer.peek_token ()->get_id () == LEFT_SHIFT) - lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); - - if (!skip_token (LEFT_ANGLE)) - { - // skip after somewhere? - return AST::GenericArgs::create_empty (); - } - - // We need to parse all lifetimes, then parse types and const generics in - // any order. 
- - // try to parse lifetimes first - std::vector lifetime_args; - - const_TokenPtr t = lexer.peek_token (); - location_t locus = t->get_locus (); - while (!is_right_angle_tok (t->get_id ())) - { - auto lifetime = parse_lifetime (false); - if (!lifetime) - { - // not necessarily an error - break; - } - - lifetime_args.push_back (std::move (lifetime.value ())); - - // if next token isn't comma, then it must be end of list - if (lexer.peek_token ()->get_id () != COMMA) - { - break; - } - // skip comma - lexer.skip_token (); - - t = lexer.peek_token (); - } - - // try to parse types and const generics second - std::vector generic_args; - - // TODO: think of better control structure - t = lexer.peek_token (); - while (!is_right_angle_tok (t->get_id ())) - { - // FIXME: Is it fine to break if there is one binding? Can't there be - // bindings in between types? - - // ensure not binding being parsed as type accidently - if (t->get_id () == IDENTIFIER - && lexer.peek_token (1)->get_id () == EQUAL) - break; - - auto arg = parse_generic_arg (); - if (arg) - { - generic_args.emplace_back (std::move (arg.value ())); - } - - // FIXME: Do we need to break if we encounter an error? 
- - // if next token isn't comma, then it must be end of list - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip comma - lexer.skip_token (); - t = lexer.peek_token (); - } - - // try to parse bindings third - std::vector binding_args; - - // TODO: think of better control structure - t = lexer.peek_token (); - while (!is_right_angle_tok (t->get_id ())) - { - AST::GenericArgsBinding binding = parse_generic_args_binding (); - if (binding.is_error ()) - { - // not necessarily an error - break; - } - - binding_args.push_back (std::move (binding)); - - // if next token isn't comma, then it must be end of list - if (lexer.peek_token ()->get_id () != COMMA) - { - break; - } - // skip comma - lexer.skip_token (); - - t = lexer.peek_token (); - } - - // skip any trailing commas - if (lexer.peek_token ()->get_id () == COMMA) - lexer.skip_token (); - - if (!skip_generics_right_angle ()) - return AST::GenericArgs::create_empty (); - - lifetime_args.shrink_to_fit (); - generic_args.shrink_to_fit (); - binding_args.shrink_to_fit (); - - return AST::GenericArgs (std::move (lifetime_args), std::move (generic_args), - std::move (binding_args), locus); -} - -// Parses a binding in a generic args path segment. -template -AST::GenericArgsBinding -Parser::parse_generic_args_binding () -{ - const_TokenPtr ident_tok = lexer.peek_token (); - if (ident_tok->get_id () != IDENTIFIER) - { - // allow non error-inducing use - // skip somewhere? - return AST::GenericArgsBinding::create_error (); - } - lexer.skip_token (); - Identifier ident{ident_tok}; - - if (!skip_token (EQUAL)) - { - // skip after somewhere? - return AST::GenericArgsBinding::create_error (); - } - - // parse type (required) - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - // skip somewhere? 
- return AST::GenericArgsBinding::create_error (); - } - - return AST::GenericArgsBinding (std::move (ident), std::move (type), - ident_tok->get_locus ()); -} - -/* Parses a single type path segment (not including opening scope resolution, - * but includes any internal ones). Includes generic args or type path - * functions too. */ -template -std::unique_ptr -Parser::parse_type_path_segment () -{ - location_t locus = lexer.peek_token ()->get_locus (); - // parse ident segment part - auto ident_segment_res = parse_path_ident_segment (); - if (!ident_segment_res) - { - // not necessarily an error - return nullptr; - } - auto ident_segment = ident_segment_res.value (); - - /* lookahead to determine if variants exist - only consume scope resolution - * then */ - bool has_separating_scope_resolution = false; - const_TokenPtr next = lexer.peek_token (1); - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION - && (next->get_id () == LEFT_ANGLE || next->get_id () == LEFT_PAREN)) - { - has_separating_scope_resolution = true; - lexer.skip_token (); - } - - // branch into variants on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_SHIFT: - case LEFT_ANGLE: - { - // parse generic args - AST::GenericArgs generic_args = parse_path_generic_args (); - - return std::unique_ptr ( - new AST::TypePathSegmentGeneric (std::move (ident_segment), - has_separating_scope_resolution, - std::move (generic_args), locus)); - } - case LEFT_PAREN: - { - // parse type path function - AST::TypePathFunction type_path_function - = parse_type_path_function (locus); - - if (type_path_function.is_error ()) - { - // skip after somewhere? 
- return nullptr; - } - - return std::unique_ptr ( - new AST::TypePathSegmentFunction (std::move (ident_segment), - has_separating_scope_resolution, - std::move (type_path_function), - locus)); - } - default: - // neither of them - return std::unique_ptr ( - new AST::TypePathSegment (std::move (ident_segment), - has_separating_scope_resolution, locus)); - } - rust_unreachable (); -} - -// Parses a function call representation inside a type path. -template -AST::TypePathFunction -Parser::parse_type_path_function (location_t id_location) -{ - if (!skip_token (LEFT_PAREN)) - { - // skip somewhere? - return AST::TypePathFunction::create_error (); - } - - // parse function inputs - std::vector> inputs; - - while (lexer.peek_token ()->get_id () != RIGHT_PAREN) - { - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - /* this is an error as there should've been a ')' there if there - * wasn't a type */ - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse type in parameters of type path function"); - add_error (std::move (error)); - - // skip somewhere? - return AST::TypePathFunction::create_error (); - } - - inputs.push_back (std::move (type)); - - // skip commas, including trailing commas - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - // skip somewhere? - return AST::TypePathFunction::create_error (); - } - - // parse optional return type - std::unique_ptr return_type = parse_function_return_type (); - - inputs.shrink_to_fit (); - return AST::TypePathFunction (std::move (inputs), id_location, - std::move (return_type)); -} - -// Parses a path inside an expression that allows generic arguments. 
-template -AST::PathInExpression -Parser::parse_path_in_expression () -{ - location_t locus = UNKNOWN_LOCATION; - bool has_opening_scope_resolution = false; - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) - { - has_opening_scope_resolution = true; - - locus = lexer.peek_token ()->get_locus (); - - lexer.skip_token (); - } - - // create segment vector - std::vector segments; - - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - } - - // parse required initial segment - AST::PathExprSegment initial_segment = parse_path_expr_segment (); - if (initial_segment.is_error ()) - { - // skip after somewhere? - // don't necessarily throw error but yeah - return AST::PathInExpression::create_error (); - } - segments.push_back (std::move (initial_segment)); - - // parse optional segments (as long as scope resolution operator exists) - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == SCOPE_RESOLUTION) - { - // skip scope resolution operator - lexer.skip_token (); - - // parse the actual segment - it is an error if it doesn't exist now - AST::PathExprSegment segment = parse_path_expr_segment (); - if (segment.is_error ()) - { - // skip after somewhere? - Error error (t->get_locus (), - "could not parse path expression segment"); - add_error (std::move (error)); - - return AST::PathInExpression::create_error (); - } - - segments.push_back (std::move (segment)); - - t = lexer.peek_token (); - } - - segments.shrink_to_fit (); - - return AST::PathInExpression (std::move (segments), {}, locus, - has_opening_scope_resolution); -} - -/* Parses a single path in expression path segment (including generic - * arguments). */ -template -AST::PathExprSegment -Parser::parse_path_expr_segment () -{ - location_t locus = lexer.peek_token ()->get_locus (); - // parse ident segment - auto ident_result = parse_path_ident_segment (); - if (!ident_result) - { - // not necessarily an error? 
- return AST::PathExprSegment::create_error (); - } - auto ident = ident_result.value (); - - // parse generic args (and turbofish), if they exist - /* use lookahead to determine if they actually exist (don't want to - * accidently parse over next ident segment) */ - if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION - && (lexer.peek_token (1)->get_id () == LEFT_ANGLE - || lexer.peek_token (1)->get_id () == LEFT_SHIFT)) - { - // skip scope resolution - lexer.skip_token (); - - // Let parse_path_generic_args split "<<" tokens - AST::GenericArgs generic_args = parse_path_generic_args (); - - return AST::PathExprSegment (std::move (ident), locus, - std::move (generic_args)); - } - - // return a generic parameter-less expr segment if not found - return AST::PathExprSegment (std::move (ident), locus); -} - -/* Parses a fully qualified path in expression (i.e. a pattern). FIXME does - * not parse outer attrs. */ -template -AST::QualifiedPathInExpression -Parser::parse_qualified_path_in_expression ( - location_t pratt_parsed_loc) -{ - /* Note: the Rust grammar is defined in such a way that it is impossible to - * determine whether a prospective qualified path is a - * QualifiedPathInExpression or QualifiedPathInType in all cases by the - * rules themselves (the only possible difference is a TypePathSegment with - * function, and lookahead to find this is too difficult). However, as this - * is a pattern and QualifiedPathInType is a type, I believe it that their - * construction will not be confused (due to rules regarding patterns vs - * types). - * As such, this function will not attempt to minimise errors created by - * their confusion. */ - - // parse the qualified path type (required) - AST::QualifiedPathType qual_path_type - = parse_qualified_path_type (pratt_parsed_loc); - if (qual_path_type.is_error ()) - { - // TODO: should this create a parse error? 
- return AST::QualifiedPathInExpression::create_error (); - } - location_t locus = qual_path_type.get_locus (); - - // parse path segments - std::vector segments; - - // parse initial required segment - if (!expect_token (SCOPE_RESOLUTION)) - { - // skip after somewhere? - - return AST::QualifiedPathInExpression::create_error (); - } - AST::PathExprSegment initial_segment = parse_path_expr_segment (); - if (initial_segment.is_error ()) - { - // skip after somewhere? - Error error (lexer.peek_token ()->get_locus (), - "required initial path expression segment in " - "qualified path in expression could not be parsed"); - add_error (std::move (error)); - - return AST::QualifiedPathInExpression::create_error (); - } - segments.push_back (std::move (initial_segment)); - - // parse optional segments (as long as scope resolution operator exists) - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == SCOPE_RESOLUTION) - { - // skip scope resolution operator - lexer.skip_token (); - - // parse the actual segment - it is an error if it doesn't exist now - AST::PathExprSegment segment = parse_path_expr_segment (); - if (segment.is_error ()) - { - // skip after somewhere? - Error error (t->get_locus (), - "could not parse path expression segment in qualified " - "path in expression"); - add_error (std::move (error)); - - return AST::QualifiedPathInExpression::create_error (); - } - - segments.push_back (std::move (segment)); - - t = lexer.peek_token (); - } - - segments.shrink_to_fit (); - - // FIXME: outer attr parsing - return AST::QualifiedPathInExpression (std::move (qual_path_type), - std::move (segments), {}, locus); -} - -// Parses the type syntactical construction at the start of a qualified path. -template -AST::QualifiedPathType -Parser::parse_qualified_path_type ( - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - /* TODO: should this actually be error? is there anywhere where this could - * be valid? 
*/ - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - - if (lexer.peek_token ()->get_id () == LEFT_SHIFT) - lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); - - // skip after somewhere? - if (!skip_token (LEFT_ANGLE)) - return AST::QualifiedPathType::create_error (); - } - - // parse type (required) - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in qualified path type"); - add_error (std::move (error)); - - // skip somewhere? - return AST::QualifiedPathType::create_error (); - } - - // parse optional as clause - AST::TypePath as_type_path = AST::TypePath::create_error (); - if (lexer.peek_token ()->get_id () == AS) - { - lexer.skip_token (); - - // parse type path, which is required now - as_type_path = parse_type_path (); - if (as_type_path.is_error ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "could not parse type path in as clause in qualified path type"); - add_error (std::move (error)); - - // skip somewhere? - return AST::QualifiedPathType::create_error (); - } - } - - /* NOTE: should actually be a right-angle token, so - * skip_generics_right_angle shouldn't be required */ - if (!skip_token (RIGHT_ANGLE)) - { - // skip after somewhere? - return AST::QualifiedPathType::create_error (); - } - - return AST::QualifiedPathType (std::move (type), locus, - std::move (as_type_path)); -} - -// Parses a fully qualified path in type (i.e. a type). -template -AST::QualifiedPathInType -Parser::parse_qualified_path_in_type () -{ - location_t locus = lexer.peek_token ()->get_locus (); - // parse the qualified path type (required) - AST::QualifiedPathType qual_path_type = parse_qualified_path_type (); - if (qual_path_type.is_error ()) - { - // TODO: should this create a parse error? 
- return AST::QualifiedPathInType::create_error (); - } - - // parse initial required segment - if (!expect_token (SCOPE_RESOLUTION)) - { - // skip after somewhere? - - return AST::QualifiedPathInType::create_error (); - } - std::unique_ptr initial_segment - = parse_type_path_segment (); - if (initial_segment == nullptr) - { - // skip after somewhere? - Error error (lexer.peek_token ()->get_locus (), - "required initial type path segment in qualified path in " - "type could not be parsed"); - add_error (std::move (error)); - - return AST::QualifiedPathInType::create_error (); - } - - // parse optional segments (as long as scope resolution operator exists) - std::vector> segments; - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == SCOPE_RESOLUTION) - { - // skip scope resolution operator - lexer.skip_token (); - - // parse the actual segment - it is an error if it doesn't exist now - std::unique_ptr segment - = parse_type_path_segment (); - if (segment == nullptr) - { - // skip after somewhere? - Error error ( - t->get_locus (), - "could not parse type path segment in qualified path in type"); - add_error (std::move (error)); - - return AST::QualifiedPathInType::create_error (); - } - - segments.push_back (std::move (segment)); - - t = lexer.peek_token (); - } - - segments.shrink_to_fit (); - - return AST::QualifiedPathInType (std::move (qual_path_type), - std::move (initial_segment), - std::move (segments), locus); -} - -// Parses a self param. Also handles self param not existing. 
-template -tl::expected, ParseSelfError> -Parser::parse_self_param () -{ - bool has_reference = false; - AST::Lifetime lifetime = AST::Lifetime::elided (); - - location_t locus = lexer.peek_token ()->get_locus (); - - // TODO: Feels off, find a better way to clearly express this - std::vector> ptrs - = {{ASTERISK, SELF} /* *self */, - {ASTERISK, CONST, SELF} /* *const self */, - {ASTERISK, MUT, SELF} /* *mut self */}; - - for (auto &s : ptrs) - { - size_t i = 0; - for (i = 0; i < s.size (); i++) - if (lexer.peek_token (i)->get_id () != s[i]) - break; - if (i == s.size ()) - { - rust_error_at (lexer.peek_token ()->get_locus (), - "cannot pass % by raw pointer"); - return tl::make_unexpected (ParseSelfError::SELF_PTR); - } - } - - // Trying to find those patterns: - // - // &'lifetime mut self - // &'lifetime self - // & mut self - // & self - // mut self - // self - // - // If not found, it is probably a function, exit and let function parsing - // handle it. - bool is_self = false; - for (size_t i = 0; i < 5; i++) - if (lexer.peek_token (i)->get_id () == SELF) - is_self = true; - - if (!is_self) - return tl::make_unexpected (ParseSelfError::NOT_SELF); - - // test if self is a reference parameter - if (lexer.peek_token ()->get_id () == AMP) - { - has_reference = true; - lexer.skip_token (); - - // now test whether it has a lifetime - if (lexer.peek_token ()->get_id () == LIFETIME) - { - // something went wrong somehow - if (auto parsed_lifetime = parse_lifetime (true)) - { - lifetime = parsed_lifetime.value (); - } - else - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse lifetime in self param"); - add_error (std::move (error)); - - // skip after somewhere? 
- return tl::make_unexpected (ParseSelfError::PARSING); - } - } - } - - // test for mut - bool has_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - has_mut = true; - lexer.skip_token (); - } - - // skip self token - const_TokenPtr self_tok = lexer.peek_token (); - if (self_tok->get_id () != SELF) - { - // skip after somewhere? - return tl::make_unexpected (ParseSelfError::NOT_SELF); - } - lexer.skip_token (); - - // parse optional type - std::unique_ptr type = nullptr; - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - // type is now required - type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse type in self param"); - add_error (std::move (error)); - - // skip after somewhere? - return tl::make_unexpected (ParseSelfError::PARSING); - } - } - - // ensure that cannot have both type and reference - if (type != nullptr && has_reference) - { - Error error ( - lexer.peek_token ()->get_locus (), - "cannot have both a reference and a type specified in a self param"); - add_error (std::move (error)); - - // skip after somewhere? - return tl::make_unexpected (ParseSelfError::PARSING); - } - - if (has_reference) - { - return std::make_unique (std::move (lifetime), has_mut, - locus); - } - else - { - // note that type may be nullptr here and that's fine - return std::make_unique (std::move (type), has_mut, - locus); - } -} - -/* Parses an expression or macro statement. 
*/ -template -std::unique_ptr -Parser::parse_expr_stmt (AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - location_t locus = lexer.peek_token ()->get_locus (); - - std::unique_ptr expr; - - switch (lexer.peek_token ()->get_id ()) - { - case IDENTIFIER: - case CRATE: - case SUPER: - case SELF: - case SELF_ALIAS: - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - { - AST::PathInExpression path = parse_path_in_expression (); - std::unique_ptr null_denotation; - - if (lexer.peek_token ()->get_id () == EXCLAM) - { - std::unique_ptr invoc - = parse_macro_invocation_partial (std::move (path), - std::move (outer_attrs)); - - if (restrictions.consume_semi && maybe_skip_token (SEMICOLON)) - { - invoc->add_semicolon (); - // Macro invocation with semicolon. - return invoc; - } - - TokenId after_macro = lexer.peek_token ()->get_id (); - - if (restrictions.allow_close_after_expr_stmt - && (after_macro == RIGHT_PAREN || after_macro == RIGHT_CURLY - || after_macro == RIGHT_SQUARE)) - return invoc; - - if (invoc->get_invoc_data ().get_delim_tok_tree ().get_delim_type () - == AST::CURLY - && after_macro != DOT && after_macro != QUESTION_MARK) - { - rust_debug ("braced macro statement"); - return invoc; - } - - null_denotation = std::move (invoc); - } - else - { - null_denotation - = null_denotation_path (std::move (path), {}, restrictions); - } - - expr = left_denotations (std::move (null_denotation), LBP_LOWEST, - std::move (outer_attrs), restrictions); - break; - } - default: - restrictions.expr_can_be_stmt = true; - expr = parse_expr (std::move (outer_attrs), restrictions); - break; - } - - if (expr == nullptr) - { - // expr is required, error - Error error (lexer.peek_token ()->get_locus (), - "failed to parse expr in expr statement"); - add_error (std::move (error)); - - skip_after_semicolon (); - return nullptr; - } - - bool has_semi = false; - - if (restrictions.consume_semi) - { - if (maybe_skip_token (SEMICOLON)) - { - has_semi = true; - } - else if 
(expr->is_expr_without_block ()) - { - if (restrictions.allow_close_after_expr_stmt) - { - TokenId id = lexer.peek_token ()->get_id (); - if (id != RIGHT_PAREN && id != RIGHT_CURLY && id != RIGHT_SQUARE) - { - expect_token (SEMICOLON); - return nullptr; - } - } - else - { - expect_token (SEMICOLON); - return nullptr; - } - } - } - - return std::unique_ptr ( - new AST::ExprStmt (std::move (expr), locus, has_semi)); -} - -// Parses a block expression, including the curly braces at start and end. -template -std::unique_ptr -Parser::parse_block_expr ( - AST::AttrVec outer_attrs, tl::optional label, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - if (!skip_token (LEFT_CURLY)) - { - skip_after_end_block (); - return nullptr; - } - } - - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse statements and expression - std::vector> stmts; - std::unique_ptr expr = nullptr; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - ExprOrStmt expr_or_stmt = parse_stmt_or_expr (); - if (expr_or_stmt.is_error ()) - { - skip_after_end_block (); - return nullptr; - } - - t = lexer.peek_token (); - - if (expr_or_stmt.stmt != nullptr) - { - stmts.push_back (std::move (expr_or_stmt.stmt)); - } - else - { - // assign to expression and end parsing inside - expr = std::move (expr_or_stmt.expr); - break; - } - } - - location_t end_locus = t->get_locus (); - - if (!skip_token (RIGHT_CURLY)) - { - Error error (t->get_locus (), - "error may be from having an expression (as opposed to " - "statement) in the body of the function but not last"); - add_error (std::move (error)); - - skip_after_end_block (); - return nullptr; - } - - // grammar allows for empty block expressions - - stmts.shrink_to_fit (); - - return std::unique_ptr ( - new AST::BlockExpr (std::move (stmts), std::move (expr), - std::move (inner_attrs), std::move (outer_attrs), - 
std::move (label), locus, end_locus)); -} - -/* Parse an anonymous const expression. This can be a regular const expression - * or an underscore for deferred const inference */ -template -tl::expected -Parser::parse_anon_const () -{ - auto current = lexer.peek_token (); - auto locus = current->get_locus (); - - // Special case deferred inference constants - if (maybe_skip_token (UNDERSCORE)) - return AST::AnonConst (locus); - - auto expr = parse_expr (); - - if (!expr) - return tl::make_unexpected (AnonConstError::InvalidSizeExpr); - - return AST::AnonConst (std::move (expr), locus); -} - -/* Parse a "const block", a block preceded by the `const` keyword whose - * statements can be const evaluated and used in constant contexts */ -template -std::unique_ptr -Parser::parse_const_block_expr (AST::AttrVec outer_attrs, - location_t locus) -{ - auto block = parse_block_expr (); - - if (!block) - { - add_error (Error (locus, "failed to parse inner block in const block")); - skip_after_end_block (); - - return nullptr; - } - - auto block_locus = block->get_locus (); - - return std::make_unique (AST::AnonConst (std::move (block), - block_locus), - locus, std::move (outer_attrs)); -} - -/* Parses a "grouped" expression (expression in parentheses), used to control - * precedence. */ -template -std::unique_ptr -Parser::parse_grouped_expr (AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (LEFT_PAREN); - - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse required expr inside parentheses - std::unique_ptr expr_in_parens = parse_expr (); - if (expr_in_parens == nullptr) - { - // skip after somewhere? - // error? - return nullptr; - } - - if (!skip_token (RIGHT_PAREN)) - { - // skip after somewhere? 
- return nullptr; - } - - return std::unique_ptr ( - new AST::GroupedExpr (std::move (expr_in_parens), std::move (inner_attrs), - std::move (outer_attrs), locus)); -} - -// Parses a closure expression (closure definition). -template -std::unique_ptr -Parser::parse_closure_expr (AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - // detect optional "move" - bool has_move = false; - if (lexer.peek_token ()->get_id () == MOVE) - { - lexer.skip_token (); - has_move = true; - } - - // handle parameter list - std::vector params; - - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case OR: - // skip token, no parameters - lexer.skip_token (); - break; - case PIPE: - // actually may have parameters - lexer.skip_token (); - t = lexer.peek_token (); - - while (t->get_id () != PIPE) - { - AST::ClosureParam param = parse_closure_param (); - if (param.is_error ()) - { - // TODO is this really an error? - Error error (t->get_locus (), "could not parse closure param"); - add_error (std::move (error)); - - break; - } - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - { - lexer.skip_token (); - // not an error but means param list is done - break; - } - // skip comma - lexer.skip_token (); - - t = lexer.peek_token (); - } - params.shrink_to_fit (); - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in closure expression - expected " - "%<|%> or %<||%>", - t->get_token_description ())); - - // skip somewhere? 
- return nullptr; - } - - // again branch based on next token - t = lexer.peek_token (); - if (t->get_id () == RETURN_TYPE) - { - // must be return type closure with block expr - - // skip "return type" token - lexer.skip_token (); - - // parse actual type, which is required - std::unique_ptr type = parse_type_no_bounds (); - if (type == nullptr) - { - // error - Error error (t->get_locus (), "failed to parse type for closure"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - // parse block expr, which is required - std::unique_ptr block = parse_block_expr (); - if (block == nullptr) - { - // error - Error error (lexer.peek_token ()->get_locus (), - "failed to parse block expr in closure"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ClosureExprInnerTyped (std::move (type), std::move (block), - std::move (params), locus, has_move, - std::move (outer_attrs))); - } - else - { - // must be expr-only closure - - // parse expr, which is required - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (t->get_locus (), - "failed to parse expression in closure"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ClosureExprInner (std::move (expr), std::move (params), locus, - has_move, std::move (outer_attrs))); - } -} - -// Parses a literal token (to literal expression). 
-template -std::unique_ptr -Parser::parse_literal_expr (AST::AttrVec outer_attrs) -{ - // TODO: change if literal representation in lexer changes - - std::string literal_value; - AST::Literal::LitType type = AST::Literal::STRING; - - // branch based on token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case CHAR_LITERAL: - type = AST::Literal::CHAR; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case STRING_LITERAL: - type = AST::Literal::STRING; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case BYTE_CHAR_LITERAL: - type = AST::Literal::BYTE; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case BYTE_STRING_LITERAL: - type = AST::Literal::BYTE_STRING; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case RAW_STRING_LITERAL: - type = AST::Literal::RAW_STRING; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case INT_LITERAL: - type = AST::Literal::INT; - literal_value = t->get_str (); - lexer.skip_token (); - break; - case FLOAT_LITERAL: - type = AST::Literal::FLOAT; - literal_value = t->get_str (); - lexer.skip_token (); - break; - // case BOOL_LITERAL - // use true and false keywords rather than "bool literal" Rust terminology - case TRUE_LITERAL: - type = AST::Literal::BOOL; - literal_value = Values::Keywords::TRUE_LITERAL; - lexer.skip_token (); - break; - case FALSE_LITERAL: - type = AST::Literal::BOOL; - literal_value = Values::Keywords::FALSE_LITERAL; - lexer.skip_token (); - break; - default: - // error - cannot be a literal expr - add_error (Error (t->get_locus (), - "unexpected token %qs when parsing literal expression", - t->get_token_description ())); - - // skip? 
- return nullptr; - } - - // create literal based on stuff in switch - return std::unique_ptr ( - new AST::LiteralExpr (std::move (literal_value), std::move (type), - t->get_type_hint (), std::move (outer_attrs), - t->get_locus ())); -} - -template -std::unique_ptr -Parser::parse_box_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (BOX); - } - - ParseRestrictions restrictions; - restrictions.expr_can_be_null = false; - - std::unique_ptr expr = parse_expr (AST::AttrVec (), restrictions); - - return std::unique_ptr ( - new AST::BoxExpr (std::move (expr), std::move (outer_attrs), locus)); -} - -// Parses a return expression (including any expression to return). -template -std::unique_ptr -Parser::parse_return_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (RETURN_KW); - } - - // parse expression to return, if it exists - ParseRestrictions restrictions; - restrictions.expr_can_be_null = true; - std::unique_ptr returned_expr - = parse_expr (AST::AttrVec (), restrictions); - - return std::unique_ptr ( - new AST::ReturnExpr (std::move (returned_expr), std::move (outer_attrs), - locus)); -} - -// Parses a try expression. 
-template -std::unique_ptr -Parser::parse_try_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (TRY); - } - - std::unique_ptr block_expr = parse_block_expr (); - - if (!block_expr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse try block expression"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::TryExpr (std::move (block_expr), std::move (outer_attrs), locus)); -} - -/* Parses a break expression (including any label to break to AND any return - * expression). */ -template -std::unique_ptr -Parser::parse_break_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (BREAK); - } - - auto parsed_label = parse_lifetime (false); - auto label = (parsed_label) - ? tl::optional (parsed_label.value ()) - : tl::nullopt; - - // parse break return expression if it exists - ParseRestrictions restrictions; - restrictions.expr_can_be_null = true; - std::unique_ptr return_expr - = parse_expr (AST::AttrVec (), restrictions); - - return std::unique_ptr ( - new AST::BreakExpr (std::move (label), std::move (return_expr), - std::move (outer_attrs), locus)); -} - -// Parses a continue expression (including any label to continue from). -template -std::unique_ptr -Parser::parse_continue_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (CONTINUE); - } - - auto parsed_label = parse_lifetime (false); - auto label = (parsed_label) - ? 
tl::optional (parsed_label.value ()) - : tl::nullopt; - - return std::unique_ptr ( - new AST::ContinueExpr (std::move (label), std::move (outer_attrs), locus)); -} - -// Parses a loop label used in loop expressions. -template -tl::expected -Parser::parse_loop_label (const_TokenPtr tok) -{ - // parse lifetime - if doesn't exist, assume no label - if (tok->get_id () != LIFETIME) - { - // not necessarily an error - return tl::unexpected ( - ParseLoopLabelError::NOT_LOOP_LABEL); - } - /* FIXME: check for named lifetime requirement here? or check in semantic - * analysis phase? */ - AST::Lifetime label = lifetime_from_token (tok); - - if (!skip_token (COLON)) - { - // skip somewhere? - return tl::unexpected ( - ParseLoopLabelError::MISSING_COLON); - } - - return tl::expected ( - AST::LoopLabel (std::move (label), tok->get_locus ())); -} - -/* Parses an if expression of any kind, including with else, else if, else if - * let, and neither. Note that any outer attributes will be ignored because if - * expressions don't support them. */ -template -std::unique_ptr -Parser::parse_if_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - // TODO: make having outer attributes an error? - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - if (!skip_token (IF)) - { - skip_after_end_block (); - return nullptr; - } - } - - // detect accidental if let - if (lexer.peek_token ()->get_id () == LET) - { - Error error (lexer.peek_token ()->get_locus (), - "if let expression probably exists, but is being parsed " - "as an if expression. This may be a parser error"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - /* parse required condition expr - HACK to prevent struct expr from being - * parsed */ - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr condition = parse_expr ({}, no_struct_expr); - if (condition == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse condition expression in if expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - // parse required block expr - std::unique_ptr if_body = parse_block_expr (); - if (if_body == nullptr) - return nullptr; - - // branch to parse end or else (and then else, else if, or else if let) - if (lexer.peek_token ()->get_id () != ELSE) - { - // single selection - end of if expression - return std::unique_ptr ( - new AST::IfExpr (std::move (condition), std::move (if_body), - std::move (outer_attrs), locus)); - } - else - { - // double or multiple selection - branch on end, else if, or else if let - - // skip "else" - lexer.skip_token (); - - // branch on whether next token is '{' or 'if' - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_CURLY: - { - // double selection - else - // parse else block expr (required) - std::unique_ptr else_body = parse_block_expr (); - if (else_body == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse else body block expression in " - "if expression"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - return std::unique_ptr ( - new AST::IfExprConseqElse (std::move (condition), - std::move (if_body), - std::move (else_body), - std::move (outer_attrs), locus)); - } - case IF: - { - // multiple selection - else if or else if let - // branch on whether next token is 'let' or not - if (lexer.peek_token (1)->get_id () == LET) - { - // parse if let expr (required) - std::unique_ptr if_let_expr - = parse_if_let_expr (); - if (if_let_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse (else) if let expression " - "after if expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::IfExprConseqElse (std::move (condition), - std::move (if_body), - std::move (if_let_expr), - std::move (outer_attrs), locus)); - } - else - { - // parse if expr (required) - std::unique_ptr if_expr = parse_if_expr (); - if (if_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse (else) if expression after " - "if expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::IfExprConseqElse (std::move (condition), - std::move (if_body), - std::move (if_expr), - std::move (outer_attrs), locus)); - } - } - default: - // error - invalid token - add_error (Error (t->get_locus (), - "unexpected token %qs after else in if expression", - t->get_token_description ())); - - // skip somewhere? - return nullptr; - } - } -} - -/* Parses an if let expression of any kind, including with else, else if, else - * if let, and none. Note that any outer attributes will be ignored as if let - * expressions don't support them. */ -template -std::unique_ptr -Parser::parse_if_let_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - // TODO: make having outer attributes an error? 
- location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - if (!skip_token (IF)) - { - skip_after_end_block (); - return nullptr; - } - } - - // detect accidental if expr parsed as if let expr - if (lexer.peek_token ()->get_id () != LET) - { - Error error (lexer.peek_token ()->get_locus (), - "if expression probably exists, but is being parsed as an " - "if let expression. This may be a parser error"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - lexer.skip_token (); - - // parse match arm patterns (which are required) - std::vector> match_arm_patterns - = parse_match_arm_patterns (EQUAL); - if (match_arm_patterns.empty ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse any match arm patterns in if let expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - if (!skip_token (EQUAL)) - { - // skip somewhere? - return nullptr; - } - - // parse expression (required) - HACK to prevent struct expr being parsed - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr scrutinee_expr = parse_expr ({}, no_struct_expr); - if (scrutinee_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse scrutinee expression in if let expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - /* TODO: check for expression not being a struct expression or lazy boolean - * expression here? or actually probably in semantic analysis. */ - - // parse block expression (required) - std::unique_ptr if_let_body = parse_block_expr (); - if (if_let_body == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse if let body block expression in if let expression"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - // branch to parse end or else (and then else, else if, or else if let) - if (lexer.peek_token ()->get_id () != ELSE) - { - // single selection - end of if let expression - return std::unique_ptr ( - new AST::IfLetExpr (std::move (match_arm_patterns), - std::move (scrutinee_expr), std::move (if_let_body), - std::move (outer_attrs), locus)); - } - else - { - // double or multiple selection - branch on end, else if, or else if let - - // skip "else" - lexer.skip_token (); - - // branch on whether next token is '{' or 'if' - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LEFT_CURLY: - { - // double selection - else - // parse else block expr (required) - std::unique_ptr else_body = parse_block_expr (); - if (else_body == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse else body block expression in " - "if let expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::IfLetExprConseqElse (std::move (match_arm_patterns), - std::move (scrutinee_expr), - std::move (if_let_body), - std::move (else_body), - std::move (outer_attrs), locus)); - } - case IF: - { - // multiple selection - else if or else if let - // branch on whether next token is 'let' or not - if (lexer.peek_token (1)->get_id () == LET) - { - // parse if let expr (required) - std::unique_ptr if_let_expr - = parse_if_let_expr (); - if (if_let_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse (else) if let expression " - "after if let expression"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - return std::unique_ptr ( - new AST::IfLetExprConseqElse ( - std::move (match_arm_patterns), std::move (scrutinee_expr), - std::move (if_let_body), std::move (if_let_expr), - std::move (outer_attrs), locus)); - } - else - { - // parse if expr (required) - std::unique_ptr if_expr = parse_if_expr (); - if (if_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse (else) if expression after " - "if let expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::IfLetExprConseqElse ( - std::move (match_arm_patterns), std::move (scrutinee_expr), - std::move (if_let_body), std::move (if_expr), - std::move (outer_attrs), locus)); - } - } - default: - // error - invalid token - add_error ( - Error (t->get_locus (), - "unexpected token %qs after else in if let expression", - t->get_token_description ())); - - // skip somewhere? - return nullptr; - } - } -} - -/* TODO: possibly decide on different method of handling label (i.e. not - * parameter) */ - -/* Parses a "loop" infinite loop expression. Label is not parsed and should be - * parsed via parse_labelled_loop_expr, which would call this. */ -template -std::unique_ptr -Parser::parse_loop_expr (AST::AttrVec outer_attrs, - tl::optional label, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - if (label) - locus = label->get_locus (); - else - locus = lexer.peek_token ()->get_locus (); - - if (!skip_token (LOOP)) - { - skip_after_end_block (); - return nullptr; - } - } - else - { - if (label) - locus = label->get_locus (); - } - - // parse loop body, which is required - std::unique_ptr loop_body = parse_block_expr (); - if (loop_body == nullptr) - return nullptr; - - return std::unique_ptr ( - new AST::LoopExpr (std::move (loop_body), locus, std::move (label), - std::move (outer_attrs))); -} - -/* Parses a "while" loop expression. 
Label is not parsed and should be parsed - * via parse_labelled_loop_expr, which would call this. */ -template -std::unique_ptr -Parser::parse_while_loop_expr ( - AST::AttrVec outer_attrs, tl::optional label, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - if (label) - locus = label->get_locus (); - else - locus = lexer.peek_token ()->get_locus (); - - if (!skip_token (WHILE)) - { - skip_after_end_block (); - return nullptr; - } - } - else - { - if (label) - locus = label->get_locus (); - } - - // ensure it isn't a while let loop - if (lexer.peek_token ()->get_id () == LET) - { - Error error (lexer.peek_token ()->get_locus (), - "appears to be while let loop but is being parsed by " - "while loop - this may be a compiler issue"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - // parse loop predicate (required) with HACK to prevent struct expr parsing - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr predicate = parse_expr ({}, no_struct_expr); - if (predicate == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse predicate expression in while loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - /* TODO: check that it isn't struct expression here? actually, probably in - * semantic analysis */ - - // parse loop body (required) - std::unique_ptr body = parse_block_expr (); - if (body == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse loop body block expression in while loop"); - add_error (std::move (error)); - - // skip somewhere - return nullptr; - } - - return std::unique_ptr ( - new AST::WhileLoopExpr (std::move (predicate), std::move (body), locus, - std::move (label), std::move (outer_attrs))); -} - -/* Parses a "while let" loop expression. 
Label is not parsed and should be - * parsed via parse_labelled_loop_expr, which would call this. */ -template -std::unique_ptr -Parser::parse_while_let_loop_expr ( - AST::AttrVec outer_attrs, tl::optional label) -{ - location_t locus = UNKNOWN_LOCATION; - if (label) - locus = label->get_locus (); - else - locus = lexer.peek_token ()->get_locus (); - maybe_skip_token (WHILE); - - /* check for possible accidental recognition of a while loop as a while let - * loop */ - if (lexer.peek_token ()->get_id () != LET) - { - Error error (lexer.peek_token ()->get_locus (), - "appears to be a while loop but is being parsed by " - "while let loop - this may be a compiler issue"); - add_error (std::move (error)); - - // skip somewhere - return nullptr; - } - // as this token is definitely let now, save the computation of comparison - lexer.skip_token (); - - // parse predicate patterns - std::vector> predicate_patterns - = parse_match_arm_patterns (EQUAL); - // ensure that there is at least 1 pattern - if (predicate_patterns.empty ()) - { - Error error (lexer.peek_token ()->get_locus (), - "should be at least 1 pattern"); - add_error (std::move (error)); - return nullptr; - } - - if (!skip_token (EQUAL)) - { - // skip somewhere? - return nullptr; - } - - /* parse predicate expression, which is required (and HACK to prevent struct - * expr) */ - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr predicate_expr = parse_expr ({}, no_struct_expr); - if (predicate_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse predicate expression in while let loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - /* TODO: ensure that struct expression is not parsed? Actually, probably in - * semantic analysis. 
*/ - - // parse loop body, which is required - std::unique_ptr body = parse_block_expr (); - if (body == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse block expr (loop body) of while let loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr (new AST::WhileLetLoopExpr ( - std::move (predicate_patterns), std::move (predicate_expr), - std::move (body), locus, std::move (label), std::move (outer_attrs))); -} - -/* Parses a "for" iterative loop. Label is not parsed and should be parsed via - * parse_labelled_loop_expr, which would call this. */ -template -std::unique_ptr -Parser::parse_for_loop_expr ( - AST::AttrVec outer_attrs, tl::optional label) -{ - location_t locus = UNKNOWN_LOCATION; - if (label) - locus = label->get_locus (); - else - locus = lexer.peek_token ()->get_locus (); - maybe_skip_token (FOR); - - // parse pattern, which is required - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse iterator pattern in for loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - if (!skip_token (IN)) - { - // skip somewhere? - return nullptr; - } - - /* parse iterator expression, which is required - also HACK to prevent - * struct expr */ - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr expr = parse_expr ({}, no_struct_expr); - if (expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse iterator expression in for loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - // TODO: check to ensure this isn't struct expr? Or in semantic analysis. 
- - // parse loop body, which is required - std::unique_ptr body = parse_block_expr (); - if (body == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse loop body block expression in for loop"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ForLoopExpr (std::move (pattern), std::move (expr), - std::move (body), locus, std::move (label), - std::move (outer_attrs))); -} - -// Parses a loop expression with label (any kind of loop - disambiguates). -template -std::unique_ptr -Parser::parse_labelled_loop_expr (const_TokenPtr tok, - AST::AttrVec outer_attrs) -{ - /* TODO: decide whether it should not work if there is no label, or parse it - * with no label at the moment, I will make it not work with no label - * because that's the implication. */ - - if (tok->get_id () != LIFETIME) - { - Error error (tok->get_locus (), - "expected lifetime in labelled loop expr (to parse loop " - "label) - found %qs", - tok->get_token_description ()); - add_error (std::move (error)); - - // skip? - return nullptr; - } - - // parse loop label (required) - // TODO: Convert this return type to tl::expected instead of tl::optional - auto parsed_label = parse_loop_label (tok); - if (!parsed_label) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse loop label in labelled loop expr"); - add_error (std::move (error)); - - // skip? - return nullptr; - } - - auto label = parsed_label - ? 
tl::optional (parsed_label.value ()) - : tl::nullopt; - - // branch on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case LOOP: - return parse_loop_expr (std::move (outer_attrs), std::move (label)); - case FOR: - return parse_for_loop_expr (std::move (outer_attrs), std::move (label)); - case WHILE: - // further disambiguate into while vs while let - if (lexer.peek_token (1)->get_id () == LET) - { - return parse_while_let_loop_expr (std::move (outer_attrs), - std::move (label)); - } - else - { - return parse_while_loop_expr (std::move (outer_attrs), - std::move (label)); - } - case LEFT_CURLY: - return parse_block_expr (std::move (outer_attrs), std::move (label)); - default: - // error - add_error (Error (t->get_locus (), - "unexpected token %qs when parsing labelled loop", - t->get_token_description ())); - - // skip? - return nullptr; - } -} - -// Parses a match expression. -template -std::unique_ptr -Parser::parse_match_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (MATCH_KW); - } - - /* parse scrutinee expression, which is required (and HACK to prevent struct - * expr) */ - ParseRestrictions no_struct_expr; - no_struct_expr.can_be_struct_expr = false; - std::unique_ptr scrutinee = parse_expr ({}, no_struct_expr); - if (scrutinee == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse scrutinee expression in match expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - /* TODO: check for scrutinee expr not being struct expr? or do so in - * semantic analysis */ - - if (!skip_token (LEFT_CURLY)) - { - // skip somewhere? 
- return nullptr; - } - - // parse inner attributes (if they exist) - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse match arms (if they exist) - // std::vector > match_arms; - std::vector match_arms; - - // parse match cases - while (lexer.peek_token ()->get_id () != RIGHT_CURLY) - { - // parse match arm itself, which is required - AST::MatchArm arm = parse_match_arm (); - if (arm.is_error ()) - { - // TODO is this worth throwing everything away? - Error error (lexer.peek_token ()->get_locus (), - "failed to parse match arm in match arms"); - add_error (std::move (error)); - - return nullptr; - } - - if (!skip_token (MATCH_ARROW)) - { - // skip after somewhere? - // TODO is returning here a good idea? or is break better? - return nullptr; - } - - ParseRestrictions restrictions; - restrictions.expr_can_be_stmt = true; - - std::unique_ptr expr = parse_expr ({}, restrictions); - - if (expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse expr in match arm in match expr"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - bool is_expr_without_block = expr->is_expr_without_block (); - - match_arms.push_back (AST::MatchCase (std::move (arm), std::move (expr))); - - // handle comma presence - if (lexer.peek_token ()->get_id () != COMMA) - { - if (!is_expr_without_block) - { - // allowed even if not final case - continue; - } - else if (is_expr_without_block - && lexer.peek_token ()->get_id () != RIGHT_CURLY) - { - // not allowed if not final case - Error error (lexer.peek_token ()->get_locus (), - "exprwithoutblock requires comma after match case " - "expression in match arm (if not final case)"); - add_error (std::move (error)); - - return nullptr; - } - else - { - // otherwise, must be final case, so fine - break; - } - } - lexer.skip_token (); - } - - if (!skip_token (RIGHT_CURLY)) - { - // skip somewhere? 
- return nullptr; - } - - match_arms.shrink_to_fit (); - - return std::unique_ptr ( - new AST::MatchExpr (std::move (scrutinee), std::move (match_arms), - std::move (inner_attrs), std::move (outer_attrs), - locus)); -} - -// Parses the "pattern" part of the match arm (the 'case x:' equivalent). -template -AST::MatchArm -Parser::parse_match_arm () -{ - // parse optional outer attributes - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // DEBUG - rust_debug ("about to start parsing match arm patterns"); - - // break early if find right curly - if (lexer.peek_token ()->get_id () == RIGHT_CURLY) - { - // not an error - return AST::MatchArm::create_error (); - } - - // parse match arm patterns - at least 1 is required - std::vector> match_arm_patterns - = parse_match_arm_patterns (RIGHT_CURLY); - if (match_arm_patterns.empty ()) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse any patterns in match arm"); - add_error (std::move (error)); - - // skip somewhere? - return AST::MatchArm::create_error (); - } - - // DEBUG - rust_debug ("successfully parsed match arm patterns"); - - // parse match arm guard expr if it exists - std::unique_ptr guard_expr = nullptr; - if (lexer.peek_token ()->get_id () == IF) - { - lexer.skip_token (); - - guard_expr = parse_expr (); - if (guard_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse guard expression in match arm"); - add_error (std::move (error)); - - // skip somewhere? - return AST::MatchArm::create_error (); - } - } - - // DEBUG - rust_debug ("successfully parsed match arm"); - - return AST::MatchArm (std::move (match_arm_patterns), - lexer.peek_token ()->get_locus (), - std::move (guard_expr), std::move (outer_attrs)); -} - -/* Parses the patterns used in a match arm. End token id is the id of the - * token that would exist after the patterns are done (e.g. '}' for match - * expr, '=' for if let and while let). 
*/ -template -std::vector> -Parser::parse_match_arm_patterns (TokenId end_token_id) -{ - // skip optional leading '|' - if (lexer.peek_token ()->get_id () == PIPE) - lexer.skip_token (); - /* TODO: do I even need to store the result of this? can't be used. - * If semantically different, I need a wrapped "match arm patterns" object - * for this. */ - - std::vector> patterns; - - // quick break out if end_token_id - if (lexer.peek_token ()->get_id () == end_token_id) - return patterns; - - // parse required pattern - if doesn't exist, return empty - std::unique_ptr initial_pattern = parse_pattern (); - if (initial_pattern == nullptr) - { - // FIXME: should this be an error? - return patterns; - } - patterns.push_back (std::move (initial_pattern)); - - // DEBUG - rust_debug ("successfully parsed initial match arm pattern"); - - // parse new patterns as long as next char is '|' - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == PIPE) - { - // skip pipe token - lexer.skip_token (); - - // break if hit end token id - if (lexer.peek_token ()->get_id () == end_token_id) - break; - - // parse pattern - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - // this is an error - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in match arm patterns"); - add_error (std::move (error)); - - // skip somewhere? - return {}; - } - - patterns.push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - patterns.shrink_to_fit (); - - return patterns; -} - -// Parses an async block expression. 
-template -std::unique_ptr -Parser::parse_async_block_expr (AST::AttrVec outer_attrs) -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (ASYNC); - - // detect optional move token - bool has_move = false; - if (lexer.peek_token ()->get_id () == MOVE) - { - lexer.skip_token (); - has_move = true; - } - - // parse block expression (required) - std::unique_ptr block_expr = parse_block_expr (); - if (block_expr == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse block expression of async block expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::AsyncBlockExpr (std::move (block_expr), has_move, - std::move (outer_attrs), locus)); -} - -// Parses an unsafe block expression. -template -std::unique_ptr -Parser::parse_unsafe_block_expr ( - AST::AttrVec outer_attrs, location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (UNSAFE); - } - - // parse block expression (required) - std::unique_ptr block_expr = parse_block_expr (); - if (block_expr == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse block expression of unsafe block expression"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::UnsafeBlockExpr (std::move (block_expr), std::move (outer_attrs), - locus)); -} - -// Parses an array definition expression. 
-template -std::unique_ptr -Parser::parse_array_expr (AST::AttrVec outer_attrs, - location_t pratt_parsed_loc) -{ - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (LEFT_SQUARE); - } - - // parse optional inner attributes - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // parse the "array elements" section, which is optional - if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) - { - // no array elements - lexer.skip_token (); - - std::vector> exprs; - auto array_elems - = std::make_unique (std::move (exprs), locus); - return std::make_unique (std::move (array_elems), - std::move (inner_attrs), - std::move (outer_attrs), locus); - } - else - { - // should have array elements - // parse initial expression, which is required for either - std::unique_ptr initial_expr = parse_expr (); - if (initial_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse expression in array expression " - "(even though arrayelems seems to be present)"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - if (lexer.peek_token ()->get_id () == SEMICOLON) - { - // copy array elems - lexer.skip_token (); - - // parse copy amount expression (required) - std::unique_ptr copy_amount = parse_expr (); - if (copy_amount == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "could not parse copy amount expression in array " - "expression (arrayelems)"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - skip_token (RIGHT_SQUARE); - - std::unique_ptr copied_array_elems ( - new AST::ArrayElemsCopied (std::move (initial_expr), - std::move (copy_amount), locus)); - return std::unique_ptr ( - new AST::ArrayExpr (std::move (copied_array_elems), - std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) - { - // single-element array expression - std::vector> exprs; - exprs.reserve (1); - exprs.push_back (std::move (initial_expr)); - exprs.shrink_to_fit (); - - skip_token (RIGHT_SQUARE); - - std::unique_ptr array_elems ( - new AST::ArrayElemsValues (std::move (exprs), locus)); - return std::unique_ptr ( - new AST::ArrayExpr (std::move (array_elems), - std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else if (lexer.peek_token ()->get_id () == COMMA) - { - // multi-element array expression (or trailing comma) - std::vector> exprs; - exprs.push_back (std::move (initial_expr)); - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // quick break if right square bracket - if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) - break; - - // parse expression (required) - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse element in array expression"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - exprs.push_back (std::move (expr)); - - t = lexer.peek_token (); - } - - skip_token (RIGHT_SQUARE); - - exprs.shrink_to_fit (); - - std::unique_ptr array_elems ( - new AST::ArrayElemsValues (std::move (exprs), locus)); - return std::unique_ptr ( - new AST::ArrayExpr (std::move (array_elems), - std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else - { - // error - Error error (lexer.peek_token ()->get_locus (), - "unexpected token %qs in array expression (arrayelems)", - lexer.peek_token ()->get_token_description ()); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - } -} - -// Parses a single parameter used in a closure definition. -template -AST::ClosureParam -Parser::parse_closure_param () -{ - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parse pattern (which is required) - std::unique_ptr pattern = parse_pattern_no_alt (); - if (pattern == nullptr) - { - // not necessarily an error - return AST::ClosureParam::create_error (); - } - - // parse optional type of param - std::unique_ptr type = nullptr; - if (lexer.peek_token ()->get_id () == COLON) - { - lexer.skip_token (); - - // parse type, which is now required - type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type in closure parameter"); - add_error (std::move (error)); - - // skip somewhere? - return AST::ClosureParam::create_error (); - } - } - - location_t loc = pattern->get_locus (); - return AST::ClosureParam (std::move (pattern), loc, std::move (type), - std::move (outer_attrs)); -} - -// Parses a grouped or tuple expression (disambiguates). 
-template -std::unique_ptr -Parser::parse_grouped_or_tuple_expr ( - AST::AttrVec outer_attrs, location_t pratt_parsed_loc) -{ - // adjustment to allow Pratt parsing to reuse function without copy-paste - location_t locus = pratt_parsed_loc; - if (locus == UNKNOWN_LOCATION) - { - locus = lexer.peek_token ()->get_locus (); - skip_token (LEFT_PAREN); - } - - // parse optional inner attributes - AST::AttrVec inner_attrs = parse_inner_attributes (); - - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - { - // must be empty tuple - lexer.skip_token (); - - // create tuple with empty tuple elems - return std::unique_ptr ( - new AST::TupleExpr (std::vector> (), - std::move (inner_attrs), std::move (outer_attrs), - locus)); - } - - // parse first expression (required) - std::unique_ptr first_expr = parse_expr (); - if (first_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse expression in grouped or tuple expression"); - add_error (std::move (error)); - - // skip after somewhere? 
- return nullptr; - } - - // detect whether grouped expression with right parentheses as next token - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - { - // must be grouped expr - lexer.skip_token (); - - // create grouped expr - return std::unique_ptr ( - new AST::GroupedExpr (std::move (first_expr), std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else if (lexer.peek_token ()->get_id () == COMMA) - { - // tuple expr - std::vector> exprs; - exprs.push_back (std::move (first_expr)); - - // parse potential other tuple exprs - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break out if right paren - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - break; - - // parse expr, which is now required - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse expr in tuple expr"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - exprs.push_back (std::move (expr)); - - t = lexer.peek_token (); - } - - // skip right paren - skip_token (RIGHT_PAREN); - - return std::unique_ptr ( - new AST::TupleExpr (std::move (exprs), std::move (inner_attrs), - std::move (outer_attrs), locus)); - } - else - { - // error - const_TokenPtr t = lexer.peek_token (); - Error error (t->get_locus (), - "unexpected token %qs in grouped or tuple expression " - "(parenthesised expression) - expected %<)%> for grouped " - "expr and %<,%> for tuple expr", - t->get_token_description ()); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } -} - -// Parses a type (will further disambiguate any type). -template -std::unique_ptr -Parser::parse_type (bool save_errors) -{ - /* rules for all types: - * NeverType: '!' - * SliceType: '[' Type ']' - * InferredType: '_' - * MacroInvocation: SimplePath '!' 
DelimTokenTree - * ParenthesisedType: '(' Type ')' - * ImplTraitType: 'impl' TypeParamBounds - * TypeParamBounds (not type) TypeParamBound ( '+' TypeParamBound )* '+'? - * TypeParamBound Lifetime | TraitBound - * ImplTraitTypeOneBound: 'impl' TraitBound - * TraitObjectType: 'dyn'? TypeParamBounds - * TraitObjectTypeOneBound: 'dyn'? TraitBound - * TraitBound '?'? ForLifetimes? TypePath | '(' '?'? - * ForLifetimes? TypePath ')' BareFunctionType: ForLifetimes? - * FunctionQualifiers 'fn' etc. ForLifetimes (not type) 'for' '<' - * LifetimeParams '>' FunctionQualifiers ( 'async' | 'const' )? - * 'unsafe'? - * ('extern' abi?)? QualifiedPathInType: '<' Type ( 'as' TypePath )? '>' - * ( - * '::' TypePathSegment )+ TypePath: '::'? TypePathSegment ( - * '::' TypePathSegment)* ArrayType: '[' Type ';' Expr ']' - * ReferenceType: '&' Lifetime? 'mut'? TypeNoBounds - * RawPointerType: '*' ( 'mut' | 'const' ) TypeNoBounds - * TupleType: '(' Type etc. - regular tuple stuff. Also - * regular tuple vs parenthesised precedence - * - * Disambiguate between macro and type path via type path being parsed, and - * then if '!' found, convert type path to simple path for macro. Usual - * disambiguation for tuple vs parenthesised. For ImplTraitType and - * TraitObjectType individual disambiguations, they seem more like "special - * cases", so probably just try to parse the more general ImplTraitType or - * TraitObjectType and return OneBound versions if they satisfy those - * criteria. 
*/ - - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - // never type - can't be macro as no path beforehand - lexer.skip_token (); - return std::unique_ptr ( - new AST::NeverType (t->get_locus ())); - case LEFT_SQUARE: - // slice type or array type - requires further disambiguation - return parse_slice_or_array_type (); - case LEFT_SHIFT: - case LEFT_ANGLE: - { - // qualified path in type - AST::QualifiedPathInType path = parse_qualified_path_in_type (); - if (path.is_error ()) - { - if (save_errors) - { - Error error (t->get_locus (), - "failed to parse qualified path in type"); - add_error (std::move (error)); - } - - return nullptr; - } - return std::unique_ptr ( - new AST::QualifiedPathInType (std::move (path))); - } - case UNDERSCORE: - // inferred type - lexer.skip_token (); - return std::unique_ptr ( - new AST::InferredType (t->get_locus ())); - case ASTERISK: - // raw pointer type - return parse_raw_pointer_type (); - case AMP: // does this also include AMP_AMP? - case LOGICAL_AND: - // reference type - return parse_reference_type (); - case LIFETIME: - { - /* probably a lifetime bound, so probably type param bounds in - * TraitObjectType */ - std::vector> bounds - = parse_type_param_bounds (); - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), t->get_locus (), - false)); - } - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - { - // macro invocation or type path - requires further disambiguation. - /* for parsing path component of each rule, perhaps parse it as a - * typepath and attempt conversion to simplepath if a trailing '!' is - * found */ - /* Type path also includes TraitObjectTypeOneBound BUT if it starts - * with it, it is exactly the same as a TypePath syntactically, so - * this is a syntactical ambiguity. As such, the parser will parse it - * as a TypePath. 
This, however, does not prevent TraitObjectType from - * starting with a typepath. */ - - // parse path as type path - AST::TypePath path = parse_type_path (); - if (path.is_error ()) - { - if (save_errors) - { - Error error (t->get_locus (), - "failed to parse path as first component of type"); - add_error (std::move (error)); - } - - return nullptr; - } - location_t locus = path.get_locus (); - - // branch on next token - t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - { - // macro invocation - // convert to simple path - AST::SimplePath macro_path = path.as_simple_path (); - if (macro_path.is_empty ()) - { - if (save_errors) - { - Error error (t->get_locus (), - "failed to parse simple path in macro " - "invocation (for type)"); - add_error (std::move (error)); - } - - return nullptr; - } - - lexer.skip_token (); - - auto tok_tree = parse_delim_token_tree (); - if (!tok_tree) - return nullptr; - - return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (macro_path), - std::move (tok_tree.value ())), - {}, locus); - } - case PLUS: - { - // type param bounds - std::vector> bounds; - - // convert type path to trait bound - std::unique_ptr path_bound ( - new AST::TraitBound (std::move (path), locus, false, false)); - bounds.push_back (std::move (path_bound)); - - /* parse rest of bounds - FIXME: better way to find when to stop - * parsing */ - while (t->get_id () == PLUS) - { - lexer.skip_token (); - - // parse bound if it exists - if not, assume end of sequence - std::unique_ptr bound - = parse_type_param_bound (); - if (bound == nullptr) - { - break; - } - bounds.push_back (std::move (bound)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), locus, false)); - } - default: - // assume that this is a type path and not an error - return std::unique_ptr ( - new AST::TypePath (std::move (path))); - } - } - case LEFT_PAREN: - /* tuple type or parenthesised type - requires 
further disambiguation - * (the usual). ok apparently can be a parenthesised TraitBound too, so - * could be TraitObjectTypeOneBound or TraitObjectType */ - return parse_paren_prefixed_type (); - case FOR: - // TraitObjectTypeOneBound or BareFunctionType - return parse_for_prefixed_type (); - case ASYNC: - case CONST: - case UNSAFE: - case EXTERN_KW: - case FN_KW: - // bare function type (with no for lifetimes) - return parse_bare_function_type (std::vector ()); - case IMPL: - lexer.skip_token (); - if (lexer.peek_token ()->get_id () == LIFETIME) - { - /* cannot be one bound because lifetime prevents it from being - * traitbound */ - std::vector> bounds - = parse_type_param_bounds (); - - return std::unique_ptr ( - new AST::ImplTraitType (std::move (bounds), t->get_locus ())); - } - else - { - // should be trait bound, so parse trait bound - std::unique_ptr initial_bound = parse_trait_bound (); - if (initial_bound == nullptr) - { - if (save_errors) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse ImplTraitType initial bound"); - add_error (std::move (error)); - } - - return nullptr; - } - - location_t locus = t->get_locus (); - - // short cut if next token isn't '+' - t = lexer.peek_token (); - if (t->get_id () != PLUS) - { - return std::unique_ptr ( - new AST::ImplTraitTypeOneBound (std::move (initial_bound), - locus)); - } - - // parse additional type param bounds - std::vector> bounds; - bounds.push_back (std::move (initial_bound)); - while (t->get_id () == PLUS) - { - lexer.skip_token (); - - // parse bound if it exists - std::unique_ptr bound - = parse_type_param_bound (); - if (bound == nullptr) - { - // not an error as trailing plus may exist - break; - } - bounds.push_back (std::move (bound)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::ImplTraitType (std::move (bounds), locus)); - } - case DYN: - case QUESTION_MARK: - { - // either TraitObjectType or TraitObjectTypeOneBound - bool has_dyn = false; - if 
(t->get_id () == DYN) - { - lexer.skip_token (); - has_dyn = true; - } - - if (lexer.peek_token ()->get_id () == LIFETIME) - { - /* cannot be one bound because lifetime prevents it from being - * traitbound */ - std::vector> bounds - = parse_type_param_bounds (); - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), t->get_locus (), - has_dyn)); - } - else - { - // should be trait bound, so parse trait bound - std::unique_ptr initial_bound - = parse_trait_bound (); - if (initial_bound == nullptr) - { - if (save_errors) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse TraitObjectType initial bound"); - add_error (std::move (error)); - } - - return nullptr; - } - - // short cut if next token isn't '+' - t = lexer.peek_token (); - if (t->get_id () != PLUS) - { - // convert trait bound to value object - AST::TraitBound value_bound (*initial_bound); - - // DEBUG: removed as unique ptr, so should auto delete - // delete initial_bound; - - return std::unique_ptr ( - new AST::TraitObjectTypeOneBound (std::move (value_bound), - t->get_locus (), has_dyn)); - } - - // parse additional type param bounds - std::vector> bounds; - bounds.push_back (std::move (initial_bound)); - while (t->get_id () == PLUS) - { - lexer.skip_token (); - - // parse bound if it exists - std::unique_ptr bound - = parse_type_param_bound (); - if (bound == nullptr) - { - // not an error as trailing plus may exist - break; - } - bounds.push_back (std::move (bound)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), t->get_locus (), - has_dyn)); - } - } - default: - if (save_errors) - add_error (Error (t->get_locus (), "unrecognised token %qs in type", - t->get_token_description ())); - - return nullptr; - } -} - -/* Parses a type that has '(' as its first character. Returns a tuple type, - * parenthesised type, TraitObjectTypeOneBound, or TraitObjectType depending - * on following characters. 
*/ -template -std::unique_ptr -Parser::parse_paren_prefixed_type () -{ - /* NOTE: Syntactical ambiguity of a parenthesised trait bound is considered - * a trait bound, not a parenthesised type, so that it can still be used in - * type param bounds. */ - - /* NOTE: this implementation is really shit but I couldn't think of a better - * one. It requires essentially breaking polymorphism and downcasting via - * virtual method abuse, as it was copied from the rustc implementation (in - * which types are reified due to tagged union), after a more OOP attempt by - * me failed. */ - location_t left_delim_locus = lexer.peek_token ()->get_locus (); - - // skip left delim - lexer.skip_token (); - /* while next token isn't close delim, parse comma-separated types, saving - * whether trailing comma happens */ - const_TokenPtr t = lexer.peek_token (); - bool trailing_comma = true; - std::vector> types; - - while (t->get_id () != RIGHT_PAREN) - { - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - Error error (t->get_locus (), - "failed to parse type inside parentheses (probably " - "tuple or parenthesised)"); - add_error (std::move (error)); - - return nullptr; - } - types.push_back (std::move (type)); - - t = lexer.peek_token (); - if (t->get_id () != COMMA) - { - trailing_comma = false; - break; - } - lexer.skip_token (); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - // if only one type and no trailing comma, then not a tuple type - if (types.size () == 1 && !trailing_comma) - { - // must be a TraitObjectType (with more than one bound) - if (lexer.peek_token ()->get_id () == PLUS) - { - // create type param bounds vector - std::vector> bounds; - - // HACK: convert type to traitbound and add to bounds - std::unique_ptr released_ptr = std::move (types[0]); - std::unique_ptr converted_bound ( - released_ptr->to_trait_bound (true)); - if (converted_bound == nullptr) - { - Error error ( - lexer.peek_token 
()->get_locus (), - "failed to hackily converted parsed type to trait bound"); - add_error (std::move (error)); - - return nullptr; - } - bounds.push_back (std::move (converted_bound)); - - t = lexer.peek_token (); - while (t->get_id () == PLUS) - { - lexer.skip_token (); - - // attempt to parse typeparambound - std::unique_ptr bound - = parse_type_param_bound (); - if (bound == nullptr) - { - // not an error if null - break; - } - bounds.push_back (std::move (bound)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), left_delim_locus, - false)); - } - else - { - // release vector pointer - std::unique_ptr released_ptr = std::move (types[0]); - /* HACK: attempt to convert to trait bound. if fails, parenthesised - * type */ - std::unique_ptr converted_bound ( - released_ptr->to_trait_bound (true)); - if (converted_bound == nullptr) - { - // parenthesised type - return std::unique_ptr ( - new AST::ParenthesisedType (std::move (released_ptr), - left_delim_locus)); - } - else - { - // trait object type (one bound) - - // get value semantics trait bound - AST::TraitBound value_bound (*converted_bound); - - return std::unique_ptr ( - new AST::TraitObjectTypeOneBound (value_bound, - left_delim_locus)); - } - } - } - else - { - return std::unique_ptr ( - new AST::TupleType (std::move (types), left_delim_locus)); - } - /* TODO: ensure that this ensures that dynamic dispatch for traits is not - * lost somehow */ -} - -/* Parses a type that has 'for' as its first character. This means it has a - * "for lifetimes", so returns either a BareFunctionType, TraitObjectType, or - * TraitObjectTypeOneBound depending on following characters. 
*/ -template -std::unique_ptr -Parser::parse_for_prefixed_type () -{ - location_t for_locus = lexer.peek_token ()->get_locus (); - // parse for lifetimes in type - std::vector for_lifetimes = parse_for_lifetimes (); - - // branch on next token - either function or a trait type - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case ASYNC: - case CONST: - case UNSAFE: - case EXTERN_KW: - case FN_KW: - return parse_bare_function_type (std::move (for_lifetimes)); - case SCOPE_RESOLUTION: - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case DOLLAR_SIGN: - { - // path, so trait type - - // parse type path to finish parsing trait bound - AST::TypePath path = parse_type_path (); - - t = lexer.peek_token (); - if (t->get_id () != PLUS) - { - // must be one-bound trait type - // create trait bound value object - AST::TraitBound bound (std::move (path), for_locus, false, false, - std::move (for_lifetimes)); - - return std::unique_ptr ( - new AST::TraitObjectTypeOneBound (std::move (bound), for_locus)); - } - - /* more than one bound trait type (or at least parsed as it - could be - * trailing '+') create trait bound pointer and bounds */ - std::unique_ptr initial_bound ( - new AST::TraitBound (std::move (path), for_locus, false, false, - std::move (for_lifetimes))); - std::vector> bounds; - bounds.push_back (std::move (initial_bound)); - - while (t->get_id () == PLUS) - { - lexer.skip_token (); - - // parse type param bound if it exists - std::unique_ptr bound - = parse_type_param_bound (); - if (bound == nullptr) - { - // not an error - e.g. 
trailing plus - return nullptr; - } - bounds.push_back (std::move (bound)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::TraitObjectType (std::move (bounds), for_locus, false)); - } - default: - // error - add_error (Error (t->get_locus (), - "unrecognised token %qs in bare function type or trait " - "object type or trait object type one bound", - t->get_token_description ())); - - return nullptr; - } -} - -// Parses a maybe named param used in bare function types. -template -AST::MaybeNamedParam -Parser::parse_maybe_named_param (AST::AttrVec outer_attrs) -{ - /* Basically guess that param is named if first token is identifier or - * underscore and second token is semicolon. This should probably have no - * exceptions. rustc uses backtracking to parse these, but at the time of - * writing gccrs has no backtracking capabilities. */ - const_TokenPtr current = lexer.peek_token (); - const_TokenPtr next = lexer.peek_token (1); - - Identifier name; - AST::MaybeNamedParam::ParamKind kind = AST::MaybeNamedParam::UNNAMED; - - if (current->get_id () == IDENTIFIER && next->get_id () == COLON) - { - // named param - name = {current}; - kind = AST::MaybeNamedParam::IDENTIFIER; - lexer.skip_token (1); - } - else if (current->get_id () == UNDERSCORE && next->get_id () == COLON) - { - // wildcard param - name = {Values::Keywords::UNDERSCORE, current->get_locus ()}; - kind = AST::MaybeNamedParam::WILDCARD; - lexer.skip_token (1); - } - - // parse type (required) - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse type in maybe named param"); - add_error (std::move (error)); - - return AST::MaybeNamedParam::create_error (); - } - - return AST::MaybeNamedParam (std::move (name), kind, std::move (type), - std::move (outer_attrs), current->get_locus ()); -} - -/* Parses a bare function type (with the given for lifetimes for convenience - - * does not parse them itself). 
*/ -template -std::unique_ptr -Parser::parse_bare_function_type ( - std::vector for_lifetimes) -{ - // TODO: pass in for lifetime location as param - location_t best_try_locus = lexer.peek_token ()->get_locus (); - - AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); - - if (!skip_token (FN_KW)) - return nullptr; - - if (!skip_token (LEFT_PAREN)) - return nullptr; - - // parse function params, if they exist - std::vector params; - bool is_variadic = false; - AST::AttrVec variadic_attrs; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN) - { - AST::AttrVec temp_attrs = parse_outer_attributes (); - - if (lexer.peek_token ()->get_id () == ELLIPSIS) - { - lexer.skip_token (); - is_variadic = true; - variadic_attrs = std::move (temp_attrs); - - t = lexer.peek_token (); - - if (t->get_id () != RIGHT_PAREN) - { - Error error (t->get_locus (), - "expected right parentheses after variadic in maybe " - "named function " - "parameters, found %qs", - t->get_token_description ()); - add_error (std::move (error)); - - return nullptr; - } - - break; - } - - AST::MaybeNamedParam param - = parse_maybe_named_param (std::move (temp_attrs)); - if (param.is_error ()) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse maybe named param in bare function type"); - add_error (std::move (error)); - - return nullptr; - } - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - return nullptr; - - // bare function return type, if exists - std::unique_ptr return_type = nullptr; - if (lexer.peek_token ()->get_id () == RETURN_TYPE) - { - lexer.skip_token (); - - // parse required TypeNoBounds - return_type = parse_type_no_bounds (); - if (return_type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse return type (type no bounds) in bare " - "function type"); - 
add_error (std::move (error)); - - return nullptr; - } - } - - return std::unique_ptr ( - new AST::BareFunctionType (std::move (for_lifetimes), - std::move (qualifiers), std::move (params), - is_variadic, std::move (variadic_attrs), - std::move (return_type), best_try_locus)); -} - -template -std::unique_ptr -Parser::parse_reference_type_inner (location_t locus) -{ - // parse optional lifetime - AST::Lifetime lifetime = AST::Lifetime::elided (); - if (lexer.peek_token ()->get_id () == LIFETIME) - { - auto parsed_lifetime = parse_lifetime (true); - if (parsed_lifetime) - { - lifetime = parsed_lifetime.value (); - } - else - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse lifetime in reference type"); - add_error (std::move (error)); - - return nullptr; - } - } - - bool is_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - lexer.skip_token (); - is_mut = true; - } - - // parse type no bounds, which is required - std::unique_ptr type = parse_type_no_bounds (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse referenced type in reference type"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::ReferenceType (is_mut, std::move (type), locus, - std::move (lifetime))); -} - -// Parses a reference type (mutable or immutable, with given lifetime). -template -std::unique_ptr -Parser::parse_reference_type () -{ - auto t = lexer.peek_token (); - auto locus = t->get_locus (); - - switch (t->get_id ()) - { - case AMP: - skip_token (AMP); - return parse_reference_type_inner (locus); - case LOGICAL_AND: - skip_token (LOGICAL_AND); - return std::unique_ptr ( - new AST::ReferenceType (false, parse_reference_type_inner (locus), - locus)); - default: - rust_unreachable (); - } -} - -// Parses a raw (unsafe) pointer type. 
-template -std::unique_ptr -Parser::parse_raw_pointer_type () -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (ASTERISK); - - AST::RawPointerType::PointerType kind = AST::RawPointerType::CONST; - - // branch on next token for pointer kind info - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case MUT: - kind = AST::RawPointerType::MUT; - lexer.skip_token (); - break; - case CONST: - kind = AST::RawPointerType::CONST; - lexer.skip_token (); - break; - default: - add_error (Error (t->get_locus (), - "unrecognised token %qs in raw pointer type", - t->get_token_description ())); - - return nullptr; - } - - // parse type no bounds (required) - std::unique_ptr type = parse_type_no_bounds (); - if (type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pointed type of raw pointer type"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::RawPointerType (kind, std::move (type), locus)); -} - -/* Parses a slice or array type, depending on following arguments (as - * lookahead is not possible). 
*/ -template -std::unique_ptr -Parser::parse_slice_or_array_type () -{ - location_t locus = lexer.peek_token ()->get_locus (); - skip_token (LEFT_SQUARE); - - // parse inner type (required) - std::unique_ptr inner_type = parse_type (); - if (inner_type == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse inner type in slice or array type"); - add_error (std::move (error)); - - return nullptr; - } - - // branch on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case RIGHT_SQUARE: - // slice type - lexer.skip_token (); - - return std::unique_ptr ( - new AST::SliceType (std::move (inner_type), locus)); - case SEMICOLON: - { - // array type - lexer.skip_token (); - - // parse required array size expression - auto size = parse_anon_const (); - - if (!size) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse size expression in array type"); - add_error (std::move (error)); - - return nullptr; - } - - if (!skip_token (RIGHT_SQUARE)) - { - return nullptr; - } - - return std::unique_ptr ( - new AST::ArrayType (std::move (inner_type), std::move (*size), - locus)); - } - default: - // error - add_error ( - Error (t->get_locus (), - "unrecognised token %qs in slice or array type after inner type", - t->get_token_description ())); - - return nullptr; - } -} - -// Parses a type, taking into account type boundary disambiguation. 
-template -std::unique_ptr -Parser::parse_type_no_bounds () -{ - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - // never type - can't be macro as no path beforehand - lexer.skip_token (); - return std::unique_ptr ( - new AST::NeverType (t->get_locus ())); - case LEFT_SQUARE: - // slice type or array type - requires further disambiguation - return parse_slice_or_array_type (); - case LEFT_SHIFT: - case LEFT_ANGLE: - { - // qualified path in type - AST::QualifiedPathInType path = parse_qualified_path_in_type (); - if (path.is_error ()) - { - Error error (t->get_locus (), - "failed to parse qualified path in type"); - add_error (std::move (error)); - - return nullptr; - } - return std::unique_ptr ( - new AST::QualifiedPathInType (std::move (path))); - } - case UNDERSCORE: - // inferred type - lexer.skip_token (); - return std::unique_ptr ( - new AST::InferredType (t->get_locus ())); - case ASTERISK: - // raw pointer type - return parse_raw_pointer_type (); - case AMP: // does this also include AMP_AMP? Yes! Which is... LOGICAL_AND? - case LOGICAL_AND: - // reference type - return parse_reference_type (); - case LIFETIME: - /* probably a lifetime bound, so probably type param bounds in - * TraitObjectType. this is not allowed, but detection here for error - * message */ - add_error (Error (t->get_locus (), - "lifetime bounds (i.e. in type param bounds, in " - "TraitObjectType) are not allowed as TypeNoBounds")); - - return nullptr; - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - { - // macro invocation or type path - requires further disambiguation. - /* for parsing path component of each rule, perhaps parse it as a - * typepath and attempt conversion to simplepath if a trailing '!' 
is - * found */ - /* Type path also includes TraitObjectTypeOneBound BUT if it starts - * with it, it is exactly the same as a TypePath syntactically, so - * this is a syntactical ambiguity. As such, the parser will parse it - * as a TypePath. This, however, does not prevent TraitObjectType from - * starting with a typepath. */ - - // parse path as type path - AST::TypePath path = parse_type_path (); - if (path.is_error ()) - { - Error error ( - t->get_locus (), - "failed to parse path as first component of type no bounds"); - add_error (std::move (error)); - - return nullptr; - } - location_t locus = path.get_locus (); - - // branch on next token - t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - { - // macro invocation - // convert to simple path - AST::SimplePath macro_path = path.as_simple_path (); - if (macro_path.is_empty ()) - { - Error error (t->get_locus (), - "failed to parse simple path in macro " - "invocation (for type)"); - add_error (std::move (error)); - - return nullptr; - } - - lexer.skip_token (); - - auto tok_tree = parse_delim_token_tree (); - if (!tok_tree) - return nullptr; - - return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (macro_path), - std::move (tok_tree.value ())), - {}, locus); - } - default: - // assume that this is a type path and not an error - return std::unique_ptr ( - new AST::TypePath (std::move (path))); - } - } - case LEFT_PAREN: - /* tuple type or parenthesised type - requires further disambiguation - * (the usual). 
ok apparently can be a parenthesised TraitBound too, so - * could be TraitObjectTypeOneBound */ - return parse_paren_prefixed_type_no_bounds (); - case FOR: - case ASYNC: - case CONST: - case UNSAFE: - case EXTERN_KW: - case FN_KW: - // bare function type (with no for lifetimes) - return parse_bare_function_type (std::vector ()); - case IMPL: - lexer.skip_token (); - if (lexer.peek_token ()->get_id () == LIFETIME) - { - /* cannot be one bound because lifetime prevents it from being - * traitbound not allowed as type no bounds, only here for error - * message */ - Error error ( - lexer.peek_token ()->get_locus (), - "lifetime (probably lifetime bound, in type param " - "bounds, in ImplTraitType) is not allowed in TypeNoBounds"); - add_error (std::move (error)); - - return nullptr; - } - else - { - // should be trait bound, so parse trait bound - std::unique_ptr initial_bound = parse_trait_bound (); - if (initial_bound == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse ImplTraitTypeOneBound bound"); - add_error (std::move (error)); - - return nullptr; - } - - location_t locus = t->get_locus (); - - // ensure not a trait with multiple bounds - t = lexer.peek_token (); - if (t->get_id () == PLUS) - { - Error error (t->get_locus (), - "plus after trait bound means an ImplTraitType, " - "which is not allowed as a TypeNoBounds"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::ImplTraitTypeOneBound (std::move (initial_bound), locus)); - } - case DYN: - case QUESTION_MARK: - { - // either TraitObjectTypeOneBound - bool has_dyn = false; - if (t->get_id () == DYN) - { - lexer.skip_token (); - has_dyn = true; - } - - if (lexer.peek_token ()->get_id () == LIFETIME) - { - /* means that cannot be TraitObjectTypeOneBound - so here for - * error message */ - Error error (lexer.peek_token ()->get_locus (), - "lifetime as bound in TraitObjectTypeOneBound " - "is not allowed, so cannot be 
TypeNoBounds"); - add_error (std::move (error)); - - return nullptr; - } - - // should be trait bound, so parse trait bound - std::unique_ptr initial_bound = parse_trait_bound (); - if (initial_bound == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse TraitObjectTypeOneBound initial bound"); - add_error (std::move (error)); - - return nullptr; - } - - location_t locus = t->get_locus (); - - // detect error with plus as next token - t = lexer.peek_token (); - if (t->get_id () == PLUS) - { - Error error (t->get_locus (), - "plus after trait bound means a TraitObjectType, " - "which is not allowed as a TypeNoBounds"); - add_error (std::move (error)); - - return nullptr; - } - - // convert trait bound to value object - AST::TraitBound value_bound (*initial_bound); - - return std::unique_ptr ( - new AST::TraitObjectTypeOneBound (std::move (value_bound), locus, - has_dyn)); - } - default: - add_error (Error (t->get_locus (), - "unrecognised token %qs in type no bounds", - t->get_token_description ())); - - return nullptr; - } -} - -// Parses a type no bounds beginning with '('. -template -std::unique_ptr -Parser::parse_paren_prefixed_type_no_bounds () -{ - /* NOTE: this could probably be parsed without the HACK solution of - * parse_paren_prefixed_type, but I was lazy. So FIXME for future.*/ - - /* NOTE: again, syntactical ambiguity of a parenthesised trait bound is - * considered a trait bound, not a parenthesised type, so that it can still - * be used in type param bounds. 
*/ - - location_t left_paren_locus = lexer.peek_token ()->get_locus (); - - // skip left delim - lexer.skip_token (); - /* while next token isn't close delim, parse comma-separated types, saving - * whether trailing comma happens */ - const_TokenPtr t = lexer.peek_token (); - bool trailing_comma = true; - std::vector> types; - - while (t->get_id () != RIGHT_PAREN) - { - std::unique_ptr type = parse_type (); - if (type == nullptr) - { - Error error (t->get_locus (), - "failed to parse type inside parentheses (probably " - "tuple or parenthesised)"); - add_error (std::move (error)); - - return nullptr; - } - types.push_back (std::move (type)); - - t = lexer.peek_token (); - if (t->get_id () != COMMA) - { - trailing_comma = false; - break; - } - lexer.skip_token (); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - // if only one type and no trailing comma, then not a tuple type - if (types.size () == 1 && !trailing_comma) - { - // must be a TraitObjectType (with more than one bound) - if (lexer.peek_token ()->get_id () == PLUS) - { - // error - this is not allowed for type no bounds - Error error (lexer.peek_token ()->get_locus (), - "plus (implying TraitObjectType as type param " - "bounds) is not allowed in type no bounds"); - add_error (std::move (error)); - - return nullptr; - } - else - { - // release vector pointer - std::unique_ptr released_ptr = std::move (types[0]); - /* HACK: attempt to convert to trait bound. 
if fails, parenthesised - * type */ - std::unique_ptr converted_bound ( - released_ptr->to_trait_bound (true)); - if (converted_bound == nullptr) - { - // parenthesised type - return std::unique_ptr ( - new AST::ParenthesisedType (std::move (released_ptr), - left_paren_locus)); - } - else - { - // trait object type (one bound) - - // get value semantics trait bound - AST::TraitBound value_bound (*converted_bound); - - return std::unique_ptr ( - new AST::TraitObjectTypeOneBound (value_bound, - left_paren_locus)); - } - } - } - else - { - return std::unique_ptr ( - new AST::TupleType (std::move (types), left_paren_locus)); - } - /* TODO: ensure that this ensures that dynamic dispatch for traits is not - * lost somehow */ -} - -/* Parses a literal pattern or range pattern. Assumes that literals passed in - * are valid range pattern bounds. Do not pass in paths in expressions, for - * instance. */ -template -std::unique_ptr -Parser::parse_literal_or_range_pattern () -{ - const_TokenPtr range_lower = lexer.peek_token (); - AST::Literal::LitType type = AST::Literal::STRING; - bool has_minus = false; - - // get lit type - switch (range_lower->get_id ()) - { - case CHAR_LITERAL: - type = AST::Literal::CHAR; - lexer.skip_token (); - break; - case BYTE_CHAR_LITERAL: - type = AST::Literal::BYTE; - lexer.skip_token (); - break; - case INT_LITERAL: - type = AST::Literal::INT; - lexer.skip_token (); - break; - case FLOAT_LITERAL: - type = AST::Literal::FLOAT; - lexer.skip_token (); - break; - case MINUS: - // branch on next token - range_lower = lexer.peek_token (1); - switch (range_lower->get_id ()) - { - case INT_LITERAL: - type = AST::Literal::INT; - has_minus = true; - lexer.skip_token (1); - break; - case FLOAT_LITERAL: - type = AST::Literal::FLOAT; - has_minus = true; - lexer.skip_token (1); - break; - default: - add_error (Error (range_lower->get_locus (), - "token type %qs cannot be parsed as range pattern " - "bound or literal after minus symbol", - 
range_lower->get_token_description ())); - - return nullptr; - } - break; - default: - add_error ( - Error (range_lower->get_locus (), - "token type %qs cannot be parsed as range pattern bound", - range_lower->get_token_description ())); - - return nullptr; - } - - const_TokenPtr next = lexer.peek_token (); - if (next->get_id () == DOT_DOT_EQ || next->get_id () == ELLIPSIS - || next->get_id () == DOT_DOT) - { - AST::RangeKind kind = AST::tokenid_to_rangekind (next->get_id ()); - // range pattern - lexer.skip_token (); - std::unique_ptr lower ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), type, - PrimitiveCoreType::CORETYPE_UNKNOWN), - range_lower->get_locus (), has_minus)); - - std::unique_ptr upper - = parse_range_pattern_bound (); - if (upper == nullptr) - { - Error error (next->get_locus (), - "failed to parse range pattern bound in range pattern"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::RangePattern (std::move (lower), std::move (upper), kind, - range_lower->get_locus ())); - } - else - { - // literal pattern - return std::unique_ptr ( - new AST::LiteralPattern (range_lower->get_str (), type, - range_lower->get_locus (), - range_lower->get_type_hint (), has_minus)); - } -} - -// Parses a range pattern bound (value only). 
-template -std::unique_ptr -Parser::parse_range_pattern_bound () -{ - const_TokenPtr range_lower = lexer.peek_token (); - location_t range_lower_locus = range_lower->get_locus (); - - // get lit type - switch (range_lower->get_id ()) - { - case CHAR_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::CHAR, - range_lower->get_type_hint ()), - range_lower_locus)); - case BYTE_CHAR_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::BYTE, - range_lower->get_type_hint ()), - range_lower_locus)); - case INT_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), - range_lower_locus)); - case FLOAT_LITERAL: - lexer.skip_token (); - rust_debug ("warning: used deprecated float range pattern bound"); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), - range_lower_locus)); - case MINUS: - // branch on next token - range_lower = lexer.peek_token (1); - switch (range_lower->get_id ()) - { - case INT_LITERAL: - lexer.skip_token (1); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::INT, - range_lower->get_type_hint ()), - range_lower_locus, true)); - case FLOAT_LITERAL: - lexer.skip_token (1); - rust_debug ("warning: used deprecated float range pattern bound"); - return std::unique_ptr ( - new AST::RangePatternBoundLiteral ( - AST::Literal (range_lower->get_str (), AST::Literal::FLOAT, - range_lower->get_type_hint ()), - range_lower_locus, true)); - default: - add_error (Error (range_lower->get_locus (), - "token type %qs cannot be parsed as range pattern " - "bound after 
minus symbol", - range_lower->get_token_description ())); - - return nullptr; - } - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case SCOPE_RESOLUTION: - case DOLLAR_SIGN: - { - // path in expression - AST::PathInExpression path = parse_path_in_expression (); - if (path.is_error ()) - { - Error error ( - range_lower->get_locus (), - "failed to parse path in expression range pattern bound"); - add_error (std::move (error)); - - return nullptr; - } - return std::unique_ptr ( - new AST::RangePatternBoundPath (std::move (path))); - } - case LEFT_SHIFT: - case LEFT_ANGLE: - { - // qualified path in expression - AST::QualifiedPathInExpression path - = parse_qualified_path_in_expression (); - if (path.is_error ()) - { - Error error (range_lower->get_locus (), - "failed to parse qualified path in expression range " - "pattern bound"); - add_error (std::move (error)); - - return nullptr; - } - return std::unique_ptr ( - new AST::RangePatternBoundQualPath (std::move (path))); - } - default: - add_error ( - Error (range_lower->get_locus (), - "token type %qs cannot be parsed as range pattern bound", - range_lower->get_token_description ())); - - return nullptr; - } -} - -template -std::unique_ptr -Parser::parse_pattern () -{ - location_t start_locus = lexer.peek_token ()->get_locus (); - - /* skip optional starting pipe */ - maybe_skip_token (PIPE); - - auto first = parse_pattern_no_alt (); - - if (lexer.peek_token ()->get_id () != PIPE) - /* no alternates */ - return first; - - std::vector> alts; - if (first != nullptr) - alts.push_back (std::move (first)); - - do - { - lexer.skip_token (); - auto follow = parse_pattern_no_alt (); - if (follow != nullptr) - alts.push_back (std::move (follow)); - } - - while (lexer.peek_token ()->get_id () == PIPE); - - if (alts.empty ()) - return nullptr; - - /* alternates */ - return std::unique_ptr ( - new AST::AltPattern (std::move (alts), start_locus)); -} - -// Parses a pattern without alternates ('|') 
-// (will further disambiguate any pattern). -template -std::unique_ptr -Parser::parse_pattern_no_alt () -{ - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case TRUE_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::LiteralPattern (Values::Keywords::TRUE_LITERAL, - AST::Literal::BOOL, t->get_locus (), - t->get_type_hint ())); - case FALSE_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::LiteralPattern (Values::Keywords::FALSE_LITERAL, - AST::Literal::BOOL, t->get_locus (), - t->get_type_hint ())); - case CHAR_LITERAL: - case BYTE_CHAR_LITERAL: - case INT_LITERAL: - case FLOAT_LITERAL: - return parse_literal_or_range_pattern (); - case STRING_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::LiteralPattern (t->get_str (), AST::Literal::STRING, - t->get_locus (), t->get_type_hint ())); - case BYTE_STRING_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::LiteralPattern (t->get_str (), AST::Literal::BYTE_STRING, - t->get_locus (), t->get_type_hint ())); - case RAW_STRING_LITERAL: - lexer.skip_token (); - return std::unique_ptr ( - new AST::LiteralPattern (t->get_str (), AST::Literal::RAW_STRING, - t->get_locus (), t->get_type_hint ())); - // raw string and raw byte string literals too if they are readded to - // lexer - case MINUS: - if (lexer.peek_token (1)->get_id () == INT_LITERAL) - { - return parse_literal_or_range_pattern (); - } - else if (lexer.peek_token (1)->get_id () == FLOAT_LITERAL) - { - return parse_literal_or_range_pattern (); - } - else - { - Error error (t->get_locus (), "unexpected token %<-%> in pattern - " - "did you forget an integer literal"); - add_error (std::move (error)); - - return nullptr; - } - case UNDERSCORE: - lexer.skip_token (); - return std::unique_ptr ( - new AST::WildcardPattern (t->get_locus ())); - case DOT_DOT: - lexer.skip_token (); - return std::unique_ptr ( - new AST::RestPattern (t->get_locus ())); - case REF: - case MUT: 
- return parse_identifier_pattern (); - case IDENTIFIER: - /* if identifier with no scope resolution afterwards, identifier - * pattern. if scope resolution afterwards, path pattern (or range - * pattern or struct pattern or tuple struct pattern) or macro - * invocation */ - return parse_ident_leading_pattern (); - case AMP: - case LOGICAL_AND: - // reference pattern - return parse_reference_pattern (); - case LEFT_PAREN: - // tuple pattern or grouped pattern - return parse_grouped_or_tuple_pattern (); - case LEFT_SQUARE: - // slice pattern - return parse_slice_pattern (); - case LEFT_SHIFT: - case LEFT_ANGLE: - { - // qualified path in expression or qualified range pattern bound - AST::QualifiedPathInExpression path - = parse_qualified_path_in_expression (); - - if (lexer.peek_token ()->get_id () == DOT_DOT_EQ - || lexer.peek_token ()->get_id () == ELLIPSIS - || lexer.peek_token ()->get_id () == DOT_DOT) - { - // qualified range pattern bound, so parse rest of range pattern - AST::RangeKind kind - = AST::tokenid_to_rangekind (lexer.peek_token ()->get_id ()); - lexer.skip_token (); - - std::unique_ptr lower_bound ( - new AST::RangePatternBoundQualPath (std::move (path))); - std::unique_ptr upper_bound - = parse_range_pattern_bound (); - - return std::unique_ptr ( - new AST::RangePattern (std::move (lower_bound), - std::move (upper_bound), kind, - t->get_locus ())); - } - else - { - // just qualified path in expression - return std::unique_ptr ( - new AST::QualifiedPathInExpression (std::move (path))); - } - } - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case SCOPE_RESOLUTION: - case DOLLAR_SIGN: - { - // path in expression or range pattern bound - AST::PathInExpression path = parse_path_in_expression (); - - const_TokenPtr next = lexer.peek_token (); - switch (next->get_id ()) - { - case DOT_DOT_EQ: - case DOT_DOT: - case ELLIPSIS: - { - // qualified range pattern bound, so parse rest of range pattern - AST::RangeKind kind = 
AST::tokenid_to_rangekind (next->get_id ()); - lexer.skip_token (); - - std::unique_ptr lower_bound ( - new AST::RangePatternBoundPath (std::move (path))); - std::unique_ptr upper_bound - = parse_range_pattern_bound (); - - return std::unique_ptr ( - new AST::RangePattern (std::move (lower_bound), - std::move (upper_bound), kind, - next->get_locus ())); - } - case EXCLAM: - return parse_macro_invocation_partial (std::move (path), - AST::AttrVec ()); - case LEFT_PAREN: - { - // tuple struct - lexer.skip_token (); - - // parse items - std::unique_ptr items - = parse_tuple_struct_items (); - if (items == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse tuple struct items"); - add_error (std::move (error)); - - return nullptr; - } - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - return std::unique_ptr ( - new AST::TupleStructPattern (std::move (path), - std::move (items))); - } - case LEFT_CURLY: - { - // struct - lexer.skip_token (); - - // parse elements (optional) - AST::StructPatternElements elems = parse_struct_pattern_elems (); - - if (!skip_token (RIGHT_CURLY)) - { - return nullptr; - } - - return std::unique_ptr ( - new AST::StructPattern (std::move (path), t->get_locus (), - std::move (elems))); - } - default: - // assume path in expression - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } - } - default: - add_error (Error (t->get_locus (), "unexpected token %qs in pattern", - t->get_token_description ())); - - return nullptr; - } -} - -// Parses a single or double reference pattern. 
-template -std::unique_ptr -Parser::parse_reference_pattern () -{ - // parse double or single ref - bool is_double_ref = false; - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case AMP: - // still false - lexer.skip_token (); - break; - case LOGICAL_AND: - is_double_ref = true; - lexer.skip_token (); - break; - default: - add_error (Error (t->get_locus (), - "unexpected token %qs in reference pattern", - t->get_token_description ())); - - return nullptr; - } - - // parse mut (if it exists) - bool is_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - is_mut = true; - lexer.skip_token (); - } - - // parse pattern to get reference of (required) - std::unique_ptr pattern = parse_pattern_no_alt (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in reference pattern"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ReferencePattern (std::move (pattern), is_mut, is_double_ref, - t->get_locus ())); -} - -/* Parses a grouped pattern or tuple pattern. Prefers grouped over tuple if - * only a single element with no commas. */ -template -std::unique_ptr -Parser::parse_grouped_or_tuple_pattern () -{ - location_t paren_locus = lexer.peek_token ()->get_locus (); - skip_token (LEFT_PAREN); - - // detect '..' 
token (ranged with no lower range) - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - lexer.skip_token (); - - // parse new patterns while next token is a comma - std::vector> patterns; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break if next token is ')' - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - { - break; - } - - // parse pattern, which is required - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error ( - lexer.peek_token ()->get_locus (), - "failed to parse pattern inside ranged tuple pattern"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - patterns.push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - // skip somewhere? - return nullptr; - } - - // create tuple pattern items with only upper pattern items - std::unique_ptr items ( - new AST::TuplePatternItemsHasRest ( - std::vector> (), std::move (patterns))); - return std::unique_ptr ( - new AST::TuplePattern (std::move (items), paren_locus)); - } - else if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - { - skip_token (RIGHT_PAREN); - auto items = std::unique_ptr ( - new AST::TuplePatternItemsNoRest ( - std::vector> ())); - return std::unique_ptr ( - new AST::TuplePattern (std::move (items), paren_locus)); - } - - // parse initial pattern (required) - std::unique_ptr initial_pattern = parse_pattern (); - if (initial_pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in grouped or tuple pattern"); - add_error (std::move (error)); - - return nullptr; - } - - // branch on whether next token is a comma or not - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case RIGHT_PAREN: - // grouped pattern - lexer.skip_token (); - - return std::unique_ptr ( - new AST::GroupedPattern (std::move (initial_pattern), paren_locus)); - case COMMA: - { - 
// tuple pattern - lexer.skip_token (); - - // create vector of patterns - std::vector> patterns; - patterns.push_back (std::move (initial_pattern)); - - t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) - { - // parse pattern (required) - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (t->get_locus (), - "failed to parse pattern in tuple pattern"); - add_error (std::move (error)); - - return nullptr; - } - patterns.push_back (std::move (pattern)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - t = lexer.peek_token (); - if (t->get_id () == RIGHT_PAREN) - { - // non-ranged tuple pattern - lexer.skip_token (); - - std::unique_ptr items ( - new AST::TuplePatternItemsNoRest (std::move (patterns))); - return std::unique_ptr ( - new AST::TuplePattern (std::move (items), paren_locus)); - } - else if (t->get_id () == DOT_DOT) - { - // ranged tuple pattern - lexer.skip_token (); - - // parse upper patterns - std::vector> upper_patterns; - t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break if end - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - break; - - // parse pattern (required) - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in tuple pattern"); - add_error (std::move (error)); - - return nullptr; - } - upper_patterns.push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - std::unique_ptr items ( - new AST::TuplePatternItemsHasRest (std::move (patterns), - std::move (upper_patterns))); - return std::unique_ptr ( - new AST::TuplePattern (std::move (items), paren_locus)); - } - else - { - // some kind of error - Error error (t->get_locus (), - "failed to parse tuple pattern (probably) or maybe " 
- "grouped pattern"); - add_error (std::move (error)); - - return nullptr; - } - } - default: - // error - add_error (Error (t->get_locus (), - "unrecognised token %qs in grouped or tuple pattern " - "after first pattern", - t->get_token_description ())); - - return nullptr; - } -} - -/* Parses a slice pattern that can match arrays or slices. Parses the square - * brackets too. */ -template -std::unique_ptr -Parser::parse_slice_pattern () -{ - location_t square_locus = lexer.peek_token ()->get_locus (); - std::vector> patterns; - tl::optional>> upper_patterns - = tl::nullopt; - - // lambda function to determine which vector to push new patterns into - auto get_pattern_ref - = [&] () -> std::vector> & { - return upper_patterns.has_value () ? upper_patterns.value () : patterns; - }; - - skip_token (LEFT_SQUARE); - - if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) - { - skip_token (RIGHT_SQUARE); - std::unique_ptr items ( - new AST::SlicePatternItemsNoRest (std::move (patterns))); - return std::unique_ptr ( - new AST::SlicePattern (std::move (items), square_locus)); - } - - // parse initial pattern (required) - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - lexer.skip_token (); - upper_patterns = std::vector> (); - } - else - { - // Not a rest pattern `..`, parse normally - std::unique_ptr initial_pattern = parse_pattern (); - if (initial_pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse initial pattern in slice pattern"); - add_error (std::move (error)); - - return nullptr; - } - - patterns.push_back (std::move (initial_pattern)); - } - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break if end bracket - if (lexer.peek_token ()->get_id () == RIGHT_SQUARE) - break; - - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - if (upper_patterns.has_value ()) - { - // DOT_DOT has been parsed before - Error error (lexer.peek_token ()->get_locus (), "%s", - "`..` 
can only be used once per slice pattern"); - add_error (std::move (error)); - - return nullptr; - } - upper_patterns = std::vector> (); - lexer.skip_token (); - t = lexer.peek_token (); - continue; - } - - // parse pattern (required) - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in slice pattern"); - add_error (std::move (error)); - - return nullptr; - } - get_pattern_ref ().push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_SQUARE)) - { - return nullptr; - } - - if (upper_patterns.has_value ()) - { - // Slice pattern with rest - std::unique_ptr items ( - new AST::SlicePatternItemsHasRest ( - std::move (patterns), std::move (upper_patterns.value ()))); - return std::unique_ptr ( - new AST::SlicePattern (std::move (items), square_locus)); - } - - // Rest-less slice pattern - std::unique_ptr items ( - new AST::SlicePatternItemsNoRest (std::move (patterns))); - return std::unique_ptr ( - new AST::SlicePattern (std::move (items), square_locus)); -} - -/* Parses an identifier pattern (pattern that binds a value matched to a - * variable). */ -template -std::unique_ptr -Parser::parse_identifier_pattern () -{ - location_t locus = lexer.peek_token ()->get_locus (); - - bool has_ref = false; - if (lexer.peek_token ()->get_id () == REF) - { - has_ref = true; - lexer.skip_token (); - - // DEBUG - rust_debug ("parsed ref in identifier pattern"); - } - - bool has_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - has_mut = true; - lexer.skip_token (); - } - - // parse identifier (required) - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - // skip somewhere? 
- return nullptr; - } - Identifier ident{ident_tok}; - - // DEBUG - rust_debug ("parsed identifier in identifier pattern"); - - // parse optional pattern binding thing - std::unique_ptr bind_pattern = nullptr; - if (lexer.peek_token ()->get_id () == PATTERN_BIND) - { - lexer.skip_token (); - - // parse required pattern to bind - bind_pattern = parse_pattern_no_alt (); - if (bind_pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern to bind in identifier pattern"); - add_error (std::move (error)); - - return nullptr; - } - } - - // DEBUG - rust_debug ("about to return identifier pattern"); - - return std::unique_ptr ( - new AST::IdentifierPattern (std::move (ident), locus, has_ref, has_mut, - std::move (bind_pattern))); -} - -/* Parses a pattern that opens with an identifier. This includes identifier - * patterns, path patterns (and derivatives such as struct patterns, tuple - * struct patterns, and macro invocations), and ranges. */ -template -std::unique_ptr -Parser::parse_ident_leading_pattern () -{ - // ensure first token is actually identifier - const_TokenPtr initial_tok = lexer.peek_token (); - if (initial_tok->get_id () != IDENTIFIER) - { - return nullptr; - } - - // save initial identifier as it may be useful (but don't skip) - std::string initial_ident = initial_tok->get_str (); - - // parse next tokens as a PathInExpression - AST::PathInExpression path = parse_path_in_expression (); - - // branch on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - return parse_macro_invocation_partial (std::move (path), AST::AttrVec ()); - case LEFT_PAREN: - { - // tuple struct - lexer.skip_token (); - - // DEBUG - rust_debug ("parsing tuple struct pattern"); - - // parse items - std::unique_ptr items - = parse_tuple_struct_items (); - if (items == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse tuple struct items"); - add_error (std::move 
(error)); - - return nullptr; - } - - // DEBUG - rust_debug ("successfully parsed tuple struct items"); - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - // DEBUG - rust_debug ("successfully parsed tuple struct pattern"); - - return std::unique_ptr ( - new AST::TupleStructPattern (std::move (path), std::move (items))); - } - case LEFT_CURLY: - { - // struct - lexer.skip_token (); - - // parse elements (optional) - AST::StructPatternElements elems = parse_struct_pattern_elems (); - - if (!skip_token (RIGHT_CURLY)) - { - return nullptr; - } - - // DEBUG - rust_debug ("successfully parsed struct pattern"); - - return std::unique_ptr ( - new AST::StructPattern (std::move (path), initial_tok->get_locus (), - std::move (elems))); - } - case DOT_DOT_EQ: - case DOT_DOT: - case ELLIPSIS: - { - // range - AST::RangeKind kind - = AST::tokenid_to_rangekind (lexer.peek_token ()->get_id ()); - - lexer.skip_token (); - - std::unique_ptr lower_bound ( - new AST::RangePatternBoundPath (std::move (path))); - std::unique_ptr upper_bound - = parse_range_pattern_bound (); - - return std::unique_ptr ( - new AST::RangePattern (std::move (lower_bound), - std::move (upper_bound), kind, - t->get_locus ())); - } - case PATTERN_BIND: - { - // only allow on single-segment paths - if (path.is_single_segment ()) - { - // identifier with pattern bind - lexer.skip_token (); - - std::unique_ptr bind_pattern - = parse_pattern_no_alt (); - if (bind_pattern == nullptr) - { - Error error ( - t->get_locus (), - "failed to parse pattern to bind to identifier pattern"); - add_error (std::move (error)); - - return nullptr; - } - return std::unique_ptr ( - new AST::IdentifierPattern (std::move (initial_ident), - initial_tok->get_locus (), false, - false, std::move (bind_pattern))); - } - Error error ( - t->get_locus (), - "failed to parse pattern bind to a path, not an identifier"); - add_error (std::move (error)); - - return nullptr; - } - default: - // assume identifier if single segment - 
if (path.is_single_segment ()) - { - return std::unique_ptr ( - new AST::IdentifierPattern (std::move (initial_ident), - initial_tok->get_locus ())); - } - // return path otherwise - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } -} - -// Parses tuple struct items if they exist. Does not parse parentheses. -template -std::unique_ptr -Parser::parse_tuple_struct_items () -{ - std::vector> lower_patterns; - - // DEBUG - rust_debug ("started parsing tuple struct items"); - - // check for '..' at front - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - // only parse upper patterns - lexer.skip_token (); - - // DEBUG - rust_debug ("'..' at front in tuple struct items detected"); - - std::vector> upper_patterns; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break if right paren - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - break; - - // parse pattern, which is now required - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in tuple struct items"); - add_error (std::move (error)); - - return nullptr; - } - upper_patterns.push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - // DEBUG - rust_debug ( - "finished parsing tuple struct items ranged (upper/none only)"); - - return std::unique_ptr ( - new AST::TupleStructItemsHasRest (std::move (lower_patterns), - std::move (upper_patterns))); - } - - // has at least some lower patterns - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT) - { - // DEBUG - rust_debug ("about to parse pattern in tuple struct items"); - - // parse pattern, which is required - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (t->get_locus (), - "failed to parse pattern in tuple struct items"); - add_error (std::move (error)); - - return 
nullptr; - } - lower_patterns.push_back (std::move (pattern)); - - // DEBUG - rust_debug ("successfully parsed pattern in tuple struct items"); - - if (lexer.peek_token ()->get_id () != COMMA) - { - // DEBUG - rust_debug ("broke out of parsing patterns in tuple struct " - "items as no comma"); - - break; - } - lexer.skip_token (); - t = lexer.peek_token (); - } - - // branch on next token - t = lexer.peek_token (); - switch (t->get_id ()) - { - case RIGHT_PAREN: - return std::unique_ptr ( - new AST::TupleStructItemsNoRest (std::move (lower_patterns))); - case DOT_DOT: - { - // has an upper range that must be parsed separately - lexer.skip_token (); - - std::vector> upper_patterns; - - t = lexer.peek_token (); - while (t->get_id () == COMMA) - { - lexer.skip_token (); - - // break if next token is right paren - if (lexer.peek_token ()->get_id () == RIGHT_PAREN) - break; - - // parse pattern, which is required - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse pattern in tuple struct items"); - add_error (std::move (error)); - - return nullptr; - } - upper_patterns.push_back (std::move (pattern)); - - t = lexer.peek_token (); - } - - return std::unique_ptr ( - new AST::TupleStructItemsHasRest (std::move (lower_patterns), - std::move (upper_patterns))); - } - default: - // error - add_error (Error (t->get_locus (), - "unexpected token %qs in tuple struct items", - t->get_token_description ())); - - return nullptr; - } -} - -// Parses struct pattern elements if they exist. 
-template -AST::StructPatternElements -Parser::parse_struct_pattern_elems () -{ - std::vector> fields; - - AST::AttrVec etc_attrs; - bool has_rest = false; - - // try parsing struct pattern fields - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_CURLY) - { - AST::AttrVec outer_attrs = parse_outer_attributes (); - - // parse etc (must be last in struct pattern, so breaks) - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - lexer.skip_token (); - etc_attrs = std::move (outer_attrs); - has_rest = true; - break; - } - - std::unique_ptr field - = parse_struct_pattern_field_partial (std::move (outer_attrs)); - if (field == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse struct pattern field"); - add_error (std::move (error)); - - // skip after somewhere? - return AST::StructPatternElements::create_empty (); - } - fields.push_back (std::move (field)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - // skip comma - lexer.skip_token (); - t = lexer.peek_token (); - } - - if (has_rest) - return AST::StructPatternElements (std::move (fields), - std::move (etc_attrs)); - else - return AST::StructPatternElements (std::move (fields)); -} - -/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or - * identifier). */ -template -std::unique_ptr -Parser::parse_struct_pattern_field () -{ - // parse outer attributes (if they exist) - AST::AttrVec outer_attrs = parse_outer_attributes (); - - return parse_struct_pattern_field_partial (std::move (outer_attrs)); -} - -/* Parses a struct pattern field (tuple index/pattern, identifier/pattern, or - * identifier), with outer attributes passed in. 
*/ -template -std::unique_ptr -Parser::parse_struct_pattern_field_partial ( - AST::AttrVec outer_attrs) -{ - // branch based on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case INT_LITERAL: - { - // tuple index - std::string index_str = t->get_str (); - int index = atoi (index_str.c_str ()); - - lexer.skip_token (); - - if (!skip_token (COLON)) - { - return nullptr; - } - - // parse required pattern - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error ( - t->get_locus (), - "failed to parse pattern in tuple index struct pattern field"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::StructPatternFieldTuplePat (index, std::move (pattern), - std::move (outer_attrs), - t->get_locus ())); - } - case IDENTIFIER: - // identifier-pattern OR only identifier - // branch on next token - switch (lexer.peek_token (1)->get_id ()) - { - case COLON: - { - // identifier-pattern - Identifier ident{t}; - lexer.skip_token (); - - skip_token (COLON); - - // parse required pattern - std::unique_ptr pattern = parse_pattern (); - if (pattern == nullptr) - { - Error error (t->get_locus (), - "failed to parse pattern in struct pattern field"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::StructPatternFieldIdentPat (std::move (ident), - std::move (pattern), - std::move (outer_attrs), - t->get_locus ())); - } - case COMMA: - case RIGHT_CURLY: - { - // identifier only - Identifier ident = {t}; - lexer.skip_token (); - - return std::unique_ptr ( - new AST::StructPatternFieldIdent (std::move (ident), false, false, - std::move (outer_attrs), - t->get_locus ())); - } - default: - // error - add_error (Error (t->get_locus (), - "unrecognised token %qs in struct pattern field", - t->get_token_description ())); - - return nullptr; - } - case REF: - case MUT: - { - // only identifier - bool has_ref = false; - if (t->get_id () 
== REF) - { - has_ref = true; - lexer.skip_token (); - } - - bool has_mut = false; - if (lexer.peek_token ()->get_id () == MUT) - { - has_mut = true; - lexer.skip_token (); - } - - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - { - return nullptr; - } - Identifier ident{ident_tok}; - - return std::unique_ptr ( - new AST::StructPatternFieldIdent (std::move (ident), has_ref, has_mut, - std::move (outer_attrs), - t->get_locus ())); - } - default: - // not necessarily an error - return nullptr; - } -} - -/* Parses a statement or expression (depending on whether a trailing semicolon - * exists). Useful for block expressions where it cannot be determined through - * lookahead whether it is a statement or expression to be parsed. */ -template -ExprOrStmt -Parser::parse_stmt_or_expr () -{ - // quick exit for empty statement - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () == SEMICOLON) - { - lexer.skip_token (); - std::unique_ptr stmt ( - new AST::EmptyStmt (t->get_locus ())); - return ExprOrStmt (std::move (stmt)); - } - - // parse outer attributes - AST::AttrVec outer_attrs = parse_outer_attributes (); - ParseRestrictions restrictions; - restrictions.expr_can_be_stmt = true; - std::unique_ptr expr; - - // parsing this will be annoying because of the many different possibilities - /* best may be just to copy paste in parse_item switch, and failing that try - * to parse outer attributes, and then pass them in to either a let - * statement or (fallback) expression statement. */ - // FIXME: think of a way to do this without such a large switch? - - /* FIXME: for expressions at least, the only way that they can really be - * parsed properly in this way is if they don't support operators on them. - * They must be pratt-parsed otherwise. As such due to composability, only - * explicit statements will have special cases here. 
This should roughly - * correspond to "expr-with-block", but this warning is here in case it - * isn't the case. */ - t = lexer.peek_token (); - switch (t->get_id ()) - { - case LET: - { - // let statement - std::unique_ptr stmt ( - parse_let_stmt (std::move (outer_attrs))); - return ExprOrStmt (std::move (stmt)); - } - case PUB: - case MOD: - case EXTERN_KW: - case USE: - case FN_KW: - case TYPE: - case STRUCT_KW: - case ENUM_KW: - case CONST: - case STATIC_KW: - case AUTO: - case TRAIT: - case IMPL: - { - std::unique_ptr item ( - parse_vis_item (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - } - /* TODO: implement union keyword but not really because of - * context-dependence crappy hack way to parse a union written below to - * separate it from the good code. */ - // case UNION: - case UNSAFE: - { // maybe - unsafe traits are a thing - /* if any of these (should be all possible VisItem prefixes), parse a - * VisItem - can't parse item because would require reparsing outer - * attributes */ - const_TokenPtr t2 = lexer.peek_token (1); - switch (t2->get_id ()) - { - case LEFT_CURLY: - { - // unsafe block: parse as expression - expr = parse_expr (std::move (outer_attrs), restrictions); - break; - } - case AUTO: - case TRAIT: - { - // unsafe trait - std::unique_ptr item ( - parse_vis_item (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - } - case EXTERN_KW: - case FN_KW: - { - // unsafe function - std::unique_ptr item ( - parse_vis_item (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - } - case IMPL: - { - // unsafe trait impl - std::unique_ptr item ( - parse_vis_item (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - } - default: - add_error (Error (t2->get_locus (), - "unrecognised token %qs after parsing unsafe - " - "expected beginning of expression or statement", - t->get_token_description ())); - - // skip somewhere? 
- return ExprOrStmt::create_error (); - } - break; - } - /* FIXME: this is either a macro invocation or macro invocation semi. - * start parsing to determine which one it is. */ - // FIXME: old code there - - // crappy hack to do union "keyword" - case IDENTIFIER: - if (t->get_str () == Values::WeakKeywords::UNION - && lexer.peek_token (1)->get_id () == IDENTIFIER) - { - std::unique_ptr item ( - parse_vis_item (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - // or should this go straight to parsing union? - } - else if (t->get_str () == Values::WeakKeywords::MACRO_RULES - && lexer.peek_token (1)->get_id () == EXCLAM) - { - // macro_rules! macro item - std::unique_ptr item ( - parse_macro_rules_def (std::move (outer_attrs))); - return ExprOrStmt (std::move (item)); - } - gcc_fallthrough (); - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - case SCOPE_RESOLUTION: - case DOLLAR_SIGN: - { - AST::PathInExpression path = parse_path_in_expression (); - std::unique_ptr null_denotation; - - if (lexer.peek_token ()->get_id () == EXCLAM) - { - std::unique_ptr invoc - = parse_macro_invocation_partial (std::move (path), - std::move (outer_attrs)); - if (invoc == nullptr) - return ExprOrStmt::create_error (); - - if (restrictions.consume_semi && maybe_skip_token (SEMICOLON)) - { - invoc->add_semicolon (); - // Macro invocation with semicolon. 
- return ExprOrStmt ( - std::unique_ptr (std::move (invoc))); - } - - TokenId after_macro = lexer.peek_token ()->get_id (); - - AST::DelimType delim_type = invoc->get_invoc_data () - .get_delim_tok_tree () - .get_delim_type (); - - if (delim_type == AST::CURLY && after_macro != DOT - && after_macro != QUESTION_MARK) - { - rust_debug ("braced macro statement"); - return ExprOrStmt ( - std::unique_ptr (std::move (invoc))); - } - - null_denotation = std::move (invoc); - } - else - { - null_denotation - = null_denotation_path (std::move (path), {}, restrictions); - } - - expr = left_denotations (std::move (null_denotation), LBP_LOWEST, - std::move (outer_attrs), restrictions); - break; - } - default: - /* expression statement or expression itself - parse - * expression then make it statement if semi afterwards */ - expr = parse_expr (std::move (outer_attrs), restrictions); - break; - } - - const_TokenPtr after_expr = lexer.peek_token (); - if (after_expr->get_id () == SEMICOLON) - { - // must be expression statement - lexer.skip_token (); - - if (expr) - { - std::unique_ptr stmt ( - new AST::ExprStmt (std::move (expr), t->get_locus (), true)); - return ExprOrStmt (std::move (stmt)); - } - else - { - return ExprOrStmt::create_error (); - } - } - - if (expr && !expr->is_expr_without_block () - && after_expr->get_id () != RIGHT_CURLY) - { - // block expression statement. - std::unique_ptr stmt ( - new AST::ExprStmt (std::move (expr), t->get_locus (), false)); - return ExprOrStmt (std::move (stmt)); - } - - // return expression - return ExprOrStmt (std::move (expr)); -} - -// Parses a struct expression field. 
-template -std::unique_ptr -Parser::parse_struct_expr_field () -{ - AST::AttrVec outer_attrs = parse_outer_attributes (); - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case IDENTIFIER: - if (lexer.peek_token (1)->get_id () == COLON) - { - // struct expr field with identifier and expr - Identifier ident = {t}; - lexer.skip_token (1); - - // parse expression (required) - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (t->get_locus (), - "failed to parse struct expression field with " - "identifier and expression"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::StructExprFieldIdentifierValue (std::move (ident), - std::move (expr), - std::move (outer_attrs), - t->get_locus ())); - } - else - { - // struct expr field with identifier only - Identifier ident{t}; - lexer.skip_token (); - - return std::unique_ptr ( - new AST::StructExprFieldIdentifier (std::move (ident), - std::move (outer_attrs), - t->get_locus ())); - } - case INT_LITERAL: - { - // parse tuple index field - int index = atoi (t->get_str ().c_str ()); - lexer.skip_token (); - - if (!skip_token (COLON)) - { - // skip somewhere? 
- return nullptr; - } - - // parse field expression (required) - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (t->get_locus (), - "failed to parse expr in struct (or enum) expr " - "field with tuple index"); - add_error (std::move (error)); - - return nullptr; - } - - return std::unique_ptr ( - new AST::StructExprFieldIndexValue (index, std::move (expr), - std::move (outer_attrs), - t->get_locus ())); - } - case DOT_DOT: - /* this is a struct base and can't be parsed here, so just return - * nothing without erroring */ - - return nullptr; - default: - add_error ( - Error (t->get_locus (), - "unrecognised token %qs as first token of struct expr field - " - "expected identifier or integer literal", - t->get_token_description ())); - - return nullptr; - } -} - -// "Unexpected token" panic mode - flags gcc error at unexpected token -// TODO: seems to be unused, remove? -template -void -Parser::unexpected_token (const_TokenPtr t) -{ - Error error (t->get_locus (), "unexpected token %qs", - t->get_token_description ()); - add_error (std::move (error)); -} - -/* Crappy "error recovery" performed after error by skipping tokens until a - * semi-colon is found */ -template -void -Parser::skip_after_semicolon () -{ - const_TokenPtr t = lexer.peek_token (); - - while (t->get_id () != END_OF_FILE && t->get_id () != SEMICOLON) - { - lexer.skip_token (); - t = lexer.peek_token (); - } - - if (t->get_id () == SEMICOLON) - lexer.skip_token (); -} - -/* Skips the current token */ -template -void -Parser::skip_token () -{ - lexer.skip_token (); -} - -/* Checks if current token has inputted id - skips it and returns true if so, - * diagnoses an error and returns false otherwise. */ -template -bool -Parser::skip_token (TokenId token_id) -{ - return expect_token (token_id) != const_TokenPtr (); -} - -/* Checks if current token is similar to inputted token - skips it and returns - * true if so, diagnoses an error and returns false otherwise. 
*/ -template -bool -Parser::skip_token (const_TokenPtr token) -{ - return expect_token (token) != const_TokenPtr (); -} - -/* Checks if current token has inputted id - skips it and returns true if so, - * returns false otherwise without diagnosing an error */ -template -bool -Parser::maybe_skip_token (TokenId token_id) -{ - if (lexer.peek_token ()->get_id () != token_id) - return false; - else - return skip_token (token_id); -} - -/* Checks the current token - if id is same as expected, skips and returns it, - * otherwise diagnoses error and returns null. */ -template -const_TokenPtr -Parser::expect_token (TokenId token_id) -{ - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () == token_id) - { - lexer.skip_token (); - return t; - } - else - { - Error error (t->get_locus (), "expecting %qs but %qs found", - get_token_description (token_id), - t->get_token_description ()); - add_error (std::move (error)); - - return const_TokenPtr (); - } -} - -/* Checks the current token - if same as expected, skips and returns it, - * otherwise diagnoses error and returns null. */ -template -const_TokenPtr -Parser::expect_token (const_TokenPtr token_expect) -{ - const_TokenPtr t = lexer.peek_token (); - if (t->get_id () == token_expect->get_id () - && (!t->should_have_str () || t->get_str () == token_expect->get_str ())) - { - lexer.skip_token (); - return t; - } - else - { - Error error (t->get_locus (), "expecting %qs but %qs found", - token_expect->get_token_description (), - t->get_token_description ()); - add_error (std::move (error)); - - return const_TokenPtr (); - } -} - -// Skips all tokens until EOF or }. Don't use. 
-template -void -Parser::skip_after_end () -{ - const_TokenPtr t = lexer.peek_token (); - - while (t->get_id () != END_OF_FILE && t->get_id () != RIGHT_CURLY) - { - lexer.skip_token (); - t = lexer.peek_token (); - } - - if (t->get_id () == RIGHT_CURLY) - { - lexer.skip_token (); - } -} - -/* A slightly more aware error-handler that skips all tokens until it reaches - * the end of the block scope (i.e. when left curly brackets = right curly - * brackets). Note: assumes currently in the middle of a block. Use - * skip_after_next_block to skip based on the assumption that the block - * has not been entered yet. */ -template -void -Parser::skip_after_end_block () -{ - const_TokenPtr t = lexer.peek_token (); - int curly_count = 1; - - while (curly_count > 0 && t->get_id () != END_OF_FILE) - { - switch (t->get_id ()) - { - case LEFT_CURLY: - curly_count++; - break; - case RIGHT_CURLY: - curly_count--; - break; - default: - break; - } - lexer.skip_token (); - t = lexer.peek_token (); - } -} - -/* Skips tokens until the end of the next block. i.e. assumes that the block - * has not been entered yet. 
*/ -template -void -Parser::skip_after_next_block () -{ - const_TokenPtr t = lexer.peek_token (); - - // initial loop - skip until EOF if no left curlies encountered - while (t->get_id () != END_OF_FILE && t->get_id () != LEFT_CURLY) - { - lexer.skip_token (); - - t = lexer.peek_token (); - } - - // if next token is left, skip it and then skip after the block ends - if (t->get_id () == LEFT_CURLY) - { - lexer.skip_token (); - - skip_after_end_block (); - } - // otherwise, do nothing as EOF -} - -/* Skips all tokens until ] (the end of an attribute) - does not skip the ] - * (as designed for attribute body use) */ -template -void -Parser::skip_after_end_attribute () -{ - const_TokenPtr t = lexer.peek_token (); - - while (t->get_id () != RIGHT_SQUARE && t->get_id () != END_OF_FILE) - { - lexer.skip_token (); - t = lexer.peek_token (); - } - - // Don't skip the RIGHT_SQUARE token -} - -/* Pratt parser impl of parse_expr. FIXME: this is only provisional and - * probably will be changed. */ -template -std::unique_ptr -Parser::parse_expr (int right_binding_power, - AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - const_TokenPtr current_token = lexer.peek_token (); - // Special hack because we are allowed to return nullptr, in that case we - // don't want to skip the token, since we don't actually parse it. But if - // null isn't allowed it indicates an error, and we want to skip past that. - // So return early if it is one of the tokens that ends an expression - // (or at least cannot start a new expression). 
- if (restrictions.expr_can_be_null) - { - TokenId id = current_token->get_id (); - if (id == SEMICOLON || id == RIGHT_PAREN || id == RIGHT_CURLY - || id == RIGHT_SQUARE || id == COMMA || id == LEFT_CURLY) - return nullptr; - } - - ParseRestrictions null_denotation_restrictions = restrictions; - null_denotation_restrictions.expr_can_be_stmt = false; - - // parse null denotation (unary part of expression) - std::unique_ptr expr - = null_denotation ({}, null_denotation_restrictions); - if (expr == nullptr) - return nullptr; - - return left_denotations (std::move (expr), right_binding_power, - std::move (outer_attrs), restrictions); -} - -template -std::unique_ptr -Parser::left_denotations (std::unique_ptr expr, - int right_binding_power, - AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - if (expr == nullptr) - { - // DEBUG - rust_debug ("null denotation is null; returning null for parse_expr"); - return nullptr; - } - - const_TokenPtr current_token = lexer.peek_token (); - - if (restrictions.expr_can_be_stmt && !expr->is_expr_without_block () - && current_token->get_id () != DOT - && current_token->get_id () != QUESTION_MARK) - { - rust_debug ("statement expression with block"); - expr->set_outer_attrs (std::move (outer_attrs)); - return expr; - } - - restrictions.expr_can_be_stmt = false; - - // stop parsing if find lower priority token - parse higher priority first - while (right_binding_power < left_binding_power (current_token)) - { - lexer.skip_token (); - - // FIXME attributes should generally be applied to the null denotation. - expr = left_denotation (current_token, std::move (expr), - std::move (outer_attrs), restrictions); - - if (expr == nullptr) - { - // DEBUG - rust_debug ("left denotation is null; returning null for parse_expr"); - - return nullptr; - } - - current_token = lexer.peek_token (); - } - - return expr; -} - -// Parse expression with lowest left binding power. 
-template -std::unique_ptr -Parser::parse_expr (AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - return parse_expr (LBP_LOWEST, std::move (outer_attrs), restrictions); -} - -/* Determines action to take when finding token at beginning of expression. */ -template -std::unique_ptr -Parser::null_denotation (AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - /* note: tok is previous character in input stream, not current one, as - * parse_expr skips it before passing it in */ - - /* as a Pratt parser (which works by decomposing expressions into a null - * denotation and then a left denotation), null denotations handle primaries - * and unary operands (but only prefix unary operands) */ - - auto tok = lexer.peek_token (); - - switch (tok->get_id ()) - { - case IDENTIFIER: - case SELF: - case SELF_ALIAS: - case DOLLAR_SIGN: - case CRATE: - case SUPER: - case SCOPE_RESOLUTION: - { - // DEBUG - rust_debug ("beginning null denotation identifier handling"); - - /* best option: parse as path, then extract identifier, macro, - * struct/enum, or just path info from it */ - AST::PathInExpression path = parse_path_in_expression (); - - return null_denotation_path (std::move (path), std::move (outer_attrs), - restrictions); - } - default: - if (tok->get_id () == LEFT_SHIFT) - { - lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); - tok = lexer.peek_token (); - } - - lexer.skip_token (); - return null_denotation_not_path (std::move (tok), std::move (outer_attrs), - restrictions); - } -} - -// Handling of expresions that start with a path for `null_denotation`. 
-template -std::unique_ptr -Parser::null_denotation_path ( - AST::PathInExpression path, AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - rust_debug ("parsing null denotation after path"); - - // HACK: always make "self" by itself a path (regardless of next - // tokens) - if (path.is_single_segment () && path.get_segments ()[0].is_lower_self_seg ()) - { - // HACK: add outer attrs to path - path.set_outer_attrs (std::move (outer_attrs)); - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } - - // branch on next token - const_TokenPtr t = lexer.peek_token (); - switch (t->get_id ()) - { - case EXCLAM: - // macro - return parse_macro_invocation_partial (std::move (path), - std::move (outer_attrs)); - case LEFT_CURLY: - { - bool not_a_block = lexer.peek_token (1)->get_id () == IDENTIFIER - && (lexer.peek_token (2)->get_id () == COMMA - || (lexer.peek_token (2)->get_id () == COLON - && (lexer.peek_token (4)->get_id () == COMMA - || !can_tok_start_type ( - lexer.peek_token (3)->get_id ())))); - - /* definitely not a block: - * path '{' ident ',' - * path '{' ident ':' [anything] ',' - * path '{' ident ':' [not a type] - * otherwise, assume block expr and thus path */ - // DEBUG - rust_debug ("values of lookahead: '%s' '%s' '%s' '%s' ", - lexer.peek_token (1)->get_token_description (), - lexer.peek_token (2)->get_token_description (), - lexer.peek_token (3)->get_token_description (), - lexer.peek_token (4)->get_token_description ()); - - rust_debug ("can be struct expr: '%s', not a block: '%s'", - restrictions.can_be_struct_expr ? "true" : "false", - not_a_block ? 
"true" : "false"); - - // struct/enum expr struct - if (!restrictions.can_be_struct_expr && !not_a_block) - { - // HACK: add outer attrs to path - path.set_outer_attrs (std::move (outer_attrs)); - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } - return parse_struct_expr_struct_partial (std::move (path), - std::move (outer_attrs)); - } - case LEFT_PAREN: - // struct/enum expr tuple - if (!restrictions.can_be_struct_expr) - { - // assume path is returned - // HACK: add outer attributes to path - path.set_outer_attrs (std::move (outer_attrs)); - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } - return parse_struct_expr_tuple_partial (std::move (path), - std::move (outer_attrs)); - default: - // assume path is returned if not single segment - if (path.is_single_segment ()) - { - // FIXME: This should probably be returned as a path. - /* HACK: may have to become permanent, but this is my current - * identifier expression */ - return std::unique_ptr (new AST::IdentifierExpr ( - path.get_segments ()[0].get_ident_segment ().as_string (), {}, - path.get_locus ())); - } - // HACK: add outer attrs to path - path.set_outer_attrs (std::move (outer_attrs)); - return std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - } - rust_unreachable (); -} - -// Handling of expresions that do not start with a path for `null_denotation`. -template -std::unique_ptr -Parser::null_denotation_not_path ( - const_TokenPtr tok, AST::AttrVec outer_attrs, ParseRestrictions restrictions) -{ - switch (tok->get_id ()) - { - // FIXME: Handle in null_denotation_path? 
- case LEFT_SHIFT: - case LEFT_ANGLE: - { - // qualified path - // HACK: add outer attrs to path - AST::QualifiedPathInExpression path - = parse_qualified_path_in_expression (tok->get_locus ()); - path.set_outer_attrs (std::move (outer_attrs)); - return std::unique_ptr ( - new AST::QualifiedPathInExpression (std::move (path))); - } - // FIXME: delegate to parse_literal_expr instead? would have to rejig - // tokens and whatever. - // FIXME: for literal exprs, outer attrs should be passed in, and later - // error if it does not make up the entire statement. - case INT_LITERAL: - // we should check the range, but ignore for now - // encode as int? - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::INT, - tok->get_type_hint (), {}, tok->get_locus ())); - case FLOAT_LITERAL: - // encode as float? - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::FLOAT, - tok->get_type_hint (), {}, tok->get_locus ())); - case STRING_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::STRING, - tok->get_type_hint (), {}, tok->get_locus ())); - case BYTE_STRING_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE_STRING, - tok->get_type_hint (), {}, tok->get_locus ())); - case RAW_STRING_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::RAW_STRING, - tok->get_type_hint (), {}, tok->get_locus ())); - case CHAR_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::CHAR, - tok->get_type_hint (), {}, tok->get_locus ())); - case BYTE_CHAR_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (tok->get_str (), AST::Literal::BYTE, - tok->get_type_hint (), {}, tok->get_locus ())); - case TRUE_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (Values::Keywords::TRUE_LITERAL, - AST::Literal::BOOL, tok->get_type_hint (), {}, - tok->get_locus ())); - case 
FALSE_LITERAL: - return std::unique_ptr ( - new AST::LiteralExpr (Values::Keywords::FALSE_LITERAL, - AST::Literal::BOOL, tok->get_type_hint (), {}, - tok->get_locus ())); - case LEFT_PAREN: - return parse_grouped_or_tuple_expr (std::move (outer_attrs), - tok->get_locus ()); - - /*case PLUS: { // unary plus operator - // invoke parse_expr recursively with appropriate priority, etc. for - below AST::Expr* expr = parse_expr(LBP_UNARY_PLUS); - - if (expr == nullptr) - return nullptr; - // can only apply to integer and float expressions - if (expr->get_type() != integer_type_node || expr->get_type() != - float_type_node) { rust_error_at(tok->get_locus(), "operand of unary - plus must be int or float but it is %s", print_type(expr->get_type())); - return nullptr; - } - - return Tree(expr, tok->get_locus()); - }*/ - // Rust has no unary plus operator - case MINUS: - { // unary minus - ParseRestrictions entered_from_unary; - entered_from_unary.entered_from_unary = true; - if (!restrictions.can_be_struct_expr) - entered_from_unary.can_be_struct_expr = false; - std::unique_ptr expr - = parse_expr (LBP_UNARY_MINUS, {}, entered_from_unary); - - if (expr == nullptr) - return nullptr; - // can only apply to integer and float expressions - /*if (expr.get_type() != integer_type_node || expr.get_type() != - float_type_node) { rust_error_at(tok->get_locus(), "operand of unary - minus must be int or float but it is %s", - print_type(expr.get_type())); return Tree::error(); - }*/ - /* FIXME: when implemented the "get type" method on expr, ensure it is - * int or float type (except unsigned int). Actually, this would - * probably have to be done in semantic analysis (as type checking). 
- */ - - /* FIXME: allow outer attributes on these expressions by having an - * outer attrs parameter in function*/ - return std::unique_ptr ( - new AST::NegationExpr (std::move (expr), NegationOperator::NEGATE, - std::move (outer_attrs), tok->get_locus ())); - } - case EXCLAM: - { // logical or bitwise not - ParseRestrictions entered_from_unary; - entered_from_unary.entered_from_unary = true; - if (!restrictions.can_be_struct_expr) - entered_from_unary.can_be_struct_expr = false; - std::unique_ptr expr - = parse_expr (LBP_UNARY_EXCLAM, {}, entered_from_unary); - - if (expr == nullptr) - return nullptr; - // can only apply to boolean expressions - /*if (expr.get_type() != boolean_type_node) { - rust_error_at(tok->get_locus(), - "operand of logical not must be a boolean but it is %s", - print_type(expr.get_type())); - return Tree::error(); - }*/ - /* FIXME: type checking for boolean or integer expressions in semantic - * analysis */ - - // FIXME: allow outer attributes on these expressions - return std::unique_ptr ( - new AST::NegationExpr (std::move (expr), NegationOperator::NOT, - std::move (outer_attrs), tok->get_locus ())); - } - case ASTERISK: - { - /* pointer dereference only - HACK: as struct expressions should - * always be value expressions, cannot be dereferenced */ - ParseRestrictions entered_from_unary; - entered_from_unary.entered_from_unary = true; - entered_from_unary.can_be_struct_expr = false; - std::unique_ptr expr - = parse_expr (LBP_UNARY_ASTERISK, {}, entered_from_unary); - // FIXME: allow outer attributes on expression - return std::unique_ptr ( - new AST::DereferenceExpr (std::move (expr), std::move (outer_attrs), - tok->get_locus ())); - } - case AMP: - { - // (single) "borrow" expression - shared (mutable) or immutable - std::unique_ptr expr = nullptr; - Mutability mutability = Mutability::Imm; - bool raw_borrow = false; - - ParseRestrictions entered_from_unary; - entered_from_unary.entered_from_unary = true; - if 
(!restrictions.can_be_struct_expr) - entered_from_unary.can_be_struct_expr = false; - - auto is_mutability = [] (const_TokenPtr token) { - return token->get_id () == CONST || token->get_id () == MUT; - }; - - auto t = lexer.peek_token (); - // Weak raw keyword, we look (1) ahead and treat it as an identifier if - // there is no mut nor const. - if (t->get_id () == IDENTIFIER - && t->get_str () == Values::WeakKeywords::RAW - && is_mutability (lexer.peek_token (1))) - { - lexer.skip_token (); - switch (lexer.peek_token ()->get_id ()) - { - case MUT: - mutability = Mutability::Mut; - break; - case CONST: - mutability = Mutability::Imm; - break; - default: - rust_error_at (lexer.peek_token ()->get_locus (), - "raw borrow should be either const or mut"); - } - lexer.skip_token (); - expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); - raw_borrow = true; - } - else if (t->get_id () == MUT) - { - lexer.skip_token (); - expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); - mutability = Mutability::Mut; - raw_borrow = false; - } - else - { - expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); - raw_borrow = false; - } - - // FIXME: allow outer attributes on expression - return std::unique_ptr ( - new AST::BorrowExpr (std::move (expr), mutability, raw_borrow, false, - std::move (outer_attrs), tok->get_locus ())); - } - case LOGICAL_AND: - { - // (double) "borrow" expression - shared (mutable) or immutable - std::unique_ptr expr = nullptr; - Mutability mutability = Mutability::Imm; - - ParseRestrictions entered_from_unary; - entered_from_unary.entered_from_unary = true; - - if (lexer.peek_token ()->get_id () == MUT) - { - lexer.skip_token (); - expr = parse_expr (LBP_UNARY_AMP_MUT, {}, entered_from_unary); - mutability = Mutability::Mut; - } - else - { - expr = parse_expr (LBP_UNARY_AMP, {}, entered_from_unary); - mutability = Mutability::Imm; - } - - // FIXME: allow outer attributes on expression - return std::unique_ptr ( - new AST::BorrowExpr 
(std::move (expr), mutability, false, true, - std::move (outer_attrs), tok->get_locus ())); - } - case OR: - case PIPE: - case MOVE: - // closure expression - return parse_closure_expr_pratt (tok, std::move (outer_attrs)); - case DOT_DOT: - // either "range to" or "range full" expressions - return parse_nud_range_exclusive_expr (tok, std::move (outer_attrs)); - case DOT_DOT_EQ: - // range to inclusive expr - return parse_range_to_inclusive_expr (tok, std::move (outer_attrs)); - case RETURN_KW: - // FIXME: is this really a null denotation expression? - return parse_return_expr (std::move (outer_attrs), tok->get_locus ()); - case TRY: - // FIXME: is this really a null denotation expression? - return parse_try_expr (std::move (outer_attrs), tok->get_locus ()); - case BREAK: - // FIXME: is this really a null denotation expression? - return parse_break_expr (std::move (outer_attrs), tok->get_locus ()); - case CONTINUE: - return parse_continue_expr (std::move (outer_attrs), tok->get_locus ()); - case LEFT_CURLY: - // ok - this is an expression with block for once. 
- return parse_block_expr (std::move (outer_attrs), tl::nullopt, - tok->get_locus ()); - case IF: - // if or if let, so more lookahead to find out - if (lexer.peek_token ()->get_id () == LET) - { - // if let expr - return parse_if_let_expr (std::move (outer_attrs), tok->get_locus ()); - } - else - { - // if expr - return parse_if_expr (std::move (outer_attrs), tok->get_locus ()); - } - case LIFETIME: - return parse_labelled_loop_expr (tok, std::move (outer_attrs)); - case LOOP: - return parse_loop_expr (std::move (outer_attrs), tl::nullopt, - tok->get_locus ()); - case WHILE: - if (lexer.peek_token ()->get_id () == LET) - { - return parse_while_let_loop_expr (std::move (outer_attrs)); - } - else - { - return parse_while_loop_expr (std::move (outer_attrs), tl::nullopt, - tok->get_locus ()); - } - case FOR: - return parse_for_loop_expr (std::move (outer_attrs), tl::nullopt); - case MATCH_KW: - // also an expression with block - return parse_match_expr (std::move (outer_attrs), tok->get_locus ()); - case LEFT_SQUARE: - // array definition expr (not indexing) - return parse_array_expr (std::move (outer_attrs), tok->get_locus ()); - case UNSAFE: - return parse_unsafe_block_expr (std::move (outer_attrs), - tok->get_locus ()); - case BOX: - return parse_box_expr (std::move (outer_attrs), tok->get_locus ()); - case UNDERSCORE: - add_error ( - Error (tok->get_locus (), - "use of %qs is not allowed on the right-side of an assignment", - tok->get_token_description ())); - return nullptr; - case CONST: - return parse_const_block_expr (std::move (outer_attrs), - tok->get_locus ()); - default: - if (!restrictions.expr_can_be_null) - add_error (Error (tok->get_locus (), - "found unexpected token %qs in null denotation", - tok->get_token_description ())); - return nullptr; - } -} - -/* Called for each token that can appear in infix (between) position. Can be - * operators or other punctuation. 
Returns a function pointer to member - * function that implements the left denotation for the token given. */ -template -std::unique_ptr -Parser::left_denotation (const_TokenPtr tok, - std::unique_ptr left, - AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - // Token passed in has already been skipped, so peek gives "next" token - switch (tok->get_id ()) - { - // FIXME: allow for outer attributes to be applied - case QUESTION_MARK: - { - location_t left_locus = left->get_locus (); - // error propagation expression - unary postfix - return std::unique_ptr ( - new AST::ErrorPropagationExpr (std::move (left), - std::move (outer_attrs), left_locus)); - } - case PLUS: - // sum expression - binary infix - /*return parse_binary_plus_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr (tok, std::move (left), - std::move (outer_attrs), - ArithmeticOrLogicalOperator::ADD, - restrictions); - case MINUS: - // difference expression - binary infix - /*return parse_binary_minus_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::SUBTRACT, restrictions); - case ASTERISK: - // product expression - binary infix - /*return parse_binary_mult_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::MULTIPLY, restrictions); - case DIV: - // quotient expression - binary infix - /*return parse_binary_div_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::DIVIDE, restrictions); - case PERCENT: - // modulo expression - binary infix - /*return parse_binary_mod_expr (tok, std::move 
(left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::MODULUS, restrictions); - case AMP: - // logical or bitwise and expression - binary infix - /*return parse_bitwise_and_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::BITWISE_AND, restrictions); - case PIPE: - // logical or bitwise or expression - binary infix - /*return parse_bitwise_or_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::BITWISE_OR, restrictions); - case CARET: - // logical or bitwise xor expression - binary infix - /*return parse_bitwise_xor_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::BITWISE_XOR, restrictions); - case LEFT_SHIFT: - // left shift expression - binary infix - /*return parse_left_shift_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::LEFT_SHIFT, restrictions); - case RIGHT_SHIFT: - // right shift expression - binary infix - /*return parse_right_shift_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_arithmetic_or_logical_expr ( - tok, std::move (left), std::move (outer_attrs), - ArithmeticOrLogicalOperator::RIGHT_SHIFT, restrictions); - case EQUAL_EQUAL: - // equal to expression - binary infix (no associativity) - /*return parse_binary_equal_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return 
parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::EQUAL, restrictions); - case NOT_EQUAL: - // not equal to expression - binary infix (no associativity) - /*return parse_binary_not_equal_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::NOT_EQUAL, - restrictions); - case RIGHT_ANGLE: - // greater than expression - binary infix (no associativity) - /*return parse_binary_greater_than_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::GREATER_THAN, - restrictions); - case LEFT_ANGLE: - // less than expression - binary infix (no associativity) - /*return parse_binary_less_than_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::LESS_THAN, - restrictions); - case GREATER_OR_EQUAL: - // greater than or equal to expression - binary infix (no associativity) - /*return parse_binary_greater_equal_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::GREATER_OR_EQUAL, - restrictions); - case LESS_OR_EQUAL: - // less than or equal to expression - binary infix (no associativity) - /*return parse_binary_less_equal_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_comparison_expr (tok, std::move (left), - std::move (outer_attrs), - ComparisonOperator::LESS_OR_EQUAL, - restrictions); - case OR: - // lazy logical or expression - binary infix - return parse_lazy_or_expr (tok, std::move (left), std::move (outer_attrs), - restrictions); - case LOGICAL_AND: - // lazy logical and expression - binary 
infix - return parse_lazy_and_expr (tok, std::move (left), - std::move (outer_attrs), restrictions); - case AS: - /* type cast expression - kind of binary infix (RHS is actually a - * TypeNoBounds) */ - return parse_type_cast_expr (tok, std::move (left), - std::move (outer_attrs), restrictions); - case EQUAL: - // assignment expression - binary infix (note right-to-left - // associativity) - return parse_assig_expr (tok, std::move (left), std::move (outer_attrs), - restrictions); - case PLUS_EQ: - /* plus-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_plus_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr (tok, std::move (left), - std::move (outer_attrs), - CompoundAssignmentOperator::ADD, - restrictions); - case MINUS_EQ: - /* minus-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_minus_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::SUBTRACT, restrictions); - case ASTERISK_EQ: - /* multiply-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_mult_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::MULTIPLY, restrictions); - case DIV_EQ: - /* division-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_div_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr (tok, std::move (left), - std::move (outer_attrs), - CompoundAssignmentOperator::DIVIDE, - restrictions); - case PERCENT_EQ: - /* modulo-assignment expression - binary infix (note right-to-left - * 
associativity) */ - /*return parse_mod_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::MODULUS, restrictions); - case AMP_EQ: - /* bitwise and-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_and_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::BITWISE_AND, restrictions); - case PIPE_EQ: - /* bitwise or-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_or_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::BITWISE_OR, restrictions); - case CARET_EQ: - /* bitwise xor-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_xor_assig_expr (tok, std::move (left), - std::move (outer_attrs), restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::BITWISE_XOR, restrictions); - case LEFT_SHIFT_EQ: - /* left shift-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_left_shift_assig_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_compound_assignment_expr ( - tok, std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::LEFT_SHIFT, restrictions); - case RIGHT_SHIFT_EQ: - /* right shift-assignment expression - binary infix (note right-to-left - * associativity) */ - /*return parse_right_shift_assig_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions);*/ - return parse_compound_assignment_expr ( - tok, 
std::move (left), std::move (outer_attrs), - CompoundAssignmentOperator::RIGHT_SHIFT, restrictions); - case DOT_DOT: - /* range exclusive expression - binary infix (no associativity) - * either "range" or "range from" */ - return parse_led_range_exclusive_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions); - case DOT_DOT_EQ: - /* range inclusive expression - binary infix (no associativity) - * unambiguously RangeInclusiveExpr */ - return parse_range_inclusive_expr (tok, std::move (left), - std::move (outer_attrs), restrictions); - case SCOPE_RESOLUTION: - // path expression - binary infix? FIXME should this even be parsed - // here? - add_error ( - Error (tok->get_locus (), - "found scope resolution operator in left denotation " - "function - this should probably be handled elsewhere")); - - return nullptr; - case DOT: - { - /* field expression or method call - relies on parentheses after next - * identifier or await if token after is "await" (unary postfix) or - * tuple index if token after is a decimal int literal */ - - const_TokenPtr next_tok = lexer.peek_token (); - if (next_tok->get_id () == IDENTIFIER - && next_tok->get_str () == Values::Keywords::AWAIT) - { - // await expression - return parse_await_expr (tok, std::move (left), - std::move (outer_attrs)); - } - else if (next_tok->get_id () == INT_LITERAL) - { - // tuple index expression - TODO check for decimal int literal - return parse_tuple_index_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions); - } - else if (next_tok->get_id () == FLOAT_LITERAL) - { - // Lexer has misidentified a tuple index as a float literal - // eg: `(x, (y, z)).1.0` -> 1.0 has been identified as a float - // literal. This means we should split it into three new separate - // tokens, the first tuple index, the dot and the second tuple - // index. 
- auto current_loc = next_tok->get_locus (); - auto str = next_tok->get_str (); - auto dot_pos = str.find ("."); - auto prefix = str.substr (0, dot_pos); - auto suffix = str.substr (dot_pos + 1); - if (dot_pos == str.size () - 1) - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1)}); - else - lexer.split_current_token ( - {Token::make_int (current_loc, std::move (prefix), - CORETYPE_PURE_DECIMAL), - Token::make (DOT, current_loc + 1), - Token::make_int (current_loc + 2, std::move (suffix), - CORETYPE_PURE_DECIMAL)}); - return parse_tuple_index_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions); - } - else if (next_tok->get_id () == IDENTIFIER - && lexer.peek_token (1)->get_id () != LEFT_PAREN - && lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION) - { - /* field expression (or should be) - FIXME: scope resolution right - * after identifier should always be method, I'm pretty sure */ - return parse_field_access_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions); - } - else - { - // method call (probably) - return parse_method_call_expr (tok, std::move (left), - std::move (outer_attrs), - restrictions); - } - } - case LEFT_PAREN: - // function call - method call is based on dot notation first - return parse_function_call_expr (tok, std::move (left), - std::move (outer_attrs), restrictions); - case LEFT_SQUARE: - // array or slice index expression (pseudo binary infix) - return parse_index_expr (tok, std::move (left), std::move (outer_attrs), - restrictions); - default: - add_error (Error (tok->get_locus (), - "found unexpected token %qs in left denotation", - tok->get_token_description ())); - - return nullptr; - } -} - -/* Returns the left binding power for the given ArithmeticOrLogicalExpr type. - * TODO make constexpr? Would that even do anything useful? 
*/ -inline binding_powers -get_lbp_for_arithmetic_or_logical_expr ( - AST::ArithmeticOrLogicalExpr::ExprType expr_type) -{ - switch (expr_type) - { - case ArithmeticOrLogicalOperator::ADD: - return LBP_PLUS; - case ArithmeticOrLogicalOperator::SUBTRACT: - return LBP_MINUS; - case ArithmeticOrLogicalOperator::MULTIPLY: - return LBP_MUL; - case ArithmeticOrLogicalOperator::DIVIDE: - return LBP_DIV; - case ArithmeticOrLogicalOperator::MODULUS: - return LBP_MOD; - case ArithmeticOrLogicalOperator::BITWISE_AND: - return LBP_AMP; - case ArithmeticOrLogicalOperator::BITWISE_OR: - return LBP_PIPE; - case ArithmeticOrLogicalOperator::BITWISE_XOR: - return LBP_CARET; - case ArithmeticOrLogicalOperator::LEFT_SHIFT: - return LBP_L_SHIFT; - case ArithmeticOrLogicalOperator::RIGHT_SHIFT: - return LBP_R_SHIFT; - default: - // WTF? should not happen, this is an error - rust_unreachable (); - - return LBP_PLUS; - } -} - -// Parses an arithmetic or logical expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_arithmetic_or_logical_expr ( - const_TokenPtr, std::unique_ptr left, AST::AttrVec, - AST::ArithmeticOrLogicalExpr::ExprType expr_type, - ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (get_lbp_for_arithmetic_or_logical_expr (expr_type), - AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - expr_type, locus)); -} - -// Parses a binary addition expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_binary_plus_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_PLUS, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::ADD, locus)); -} - -// Parses a binary subtraction expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_minus_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MINUS, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::SUBTRACT, - locus)); -} - -// Parses a binary multiplication expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_mult_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MUL, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. 
actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::MULTIPLY, - locus)); -} - -// Parses a binary division expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_div_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_DIV, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::DIVIDE, - locus)); -} - -// Parses a binary modulo expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_mod_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MOD, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::MODULUS, - locus)); -} - -/* Parses a binary bitwise (or eager logical) and expression (with Pratt - * parsing). 
*/ -template -std::unique_ptr -Parser::parse_bitwise_and_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_AMP, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::BITWISE_AND, - locus)); -} - -/* Parses a binary bitwise (or eager logical) or expression (with Pratt - * parsing). */ -template -std::unique_ptr -Parser::parse_bitwise_or_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_PIPE, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::BITWISE_OR, - locus)); -} - -/* Parses a binary bitwise (or eager logical) xor expression (with Pratt - * parsing). */ -template -std::unique_ptr -Parser::parse_bitwise_xor_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_CARET, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. 
actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::BITWISE_XOR, - locus)); -} - -// Parses a binary left shift expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_left_shift_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_L_SHIFT, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::LEFT_SHIFT, - locus)); -} - -// Parses a binary right shift expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_right_shift_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_R_SHIFT, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ArithmeticOrLogicalExpr (std::move (left), std::move (right), - ArithmeticOrLogicalOperator::RIGHT_SHIFT, - locus)); -} - -/* Returns the left binding power for the given ComparisonExpr type. - * TODO make constexpr? Would that even do anything useful? 
*/ -inline binding_powers -get_lbp_for_comparison_expr (AST::ComparisonExpr::ExprType expr_type) -{ - switch (expr_type) - { - case ComparisonOperator::EQUAL: - return LBP_EQUAL; - case ComparisonOperator::NOT_EQUAL: - return LBP_NOT_EQUAL; - case ComparisonOperator::GREATER_THAN: - return LBP_GREATER_THAN; - case ComparisonOperator::LESS_THAN: - return LBP_SMALLER_THAN; - case ComparisonOperator::GREATER_OR_EQUAL: - return LBP_GREATER_EQUAL; - case ComparisonOperator::LESS_OR_EQUAL: - return LBP_SMALLER_EQUAL; - default: - // WTF? should not happen, this is an error - rust_unreachable (); - - return LBP_EQUAL; - } -} - -/* Parses a ComparisonExpr of given type and LBP. TODO find a way to only - * specify one and have the other looked up - e.g. specify ExprType and - * binding power is looked up? */ -template -std::unique_ptr -Parser::parse_comparison_expr ( - const_TokenPtr, std::unique_ptr left, AST::AttrVec, - AST::ComparisonExpr::ExprType expr_type, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (get_lbp_for_comparison_expr (expr_type), AST::AttrVec (), - restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), expr_type, - locus)); -} - -// Parses a binary equal to expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_equal_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_EQUAL, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. 
actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::EQUAL, locus)); -} - -// Parses a binary not equal to expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_not_equal_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_NOT_EQUAL, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::NOT_EQUAL, locus)); -} - -// Parses a binary greater than expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_greater_than_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_GREATER_THAN, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::GREATER_THAN, locus)); -} - -// Parses a binary less than expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_binary_less_than_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_SMALLER_THAN, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::LESS_THAN, locus)); -} - -// Parses a binary greater than or equal to expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_greater_equal_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_GREATER_EQUAL, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::GREATER_OR_EQUAL, locus)); -} - -// Parses a binary less than or equal to expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_binary_less_equal_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_SMALLER_EQUAL, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. 
actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::ComparisonExpr (std::move (left), std::move (right), - ComparisonOperator::LESS_OR_EQUAL, locus)); -} - -// Parses a binary lazy boolean or expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_lazy_or_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_LOGICAL_OR, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::LazyBooleanExpr (std::move (left), std::move (right), - LazyBooleanOperator::LOGICAL_OR, locus)); -} - -// Parses a binary lazy boolean and expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_lazy_and_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_LOGICAL_AND, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::LazyBooleanExpr (std::move (left), std::move (right), - LazyBooleanOperator::LOGICAL_AND, locus)); -} - -// Parses a pseudo-binary infix type cast expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_type_cast_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr expr_to_cast, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, - ParseRestrictions restrictions ATTRIBUTE_UNUSED) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr type = parse_type_no_bounds (); - if (type == nullptr) - return nullptr; - // FIXME: how do I get precedence put in here? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = expr_to_cast->get_locus (); - - return std::unique_ptr ( - new AST::TypeCastExpr (std::move (expr_to_cast), std::move (type), locus)); -} - -// Parses a binary assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::AssignmentExpr (std::move (left), std::move (right), - std::move (outer_attrs), locus)); -} - -/* Returns the left binding power for the given CompoundAssignmentExpr type. - * TODO make constexpr? Would that even do anything useful? 
*/ -inline binding_powers -get_lbp_for_compound_assignment_expr ( - AST::CompoundAssignmentExpr::ExprType expr_type) -{ - switch (expr_type) - { - case CompoundAssignmentOperator::ADD: - return LBP_PLUS; - case CompoundAssignmentOperator::SUBTRACT: - return LBP_MINUS; - case CompoundAssignmentOperator::MULTIPLY: - return LBP_MUL; - case CompoundAssignmentOperator::DIVIDE: - return LBP_DIV; - case CompoundAssignmentOperator::MODULUS: - return LBP_MOD; - case CompoundAssignmentOperator::BITWISE_AND: - return LBP_AMP; - case CompoundAssignmentOperator::BITWISE_OR: - return LBP_PIPE; - case CompoundAssignmentOperator::BITWISE_XOR: - return LBP_CARET; - case CompoundAssignmentOperator::LEFT_SHIFT: - return LBP_L_SHIFT; - case CompoundAssignmentOperator::RIGHT_SHIFT: - return LBP_R_SHIFT; - default: - // WTF? should not happen, this is an error - rust_unreachable (); - - return LBP_PLUS; - } -} - -// Parses a compound assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_compound_assignment_expr ( - const_TokenPtr, std::unique_ptr left, AST::AttrVec, - AST::CompoundAssignmentExpr::ExprType expr_type, - ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (get_lbp_for_compound_assignment_expr (expr_type) - 1, - AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - expr_type, locus)); -} - -// Parses a binary add-assignment expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_plus_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_PLUS_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::ADD, locus)); -} - -// Parses a binary minus-assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_minus_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MINUS_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::SUBTRACT, - locus)); -} - -// Parses a binary multiplication-assignment expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_mult_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MULT_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::MULTIPLY, - locus)); -} - -// Parses a binary division-assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_div_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_DIV_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::DIVIDE, - locus)); -} - -// Parses a binary modulo-assignment expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_mod_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_MOD_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::MODULUS, - locus)); -} - -// Parses a binary and-assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_and_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_AMP_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::BITWISE_AND, - locus)); -} - -// Parses a binary or-assignment expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_or_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_PIPE_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::BITWISE_OR, - locus)); -} - -// Parses a binary xor-assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_xor_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_CARET_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::BITWISE_XOR, - locus)); -} - -// Parses a binary left shift-assignment expression (with Pratt parsing). 
-template -std::unique_ptr -Parser::parse_left_shift_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_L_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::LEFT_SHIFT, - locus)); -} - -// Parses a binary right shift-assignment expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_right_shift_assig_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_R_SHIFT_ASSIG - 1, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: ensure right-associativity for this - 'LBP - 1' may do this? - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::CompoundAssignmentExpr (std::move (left), std::move (right), - CompoundAssignmentOperator::RIGHT_SHIFT, - locus)); -} - -// Parses a postfix unary await expression (with Pratt parsing). -template -std::unique_ptr -Parser::parse_await_expr ( - const_TokenPtr tok, std::unique_ptr expr_to_await, - AST::AttrVec outer_attrs) -{ - /* skip "await" identifier (as "." 
has already been consumed in - * parse_expression) this assumes that the identifier was already identified - * as await */ - if (!skip_token (IDENTIFIER)) - { - Error error (tok->get_locus (), "failed to skip % in await expr " - "- this is probably a deep issue"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - // TODO: check inside async block in semantic analysis - location_t locus = expr_to_await->get_locus (); - - return std::unique_ptr ( - new AST::AwaitExpr (std::move (expr_to_await), std::move (outer_attrs), - locus)); -} - -/* Parses an exclusive range ('..') in left denotation position (i.e. - * RangeFromExpr or RangeFromToExpr). */ -template -std::unique_ptr -Parser::parse_led_range_exclusive_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // FIXME: this probably parses expressions accidently or whatever - // try parsing RHS (as tok has already been consumed in parse_expression) - // Can be nullptr, in which case it is a RangeFromExpr, otherwise a - // RangeFromToExpr. - restrictions.expr_can_be_null = true; - std::unique_ptr right - = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); - - location_t locus = left->get_locus (); - - if (right == nullptr) - { - // range from expr - return std::unique_ptr ( - new AST::RangeFromExpr (std::move (left), locus)); - } - else - { - return std::unique_ptr ( - new AST::RangeFromToExpr (std::move (left), std::move (right), locus)); - } - // FIXME: make non-associative -} - -/* Parses an exclusive range ('..') in null denotation position (i.e. - * RangeToExpr or RangeFullExpr). 
*/ -template -std::unique_ptr -Parser::parse_nud_range_exclusive_expr ( - const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) -{ - auto restrictions = ParseRestrictions (); - restrictions.expr_can_be_null = true; - - // FIXME: this probably parses expressions accidently or whatever - // try parsing RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_DOT_DOT, AST::AttrVec (), restrictions); - - location_t locus = tok->get_locus (); - - if (right == nullptr) - { - // range from expr - return std::unique_ptr ( - new AST::RangeFullExpr (locus)); - } - else - { - return std::unique_ptr ( - new AST::RangeToExpr (std::move (right), locus)); - } - // FIXME: make non-associative -} - -// Parses a full binary range inclusive expression. -template -std::unique_ptr -Parser::parse_range_inclusive_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr left, - AST::AttrVec outer_attrs ATTRIBUTE_UNUSED, ParseRestrictions restrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right - = parse_expr (LBP_DOT_DOT_EQ, AST::AttrVec (), restrictions); - if (right == nullptr) - return nullptr; - // FIXME: make non-associative - - // TODO: check types. actually, do so during semantic analysis - location_t locus = left->get_locus (); - - return std::unique_ptr ( - new AST::RangeFromToInclExpr (std::move (left), std::move (right), locus)); -} - -// Parses an inclusive range-to prefix unary expression. -template -std::unique_ptr -Parser::parse_range_to_inclusive_expr ( - const_TokenPtr tok, AST::AttrVec outer_attrs ATTRIBUTE_UNUSED) -{ - // parse RHS (as tok has already been consumed in parse_expression) - std::unique_ptr right = parse_expr (LBP_DOT_DOT_EQ); - if (right == nullptr) - return nullptr; - // FIXME: make non-associative - - // TODO: check types. 
actually, do so during semantic analysis - - return std::unique_ptr ( - new AST::RangeToInclExpr (std::move (right), tok->get_locus ())); -} - -// Parses a pseudo-binary infix tuple index expression. -template -std::unique_ptr -Parser::parse_tuple_index_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr tuple_expr, - AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) -{ - // parse int literal (as token already skipped) - const_TokenPtr index_tok = expect_token (INT_LITERAL); - if (index_tok == nullptr) - { - return nullptr; - } - std::string index = index_tok->get_str (); - - // convert to integer - if (!index_tok->is_pure_decimal ()) - { - Error error (index_tok->get_locus (), - "tuple index should be a pure decimal literal"); - add_error (std::move (error)); - } - int index_int = atoi (index.c_str ()); - - location_t locus = tuple_expr->get_locus (); - - return std::unique_ptr ( - new AST::TupleIndexExpr (std::move (tuple_expr), index_int, - std::move (outer_attrs), locus)); -} - -// Parses a pseudo-binary infix array (or slice) index expression. -template -std::unique_ptr -Parser::parse_index_expr ( - const_TokenPtr, std::unique_ptr array_expr, - AST::AttrVec outer_attrs, ParseRestrictions) -{ - // parse RHS (as tok has already been consumed in parse_expression) - /*std::unique_ptr index_expr - = parse_expr (LBP_ARRAY_REF, AST::AttrVec (), - restrictions);*/ - // TODO: conceptually, should treat [] as brackets, so just parse all expr - std::unique_ptr index_expr = parse_expr (); - if (index_expr == nullptr) - return nullptr; - - // skip ']' at end of array - if (!skip_token (RIGHT_SQUARE)) - { - // skip somewhere? - return nullptr; - } - - // TODO: check types. 
actually, do so during semantic analysis - location_t locus = array_expr->get_locus (); - - return std::unique_ptr ( - new AST::ArrayIndexExpr (std::move (array_expr), std::move (index_expr), - std::move (outer_attrs), locus)); -} - -// Parses a pseudo-binary infix struct field access expression. -template -std::unique_ptr -Parser::parse_field_access_expr ( - const_TokenPtr tok ATTRIBUTE_UNUSED, std::unique_ptr struct_expr, - AST::AttrVec outer_attrs, ParseRestrictions restrictions ATTRIBUTE_UNUSED) -{ - /* get field name identifier (assume that this is a field access expr and - * not await, for instance) */ - const_TokenPtr ident_tok = expect_token (IDENTIFIER); - if (ident_tok == nullptr) - return nullptr; - - Identifier ident{ident_tok}; - - location_t locus = struct_expr->get_locus (); - - // TODO: check types. actually, do so during semantic analysis - return std::unique_ptr ( - new AST::FieldAccessExpr (std::move (struct_expr), std::move (ident), - std::move (outer_attrs), locus)); -} - -// Parses a pseudo-binary infix method call expression. 
-template -std::unique_ptr -Parser::parse_method_call_expr ( - const_TokenPtr tok, std::unique_ptr receiver_expr, - AST::AttrVec outer_attrs, ParseRestrictions) -{ - // parse path expr segment - AST::PathExprSegment segment = parse_path_expr_segment (); - if (segment.is_error ()) - { - Error error (tok->get_locus (), - "failed to parse path expr segment of method call expr"); - add_error (std::move (error)); - - return nullptr; - } - - // skip left parentheses - if (!skip_token (LEFT_PAREN)) - { - return nullptr; - } - - // parse method params (if they exist) - std::vector> params; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN) - { - std::unique_ptr param = parse_expr (); - if (param == nullptr) - { - Error error (t->get_locus (), - "failed to parse method param in method call"); - add_error (std::move (error)); - - return nullptr; - } - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - // skip right paren - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - // TODO: check types. actually do so in semantic analysis pass. - location_t locus = receiver_expr->get_locus (); - - return std::unique_ptr ( - new AST::MethodCallExpr (std::move (receiver_expr), std::move (segment), - std::move (params), std::move (outer_attrs), - locus)); -} - -// Parses a pseudo-binary infix function call expression. 
-template -std::unique_ptr -Parser::parse_function_call_expr ( - const_TokenPtr, std::unique_ptr function_expr, - AST::AttrVec outer_attrs, ParseRestrictions) -{ - // parse function params (if they exist) - std::vector> params; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN) - { - std::unique_ptr param = parse_expr (); - if (param == nullptr) - { - Error error (t->get_locus (), - "failed to parse function param in function call"); - add_error (std::move (error)); - - return nullptr; - } - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - t = lexer.peek_token (); - } - - // skip ')' at end of param list - if (!skip_token (RIGHT_PAREN)) - { - // skip somewhere? - return nullptr; - } - - // TODO: check types. actually, do so during semantic analysis - location_t locus = function_expr->get_locus (); - - return std::unique_ptr ( - new AST::CallExpr (std::move (function_expr), std::move (params), - std::move (outer_attrs), locus)); -} - -/* Parses a macro invocation with a path in expression already parsed (but not - * '!' token). 
*/ -template -std::unique_ptr -Parser::parse_macro_invocation_partial ( - AST::PathInExpression path, AST::AttrVec outer_attrs, - ParseRestrictions restrictions) -{ - // macro invocation - if (!skip_token (EXCLAM)) - { - return nullptr; - } - - // convert PathInExpression to SimplePath - if this isn't possible, error - AST::SimplePath converted_path = path.as_simple_path (); - if (converted_path.is_empty ()) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse simple path in macro invocation"); - add_error (std::move (error)); - - return nullptr; - } - - auto tok_tree = parse_delim_token_tree (); - if (!tok_tree) - return nullptr; - - rust_debug ("successfully parsed macro invocation (via partial)"); - - location_t macro_locus = converted_path.get_locus (); - - return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (converted_path), - std::move (tok_tree.value ())), - std::move (outer_attrs), macro_locus); -} - -/* Parses a struct expr struct with a path in expression already parsed (but - * not - * '{' token). */ -template -std::unique_ptr -Parser::parse_struct_expr_struct_partial ( - AST::PathInExpression path, AST::AttrVec outer_attrs) -{ - // assume struct expr struct (as struct-enum disambiguation requires name - // lookup) again, make statement if final ';' - if (!skip_token (LEFT_CURLY)) - { - return nullptr; - } - - // parse inner attributes - AST::AttrVec inner_attrs = parse_inner_attributes (); - - // branch based on next token - const_TokenPtr t = lexer.peek_token (); - location_t path_locus = path.get_locus (); - switch (t->get_id ()) - { - case RIGHT_CURLY: - // struct with no body - lexer.skip_token (); - - return std::unique_ptr ( - new AST::StructExprStruct (std::move (path), std::move (inner_attrs), - std::move (outer_attrs), path_locus)); - case DOT_DOT: - /* technically this would give a struct base-only struct, but this - * algorithm should work too. As such, AST type not happening. 
*/ - case IDENTIFIER: - case HASH: - case INT_LITERAL: - { - // struct with struct expr fields - - // parse struct expr fields - std::vector> fields; - - while (t->get_id () != RIGHT_CURLY && t->get_id () != DOT_DOT) - { - std::unique_ptr field - = parse_struct_expr_field (); - if (field == nullptr) - { - Error error (t->get_locus (), - "failed to parse struct (or enum) expr field"); - add_error (std::move (error)); - - return nullptr; - } - - // DEBUG: - rust_debug ("struct/enum expr field validated to not be null"); - - fields.push_back (std::move (field)); - - // DEBUG: - rust_debug ("struct/enum expr field pushed back"); - - if (lexer.peek_token ()->get_id () != COMMA) - { - // DEBUG: - rust_debug ("lack of comma detected in struct/enum expr " - "fields - break"); - break; - } - lexer.skip_token (); - - // DEBUG: - rust_debug ("struct/enum expr fields comma skipped "); - - t = lexer.peek_token (); - } - - // DEBUG: - rust_debug ("struct/enum expr about to parse struct base "); - - // parse struct base if it exists - AST::StructBase struct_base = AST::StructBase::error (); - if (lexer.peek_token ()->get_id () == DOT_DOT) - { - location_t dot_dot_location = lexer.peek_token ()->get_locus (); - lexer.skip_token (); - - // parse required struct base expr - std::unique_ptr base_expr = parse_expr (); - if (base_expr == nullptr) - { - Error error (lexer.peek_token ()->get_locus (), - "failed to parse struct base expression in struct " - "expression"); - add_error (std::move (error)); - - return nullptr; - } - - // DEBUG: - rust_debug ("struct/enum expr - parsed and validated base expr"); - - struct_base - = AST::StructBase (std::move (base_expr), dot_dot_location); - - // DEBUG: - rust_debug ("assigned struct base to new struct base "); - } - - if (!skip_token (RIGHT_CURLY)) - { - return nullptr; - } - - // DEBUG: - rust_debug ( - "struct/enum expr skipped right curly - done and ready to return"); - - return std::unique_ptr ( - new AST::StructExprStructFields 
(std::move (path), std::move (fields), - path_locus, std::move (struct_base), - std::move (inner_attrs), - std::move (outer_attrs))); - } - default: - add_error ( - Error (t->get_locus (), - "unrecognised token %qs in struct (or enum) expression - " - "expected %<}%>, identifier, integer literal, or %<..%>", - t->get_token_description ())); - - return nullptr; - } -} - -/* Parses a struct expr tuple with a path in expression already parsed (but - * not - * '(' token). - * FIXME: this currently outputs a call expr, as they cannot be disambiguated. - * A better solution would be to just get this to call that function directly. - * */ -template -std::unique_ptr -Parser::parse_struct_expr_tuple_partial ( - AST::PathInExpression path, AST::AttrVec outer_attrs) -{ - if (!skip_token (LEFT_PAREN)) - { - return nullptr; - } - - AST::AttrVec inner_attrs = parse_inner_attributes (); - - std::vector> exprs; - - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != RIGHT_PAREN) - { - // parse expression (required) - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (t->get_locus (), "failed to parse expression in " - "struct (or enum) expression tuple"); - add_error (std::move (error)); - - return nullptr; - } - exprs.push_back (std::move (expr)); - - if (lexer.peek_token ()->get_id () != COMMA) - break; - - lexer.skip_token (); - - t = lexer.peek_token (); - } - - if (!skip_token (RIGHT_PAREN)) - { - return nullptr; - } - - location_t path_locus = path.get_locus (); - - auto pathExpr = std::unique_ptr ( - new AST::PathInExpression (std::move (path))); - - return std::unique_ptr ( - new AST::CallExpr (std::move (pathExpr), std::move (exprs), - std::move (outer_attrs), path_locus)); -} - -// Parses a closure expression with pratt parsing (from null denotation). -template -std::unique_ptr -Parser::parse_closure_expr_pratt (const_TokenPtr tok, - AST::AttrVec outer_attrs) -{ - // TODO: does this need pratt parsing (for precedence)? 
probably not, but - // idk - location_t locus = tok->get_locus (); - bool has_move = false; - if (tok->get_id () == MOVE) - { - has_move = true; - tok = lexer.peek_token (); - lexer.skip_token (); - // skip token and reassign - } - - // handle parameter list - std::vector params; - - switch (tok->get_id ()) - { - case OR: - // no parameters, don't skip token - break; - case PIPE: - { - // actually may have parameters - // don't skip token - const_TokenPtr t = lexer.peek_token (); - while (t->get_id () != PIPE) - { - AST::ClosureParam param = parse_closure_param (); - if (param.is_error ()) - { - // TODO is this really an error? - Error error (t->get_locus (), "could not parse closure param"); - add_error (std::move (error)); - - return nullptr; - } - params.push_back (std::move (param)); - - if (lexer.peek_token ()->get_id () != COMMA) - { - if (lexer.peek_token ()->get_id () == OR) - lexer.split_current_token (PIPE, PIPE); - // not an error but means param list is done - break; - } - // skip comma - lexer.skip_token (); - - if (lexer.peek_token ()->get_id () == OR) - lexer.split_current_token (PIPE, PIPE); - - t = lexer.peek_token (); - } - - if (!skip_token (PIPE)) - { - return nullptr; - } - break; - } - default: - add_error (Error (tok->get_locus (), - "unexpected token %qs in closure expression - expected " - "%<|%> or %<||%>", - tok->get_token_description ())); - - // skip somewhere? - return nullptr; - } - - // again branch based on next token - tok = lexer.peek_token (); - if (tok->get_id () == RETURN_TYPE) - { - // must be return type closure with block expr - - // skip "return type" token - lexer.skip_token (); - - // parse actual type, which is required - std::unique_ptr type = parse_type_no_bounds (); - if (type == nullptr) - { - // error - Error error (tok->get_locus (), "failed to parse type for closure"); - add_error (std::move (error)); - - // skip somewhere? 
- return nullptr; - } - - // parse block expr, which is required - std::unique_ptr block = parse_block_expr (); - if (block == nullptr) - { - // error - Error error (lexer.peek_token ()->get_locus (), - "failed to parse block expr in closure"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ClosureExprInnerTyped (std::move (type), std::move (block), - std::move (params), locus, has_move, - std::move (outer_attrs))); - } - else - { - // must be expr-only closure - - // parse expr, which is required - std::unique_ptr expr = parse_expr (); - if (expr == nullptr) - { - Error error (tok->get_locus (), - "failed to parse expression in closure"); - add_error (std::move (error)); - - // skip somewhere? - return nullptr; - } - - return std::unique_ptr ( - new AST::ClosureExprInner (std::move (expr), std::move (params), locus, - has_move, std::move (outer_attrs))); - } -} - -// Returns true if the next token is END, ELSE, or EOF; -template -bool -Parser::done_end_or_else () -{ - const_TokenPtr t = lexer.peek_token (); - return (t->get_id () == RIGHT_CURLY || t->get_id () == ELSE - || t->get_id () == END_OF_FILE); -} - -// Returns true if the next token is END or EOF. -template -bool -Parser::done_end () -{ - const_TokenPtr t = lexer.peek_token (); - return (t->get_id () == RIGHT_CURLY || t->get_id () == END_OF_FILE); -} -} // namespace Rust diff --git a/gcc/rust/parse/rust-parse-impl.hxx b/gcc/rust/parse/rust-parse-impl.hxx new file mode 100644 index 00000000000..4ad4f834336 --- /dev/null +++ b/gcc/rust/parse/rust-parse-impl.hxx @@ -0,0 +1,7205 @@ +// Copyright (C) 2020-2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. 
+ +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +/* Template implementation for Rust::Parser. Previously in rust-parse.cc (before + * Parser was template). Separated from rust-parse.h for readability. */ + +/* DO NOT INCLUDE ANYWHERE - this is automatically included + * by rust-parse-impl-*.cc + * This is also the reason why there are no include guards. */ + +#include "expected.h" +#include "rust-ast.h" +#include "rust-common.h" +#include "rust-expr.h" +#include "rust-item.h" +#include "rust-common.h" +#include "rust-parse.h" +#include "rust-token.h" +#define INCLUDE_ALGORITHM +#include "rust-diagnostics.h" +#include "rust-dir-owner.h" +#include "rust-keyword-values.h" +#include "rust-edition.h" +#include "rust-parse-error.h" + +#include "optional.h" + +namespace Rust { + +/* HACK-y special handling for skipping a right angle token at the end of + * generic arguments. + * Currently, this replaces the "current token" with one that is identical + * except has the leading '>' removed (e.g. '>>' becomes '>'). This is bad + * for several reasons - it modifies the token stream to something that + * actually doesn't make syntactic sense, it may not worked if the token + * has already been skipped, etc. It was done because it would not + * actually require inserting new items into the token stream (which I + * thought would take more work to not mess up) and because I wasn't sure + * if the "already seen right angle" flag in the parser would work + * correctly. + * Those two other approaches listed are in my opinion actually better + * long-term - insertion is probably best as it reflects syntactically + * what occurs. 
On the other hand, I need to do a code audit to make sure + * that insertion doesn't mess anything up. So that's a FIXME. */ +template +bool +Parser::skip_generics_right_angle () +{ + /* OK, new great idea. Have a lexer method called + * "split_current_token(TokenType newLeft, TokenType newRight)", which is + * called here with whatever arguments are appropriate. That lexer method + * handles "replacing" the current token with the "newLeft" and "inserting" + * the next token with the "newRight" (and creating a location, etc. for it) + */ + + /* HACK: special handling for right shift '>>', greater or equal '>=', and + * right shift assig */ + // '>>=' + const_TokenPtr tok = lexer.peek_token (); + switch (tok->get_id ()) + { + case RIGHT_ANGLE: + // this is good - skip token + lexer.skip_token (); + return true; + case RIGHT_SHIFT: + { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, RIGHT_ANGLE); + lexer.skip_token (); + return true; + } + case GREATER_OR_EQUAL: + { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, EQUAL); + lexer.skip_token (); + return true; + } + case RIGHT_SHIFT_EQ: + { + // new implementation that should be better + lexer.split_current_token (RIGHT_ANGLE, GREATER_OR_EQUAL); + lexer.skip_token (); + return true; + } + default: + add_error (Error (tok->get_locus (), + "expected %<>%> at end of generic argument - found %qs", + tok->get_token_description ())); + return false; + } +} + +/* Gets left binding power for specified token. + * Not suitable for use at the moment or possibly ever because binding power + * cannot be purely determined from operator token with Rust grammar - e.g. + * method call and field access have + * different left binding powers but the same operator token. 
*/ +template +int +Parser::left_binding_power (const_TokenPtr token) +{ + // HACK: called with "peek_token()", so lookahead is "peek_token(1)" + switch (token->get_id ()) + { + /* TODO: issue here - distinguish between method calls and field access + * somehow? Also would have to distinguish between paths and function + * calls (:: operator), maybe more stuff. */ + /* Current plan for tackling LBP - don't do it based on token, use + * lookahead. Or alternatively, only use Pratt parsing for OperatorExpr + * and handle other expressions without it. rustc only considers + * arithmetic, logical/relational, 'as', + * '?=', ranges, colons, and assignment to have operator precedence and + * associativity rules applicable. It then has + * a separate "ExprPrecedence" that also includes binary operators. */ + + // TODO: handle operator overloading - have a function replace the + // operator? + + /*case DOT: + return LBP_DOT;*/ + + case SCOPE_RESOLUTION: + rust_debug ( + "possible error - looked up LBP of scope resolution operator. should " + "be handled elsewhere."); + return LBP_PATH; + + /* Resolved by lookahead HACK that should work with current code. If next + * token is identifier and token after that isn't parenthesised expression + * list, it is a field reference. */ + case DOT: + if (lexer.peek_token (1)->get_id () == IDENTIFIER + && lexer.peek_token (2)->get_id () != LEFT_PAREN) + { + return LBP_FIELD_EXPR; + } + return LBP_METHOD_CALL; + + case LEFT_PAREN: + return LBP_FUNCTION_CALL; + + case LEFT_SQUARE: + return LBP_ARRAY_REF; + + // postfix question mark (i.e. 
error propagation expression) + case QUESTION_MARK: + return LBP_QUESTION_MARK; + + case AS: + return LBP_AS; + + case ASTERISK: + return LBP_MUL; + case DIV: + return LBP_DIV; + case PERCENT: + return LBP_MOD; + + case PLUS: + return LBP_PLUS; + case MINUS: + return LBP_MINUS; + + case LEFT_SHIFT: + return LBP_L_SHIFT; + case RIGHT_SHIFT: + return LBP_R_SHIFT; + + // binary & operator + case AMP: + return LBP_AMP; + + // binary ^ operator + case CARET: + return LBP_CARET; + + // binary | operator + case PIPE: + return LBP_PIPE; + + case EQUAL_EQUAL: + return LBP_EQUAL; + case NOT_EQUAL: + return LBP_NOT_EQUAL; + case RIGHT_ANGLE: + return LBP_GREATER_THAN; + case GREATER_OR_EQUAL: + return LBP_GREATER_EQUAL; + case LEFT_ANGLE: + return LBP_SMALLER_THAN; + case LESS_OR_EQUAL: + return LBP_SMALLER_EQUAL; + + case LOGICAL_AND: + return LBP_LOGICAL_AND; + + case OR: + return LBP_LOGICAL_OR; + + case DOT_DOT: + return LBP_DOT_DOT; + + case DOT_DOT_EQ: + return LBP_DOT_DOT_EQ; + + case EQUAL: + return LBP_ASSIG; + case PLUS_EQ: + return LBP_PLUS_ASSIG; + case MINUS_EQ: + return LBP_MINUS_ASSIG; + case ASTERISK_EQ: + return LBP_MULT_ASSIG; + case DIV_EQ: + return LBP_DIV_ASSIG; + case PERCENT_EQ: + return LBP_MOD_ASSIG; + case AMP_EQ: + return LBP_AMP_ASSIG; + case PIPE_EQ: + return LBP_PIPE_ASSIG; + case CARET_EQ: + return LBP_CARET_ASSIG; + case LEFT_SHIFT_EQ: + return LBP_L_SHIFT_ASSIG; + case RIGHT_SHIFT_EQ: + return LBP_R_SHIFT_ASSIG; + + /* HACK: float literal due to lexer misidentifying a dot then an integer as + * a float */ + case FLOAT_LITERAL: + return LBP_FIELD_EXPR; + // field expr is same as tuple expr in precedence, i imagine + // TODO: is this needed anymore? lexer shouldn't do that anymore + + // anything that can't appear in an infix position is given lowest priority + default: + return LBP_LOWEST; + } +} + +// Returns true when current token is EOF. 
+template +bool +Parser::done_end_of_file () +{ + return lexer.peek_token ()->get_id () == END_OF_FILE; +} + +// Parses a sequence of items within a module or the implicit top-level module +// in a crate +template +tl::expected>, Parse::Error::Items> +Parser::parse_items () +{ + std::vector> items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != END_OF_FILE) + { + auto item = parse_item (false); + if (!item) + return Parse::Error::Items::make_malformed (std::move (items)); + + items.push_back (std::move (item.value ())); + + t = lexer.peek_token (); + } + + // GCC 5->7 bug doesn't threat lvalue as an rvalue for the overload +#if __GNUC__ <= 7 + return std::move (items); +#else + return items; +#endif +} + +// Parses a crate (compilation unit) - entry point +template +std::unique_ptr +Parser::parse_crate () +{ + // parse inner attributes + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse items + auto items + = parse_items ().value_or (std::vector>{}); + + // emit all errors + for (const auto &error : error_table) + error.emit (); + + return std::unique_ptr ( + new AST::Crate (std::move (items), std::move (inner_attrs))); +} + +// Parses an identifier/keyword as a Token +template +tl::expected, Parse::Error::Token> +Parser::parse_identifier_or_keyword_token () +{ + const_TokenPtr t = lexer.peek_token (); + + if (t->get_id () == IDENTIFIER || token_id_is_keyword (t->get_id ())) + { + lexer.skip_token (); + return std::unique_ptr (new AST::Token (std::move (t))); + } + else + { + add_error (Error (t->get_locus (), "expected keyword or identifier")); + return Parse::Error::Token::make_malformed (); + } +} + +template +bool +Parser::is_macro_rules_def (const_TokenPtr t) +{ + auto macro_name = lexer.peek_token (2)->get_id (); + + bool allowed_macro_name = (macro_name == IDENTIFIER || macro_name == TRY); + + return t->get_str () == Values::WeakKeywords::MACRO_RULES + && lexer.peek_token (1)->get_id () == EXCLAM && allowed_macro_name; 
+} + +// Parses a single item +template +tl::expected, Parse::Error::Item> +Parser::parse_item (bool called_from_statement) +{ + // has a "called_from_statement" parameter for better error message handling + + // TODO: GCC 5 does not handle implicit return type correctly so we're forced + // to specify it almost every time until the baseline GCC gets bumped. + // Since this type is quite long and the code is dense we use an alias. + // + // When support for GCC 5 stops: remove this alias as well as the explicit + // ctor calls. + using RType = tl::expected, Parse::Error::Item>; + + // parse outer attributes for item + AST::AttrVec outer_attrs = parse_outer_attributes (); + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case END_OF_FILE: + // not necessarily an error, unless we just read outer + // attributes which needs to be attached + if (!outer_attrs.empty ()) + { + Rust::AST::Attribute attr = outer_attrs.back (); + Error error (attr.get_locus (), + "expected item after outer attribute or doc comment"); + add_error (std::move (error)); + } + return Parse::Error::Item::make_end_of_file (); + + case ASYNC: + case PUB: + case MOD: + case EXTERN_KW: + case USE: + case FN_KW: + case TYPE: + case STRUCT_KW: + case ENUM_KW: + case CONST: + case STATIC_KW: + case AUTO: + case TRAIT: + case IMPL: + case MACRO: + /* TODO: implement union keyword but not really because of + * context-dependence crappy hack way to parse a union written below to + * separate it from the good code. 
*/ + // case UNION: + case UNSAFE: // maybe - unsafe traits are a thing + // if any of these (should be all possible VisItem prefixes), parse a + // VisItem + { + auto vis_item = parse_vis_item (std::move (outer_attrs)); + if (!vis_item) + return Parse::Error::Item::make_malformed (); + return RType{std::move (vis_item)}; + } + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: + // almost certainly macro invocation semi + { + auto macro_invoc_semi + = parse_macro_invocation_semi (std::move (outer_attrs)); + if (!macro_invoc_semi) + return Parse::Error::Item::make_malformed (); + return RType{std::move (macro_invoc_semi)}; + } + // crappy hack to do union "keyword" + case IDENTIFIER: + // TODO: ensure std::string and literal comparison works + if (t->get_str () == Values::WeakKeywords::UNION + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + auto vis_item = parse_vis_item (std::move (outer_attrs)); + if (!vis_item) + return Parse::Error::Item::make_malformed (); + return RType{std::move (vis_item)}; + // or should this go straight to parsing union? + } + else if (t->get_str () == Values::WeakKeywords::DEFAULT + && lexer.peek_token (1)->get_id () != EXCLAM) + { + add_error (Error (t->get_locus (), + "%qs is only allowed on items within %qs blocks", + "default", "impl")); + return Parse::Error::Item::make_malformed (); + } + else if (is_macro_rules_def (t)) + { + // macro_rules! 
macro item + auto macro_rule_def = parse_macro_rules_def (std::move (outer_attrs)); + if (!macro_rule_def) + return Parse::Error::Item::make_malformed (); + return RType{std::move (macro_rule_def)}; + } + else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION + || lexer.peek_token (1)->get_id () == EXCLAM) + { + /* path (probably) or macro invocation, so probably a macro invocation + * semi */ + auto macro_invocation_semi + = parse_macro_invocation_semi (std::move (outer_attrs)); + if (!macro_invocation_semi) + return Parse::Error::Item::make_malformed (); + return RType{std::move (macro_invocation_semi)}; + } + gcc_fallthrough (); + default: + // otherwise unrecognised + add_error (Error (t->get_locus (), + "unrecognised token %qs for start of %s", + t->get_token_description (), + called_from_statement ? "statement" : "item")); + + // skip somewhere? + return Parse::Error::Item::make_malformed (); + break; + } +} + +// Parses a VisItem (item that can have non-default visibility). 
+template +std::unique_ptr +Parser::parse_vis_item (AST::AttrVec outer_attrs) +{ + // parse visibility, which may or may not exist + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); + + // select VisItem to create depending on keyword + const_TokenPtr t = lexer.peek_token (); + + switch (t->get_id ()) + { + case MOD: + return parse_module (std::move (vis), std::move (outer_attrs)); + case EXTERN_KW: + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case CRATE: + return parse_extern_crate (std::move (vis), std::move (outer_attrs)); + case FN_KW: // extern function + return parse_function (std::move (vis), std::move (outer_attrs)); + case LEFT_CURLY: // extern block + return parse_extern_block (std::move (vis), std::move (outer_attrs)); + case STRING_LITERAL: // for specifying extern ABI + // could be extern block or extern function, so more lookahead + t = lexer.peek_token (2); + + switch (t->get_id ()) + { + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + case LEFT_CURLY: + return parse_extern_block (std::move (vis), + std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of extern production", + t->get_token_description ())); + + lexer.skip_token (2); // TODO: is this right thing to do? + return nullptr; + } + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of extern production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? 
+ return nullptr; + } + case USE: + return parse_use_decl (std::move (vis), std::move (outer_attrs)); + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + case TYPE: + return parse_type_alias (std::move (vis), std::move (outer_attrs)); + case STRUCT_KW: + return parse_struct (std::move (vis), std::move (outer_attrs)); + case ENUM_KW: + return parse_enum (std::move (vis), std::move (outer_attrs)); + // TODO: implement union keyword but not really because of + // context-dependence case UNION: crappy hack to do union "keyword" + case IDENTIFIER: + if (t->get_str () == Values::WeakKeywords::UNION + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + return parse_union (std::move (vis), std::move (outer_attrs)); + // or should item switch go straight to parsing union? + } + else + { + break; + } + case CONST: + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + return parse_const_item (std::move (vis), std::move (outer_attrs)); + case ASYNC: + return parse_async_item (std::move (vis), std::move (outer_attrs)); + case UNSAFE: + case EXTERN_KW: + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of const production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? 
+ return nullptr; + } + // for async functions + case ASYNC: + return parse_async_item (std::move (vis), std::move (outer_attrs)); + + case STATIC_KW: + return parse_static_item (std::move (vis), std::move (outer_attrs)); + case AUTO: + case TRAIT: + return parse_trait (std::move (vis), std::move (outer_attrs)); + case IMPL: + return parse_impl (std::move (vis), std::move (outer_attrs)); + case UNSAFE: // unsafe traits, unsafe functions, unsafe impls (trait impls), + // lookahead to resolve syntactical production + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case AUTO: + case TRAIT: + return parse_trait (std::move (vis), std::move (outer_attrs)); + case EXTERN_KW: + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + case IMPL: + return parse_impl (std::move (vis), std::move (outer_attrs)); + case MOD: + return parse_module (std::move (vis), std::move (outer_attrs)); + default: + add_error ( + Error (t->get_locus (), + "unexpected token %qs in some sort of unsafe production", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + case MACRO: + return parse_decl_macro_def (std::move (vis), std::move (outer_attrs)); + default: + // otherwise vis item clearly doesn't exist, which is not an error + // has a catch-all post-switch return to allow other breaks to occur + break; + } + return nullptr; +} + +template +std::unique_ptr +Parser::parse_async_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + auto offset = (lexer.peek_token ()->get_id () == CONST) ? 
1 : 0; + const_TokenPtr t = lexer.peek_token (offset); + + if (get_rust_edition () == Edition::E2015) + { + add_error (Error (t->get_locus (), ErrorCode::E0670, + "% is not permitted in Rust 2015")); + add_error ( + Error::Hint (t->get_locus (), + "to use %, switch to Rust 2018 or later")); + } + + t = lexer.peek_token (offset + 1); + + switch (t->get_id ()) + { + case UNSAFE: + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + + default: + add_error ( + Error (t->get_locus (), "expected item, found keyword %")); + + lexer.skip_token (1); + return nullptr; + } +} + +// Parses a macro rules definition syntax extension whatever thing. +template +std::unique_ptr +Parser::parse_macro_rules_def (AST::AttrVec outer_attrs) +{ + // ensure that first token is identifier saying "macro_rules" + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () != IDENTIFIER + || t->get_str () != Values::WeakKeywords::MACRO_RULES) + { + Error error ( + t->get_locus (), + "macro rules definition does not start with %"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + lexer.skip_token (); + location_t macro_locus = t->get_locus (); + + if (!skip_token (EXCLAM)) + { + // skip after somewhere? 
+ return nullptr; + } + + // parse macro name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier rule_name{ident_tok}; + + // DEBUG + rust_debug ("in macro rules def, about to parse parens."); + + // save delim type to ensure it is reused later + AST::DelimType delim_type = AST::PARENS; + + // Map tokens to DelimType + t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + delim_type = AST::PARENS; + break; + case LEFT_SQUARE: + delim_type = AST::SQUARE; + break; + case LEFT_CURLY: + delim_type = AST::CURLY; + break; + default: + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters (for a " + "macro rules definition)", + t->get_token_description ())); + + return nullptr; + } + lexer.skip_token (); + + // parse actual macro rules + std::vector macro_rules; + + // must be at least one macro rule, so parse it + AST::MacroRule initial_rule = parse_macro_rule (); + if (initial_rule.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "required first macro rule in macro rules definition " + "could not be parsed"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + macro_rules.push_back (std::move (initial_rule)); + + // DEBUG + rust_debug ("successfully pushed back initial macro rule"); + + t = lexer.peek_token (); + // parse macro rules + while (t->get_id () == SEMICOLON) + { + // skip semicolon + lexer.skip_token (); + + // don't parse if end of macro rules + if (Parse::Utils::token_id_matches_delims (lexer.peek_token ()->get_id (), + delim_type)) + { + // DEBUG + rust_debug ( + "broke out of parsing macro rules loop due to finding delim"); + + break; + } + + // try to parse next rule + AST::MacroRule rule = parse_macro_rule (); + if (rule.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse macro rule in macro rules definition"); + add_error (std::move (error)); + + return nullptr; + } + + macro_rules.push_back (std::move (rule)); + + // DEBUG + rust_debug ("successfully pushed back another macro rule"); + + t = lexer.peek_token (); + } + + // parse end delimiters + t = lexer.peek_token (); + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) + { + // tokens match opening delimiter, so skip. + lexer.skip_token (); + + if (delim_type != AST::CURLY) + { + // skip semicolon at end of non-curly macro definitions + if (!skip_token (SEMICOLON)) + { + // as this is the end, allow recovery (probably) - may change + return std::unique_ptr ( + AST::MacroRulesDefinition::mbe ( + std::move (rule_name), delim_type, std::move (macro_rules), + std::move (outer_attrs), macro_locus)); + } + } + + return std::unique_ptr ( + AST::MacroRulesDefinition::mbe (std::move (rule_name), delim_type, + std::move (macro_rules), + std::move (outer_attrs), macro_locus)); + } + else + { + // tokens don't match opening delimiters, so produce error + Error error (t->get_locus (), + "unexpected token %qs - expecting closing delimiter %qs " + "(for a macro rules definition)", + t->get_token_description (), + (delim_type == AST::PARENS + ? ")" + : (delim_type == AST::SQUARE ? 
"]" : "}"))); + add_error (std::move (error)); + + /* return empty macro definiton despite possibly parsing mostly valid one + * - TODO is this a good idea? */ + return nullptr; + } +} + +// Parses a declarative macro 2.0 definition. +template +std::unique_ptr +Parser::parse_decl_macro_def (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + // ensure that first token is identifier saying "macro" + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () != MACRO) + { + Error error ( + t->get_locus (), + "declarative macro definition does not start with %"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + lexer.skip_token (); + location_t macro_locus = t->get_locus (); + + // parse macro name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier rule_name{ident_tok}; + + t = lexer.peek_token (); + if (t->get_id () == LEFT_PAREN) + { + // single definiton of macro rule + // e.g. 
`macro foo($e:expr) {}` + + // parse macro matcher + location_t locus = lexer.peek_token ()->get_locus (); + AST::MacroMatcher matcher = parse_macro_matcher (); + if (matcher.is_error ()) + return nullptr; + + // check delimiter of macro matcher + if (matcher.get_delim_type () != AST::DelimType::PARENS) + { + Error error (locus, "only parenthesis can be used for a macro " + "matcher in declarative macro definition"); + add_error (std::move (error)); + return nullptr; + } + + location_t transcriber_loc = lexer.peek_token ()->get_locus (); + auto delim_tok_tree = parse_delim_token_tree (); + if (!delim_tok_tree) + return nullptr; + + AST::MacroTranscriber transcriber (delim_tok_tree.value (), + transcriber_loc); + + if (transcriber.get_token_tree ().get_delim_type () + != AST::DelimType::CURLY) + { + Error error (transcriber_loc, + "only braces can be used for a macro transcriber " + "in declarative macro definition"); + add_error (std::move (error)); + return nullptr; + } + + std::vector macro_rules; + macro_rules.emplace_back (std::move (matcher), std::move (transcriber), + locus); + + return std::unique_ptr ( + AST::MacroRulesDefinition::decl_macro (std::move (rule_name), + macro_rules, + std::move (outer_attrs), + macro_locus, vis)); + } + else if (t->get_id () == LEFT_CURLY) + { + // multiple definitions of macro rule separated by comma + // e.g. `macro foo { () => {}, ($e:expr) => {}, }` + + // parse left curly + const_TokenPtr left_curly = expect_token (LEFT_CURLY); + if (left_curly == nullptr) + { + return nullptr; + } + + // parse actual macro rules + std::vector macro_rules; + + // must be at least one macro rule, so parse it + AST::MacroRule initial_rule = parse_macro_rule (); + if (initial_rule.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "required first macro rule in declarative macro definition " + "could not be parsed"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return nullptr; + } + macro_rules.push_back (std::move (initial_rule)); + + t = lexer.peek_token (); + // parse macro rules + while (t->get_id () == COMMA) + { + // skip comma + lexer.skip_token (); + + // don't parse if end of macro rules + if (Parse::Utils::token_id_matches_delims ( + lexer.peek_token ()->get_id (), AST::CURLY)) + { + break; + } + + // try to parse next rule + AST::MacroRule rule = parse_macro_rule (); + if (rule.is_error ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse macro rule in declarative macro definition"); + add_error (std::move (error)); + + return nullptr; + } + + macro_rules.push_back (std::move (rule)); + + t = lexer.peek_token (); + } + + // parse right curly + const_TokenPtr right_curly = expect_token (RIGHT_CURLY); + if (right_curly == nullptr) + { + return nullptr; + } + + return std::unique_ptr ( + AST::MacroRulesDefinition::decl_macro (std::move (rule_name), + std::move (macro_rules), + std::move (outer_attrs), + macro_locus, vis)); + } + else + { + add_error (Error (t->get_locus (), + "unexpected token %qs - expecting delimiters " + "(for a declarative macro definiton)", + t->get_token_description ())); + return nullptr; + } +} + +/* Parses a visibility syntactical production (i.e. 
creating a non-default + * visibility) */ +template +tl::expected +Parser::parse_visibility () +{ + // check for no visibility + if (lexer.peek_token ()->get_id () != PUB) + { + return AST::Visibility::create_private (); + } + + auto vis_loc = lexer.peek_token ()->get_locus (); + lexer.skip_token (); + + // create simple pub visibility if + // - found no parentheses + // - found unit type `()` + if (lexer.peek_token ()->get_id () != LEFT_PAREN + || lexer.peek_token (1)->get_id () == RIGHT_PAREN) + { + return AST::Visibility::create_public (vis_loc); + // or whatever + } + + lexer.skip_token (); + + const_TokenPtr t = lexer.peek_token (); + auto path_loc = t->get_locus (); + + switch (t->get_id ()) + { + case CRATE: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_crate (path_loc, vis_loc); + case SELF: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_self (path_loc, vis_loc); + case SUPER: + lexer.skip_token (); + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_super (path_loc, vis_loc); + case IN: + { + lexer.skip_token (); + + // parse the "in" path as well + auto path = parse_simple_path (); + if (!path) + { + Error error (lexer.peek_token ()->get_locus (), + "missing path in pub(in path) visibility"); + add_error (std::move (error)); + + // skip after somewhere? + return Parse::Error::Visibility::make_missing_path (); + } + + skip_token (RIGHT_PAREN); + + return AST::Visibility::create_in_path (std::move (path.value ()), + vis_loc); + } + default: + add_error (Error (t->get_locus (), "unexpected token %qs in visibility", + t->get_token_description ())); + + lexer.skip_token (); + return Parse::Error::Visibility::make_malformed (); + } +} + +// Parses a module - either a bodied module or a module defined in another file. 
+/* Accepted shape: [`unsafe`] `mod` IDENTIFIER, followed by either `;` or a
+ * braced body holding inner attributes and items.  VIS and OUTER_ATTRS are
+ * moved into the created module.  Returns nullptr when the name is missing,
+ * an item fails to parse, the closing brace is missing, or an unexpected
+ * token follows the name.  */
+template
+std::unique_ptr
+Parser::parse_module (AST::Visibility vis,
+                       AST::AttrVec outer_attrs)
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+
+  // optional `unsafe` qualifier
+  Unsafety safety = Unsafety::Normal;
+  if (lexer.peek_token ()->get_id () == UNSAFE)
+    {
+      safety = Unsafety::Unsafe;
+      skip_token (UNSAFE);
+    }
+
+  // the result is not checked here: parsing continues even if `mod` is absent
+  skip_token (MOD);
+
+  const_TokenPtr module_name = expect_token (IDENTIFIER);
+  if (module_name == nullptr)
+    {
+      return nullptr;
+    }
+  Identifier name{module_name};
+
+  const_TokenPtr t = lexer.peek_token ();
+
+  switch (t->get_id ())
+    {
+    case SEMICOLON:
+      lexer.skip_token ();
+
+      // Construct an external module
+      return std::unique_ptr (
+        new AST::Module (std::move (name), std::move (vis),
+                         std::move (outer_attrs), locus, safety,
+                         lexer.get_filename (), inline_module_stack));
+    case LEFT_CURLY:
+      {
+        lexer.skip_token ();
+
+        // parse inner attributes
+        AST::AttrVec inner_attrs = parse_inner_attributes ();
+
+        // by default the module path is just the module name
+        std::string default_path = name.as_string ();
+
+        if (inline_module_stack.empty ())
+          {
+            // not nested in another inline module: reduce the lexer's
+            // filename to its last component (text after the final
+            // file_separator) and, if get_file_subdir succeeds for it,
+            // prefix the default path with that subdirectory
+            std::string filename = lexer.get_filename ();
+            auto slash_idx = filename.rfind (file_separator);
+            if (slash_idx == std::string::npos)
+              slash_idx = 0;
+            else
+              slash_idx++;
+            filename = filename.substr (slash_idx);
+
+            std::string subdir;
+            if (get_file_subdir (filename, subdir))
+              default_path = subdir + file_separator + name.as_string ();
+          }
+
+        std::string module_path_name
+          = extract_module_path (inner_attrs, outer_attrs, default_path);
+        // presumably pushes the path on inline_module_stack for the duration
+        // of this scope - TODO confirm against InlineModuleStackScope
+        InlineModuleStackScope scope (*this, std::move (module_path_name));
+
+        // parse items
+        std::vector> items;
+        const_TokenPtr tok = lexer.peek_token ();
+        while (tok->get_id () != RIGHT_CURLY)
+          {
+            auto item = parse_item (false);
+            if (!item)
+              {
+                // the error is located at the token where this item started
+                Error error (tok->get_locus (),
+                             "failed to parse item in module");
+                add_error (std::move (error));
+
+                return nullptr;
+              }
+
+            items.push_back (std::move (item.value ()));
+
+            tok = lexer.peek_token ();
+          }
+
+        if (!skip_token (RIGHT_CURLY))
+          {
+            // skip somewhere?
+            return nullptr;
+          }
+
+        return std::unique_ptr (
+          new AST::Module (std::move (name), locus, std::move (items),
+                           std::move (vis), safety, std::move (inner_attrs),
+                           std::move (outer_attrs))); // module name?
+      }
+    default:
+      add_error (
+        Error (t->get_locus (),
+               "unexpected token %qs in module declaration/definition item",
+               t->get_token_description ()));
+
+      // consume the unexpected token before giving up
+      lexer.skip_token ();
+      return nullptr;
+    }
+}
+
+/* Parses an extern crate declaration (dependency on external crate).
+ * Accepted shape: `extern` `crate` (IDENTIFIER | `self`)
+ * [`as` (IDENTIFIER | `_`)] `;`.  On any failure the parser skips past the
+ * next semicolon and nullptr is returned.  */
+template
+std::unique_ptr
+Parser::parse_extern_crate (AST::Visibility vis,
+                             AST::AttrVec outer_attrs)
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+  if (!skip_token (EXTERN_KW))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  if (!skip_token (CRATE))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  /* parse crate reference name - this has its own syntactical rule in reference
+   * but seems to not be used elsewhere, so i'm putting it here */
+  const_TokenPtr crate_name_tok = lexer.peek_token ();
+  std::string crate_name;
+
+  switch (crate_name_tok->get_id ())
+    {
+    case IDENTIFIER:
+      crate_name = crate_name_tok->get_str ();
+      lexer.skip_token ();
+      break;
+    case SELF:
+      crate_name = Values::Keywords::SELF;
+      lexer.skip_token ();
+      break;
+    default:
+      add_error (
+        Error (crate_name_tok->get_locus (),
+               "expecting crate name (identifier or %), found %qs",
+               crate_name_tok->get_token_description ()));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // don't parse as clause if it doesn't exist
+  if (lexer.peek_token ()->get_id () == SEMICOLON)
+    {
+      lexer.skip_token ();
+
+      return std::unique_ptr (
+        new AST::ExternCrate (std::move (crate_name), std::move (vis),
+                              std::move (outer_attrs), locus));
+    }
+
+  /* parse as clause - this also has its own syntactical rule in reference and
+   * also seems to not be used elsewhere, so including here again. */
+  if (!skip_token (AS))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  const_TokenPtr as_name_tok = lexer.peek_token ();
+  std::string as_name;
+
+  switch (as_name_tok->get_id ())
+    {
+    case IDENTIFIER:
+      as_name = as_name_tok->get_str ();
+      lexer.skip_token ();
+      break;
+    case UNDERSCORE:
+      as_name = Values::Keywords::UNDERSCORE;
+      lexer.skip_token ();
+      break;
+    default:
+      add_error (
+        Error (as_name_tok->get_locus (),
+               "expecting as clause name (identifier or %<_%>), found %qs",
+               as_name_tok->get_token_description ()));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  if (!skip_token (SEMICOLON))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  return std::unique_ptr (
+    new AST::ExternCrate (std::move (crate_name), std::move (vis),
+                          std::move (outer_attrs), locus, std::move (as_name)));
+}
+
+/* Parses a use declaration: `use` followed by a use tree and a terminating
+ * `;`.  On failure the parser skips past the next semicolon and nullptr is
+ * returned.  */
+template
+std::unique_ptr
+Parser::parse_use_decl (AST::Visibility vis,
+                         AST::AttrVec outer_attrs)
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+  if (!skip_token (USE))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // parse use tree, which is required
+  std::unique_ptr use_tree = parse_use_tree ();
+  if (use_tree == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "could not parse use tree in use declaration");
+      add_error (std::move (error));
+
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  // the use tree parser leaves the terminating semicolon for us
+  if (!skip_token (SEMICOLON))
+    {
+      skip_after_semicolon ();
+      return nullptr;
+    }
+
+  return std::unique_ptr (
+    new AST::UseDeclaration (std::move (use_tree), std::move (vis),
+                             std::move (outer_attrs), locus));
+}
+
+// Parses a use tree (which can be recursive and is actually a base class).
+/* Returns a glob, list or rebind use tree, or nullptr on failure.  Nested
+ * lists are parsed by calling this function recursively.  The terminating
+ * semicolon of the enclosing use declaration is never consumed on success.  */
+template
+std::unique_ptr
+Parser::parse_use_tree ()
+{
+  /* potential syntax definitions in attempt to get algorithm:
+   *  Glob:
+   *      <- SimplePath :: *
+   *      <- :: *
+   *      <- *
+   *  Nested tree thing:
+   *      <- SimplePath :: { COMPLICATED_INNER_TREE_THING }
+   *      <- :: { COMPLICATED_INNER_TREE_THING }
+   *      <- { COMPLICATED_INNER_TREE_THING }
+   *  Rebind thing:
+   *      <- SimplePath as IDENTIFIER
+   *      <- SimplePath as _
+   *      <- SimplePath
+   */
+
+  /* current plan of attack: try to parse SimplePath first - if fails, one of
+   * top two then try parse :: - if fails, one of top two. Next is deciding
+   * character for top two. */
+
+  /* Thus, parsing smaller parts of use tree may require feeding into function
+   * via parameters (or could handle all in this single function because other
+   * use tree types aren't recognised as separate in the spec) */
+
+  // TODO: I think this function is too complex, probably should split it
+
+  location_t locus = lexer.peek_token ()->get_locus ();
+
+  // bool has_path = false;
+  auto path = parse_simple_path ();
+
+  if (!path)
+    {
+      // has no path, so must be glob or nested tree UseTree type
+
+      bool is_global = false;
+
+      // check for global scope resolution operator
+      if (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION)
+        {
+          lexer.skip_token ();
+          is_global = true;
+        }
+
+      const_TokenPtr t = lexer.peek_token ();
+      switch (t->get_id ())
+        {
+        case ASTERISK:
+          // glob UseTree type
+          lexer.skip_token ();
+
+          if (is_global)
+            return std::unique_ptr (
+              new AST::UseTreeGlob (AST::UseTreeGlob::GLOBAL,
+                                    AST::SimplePath::create_empty (), locus));
+          else
+            return std::unique_ptr (
+              new AST::UseTreeGlob (AST::UseTreeGlob::NO_PATH,
+                                    AST::SimplePath::create_empty (), locus));
+        case LEFT_CURLY:
+          {
+            // nested tree UseTree type
+            lexer.skip_token ();
+
+            std::vector> use_trees;
+
+            // comma-separated nested trees; stops at `}`, at the first tree
+            // that fails to parse, or when no comma follows a tree
+            const_TokenPtr t = lexer.peek_token ();
+            while (t->get_id () != RIGHT_CURLY)
+              {
+                std::unique_ptr use_tree = parse_use_tree ();
+                if (use_tree == nullptr)
+                  {
+                    break;
+                  }
+
+                use_trees.push_back (std::move (use_tree));
+
+                if (lexer.peek_token ()->get_id () != COMMA)
+                  break;
+
+                lexer.skip_token ();
+                t = lexer.peek_token ();
+              }
+
+            // skip end curly delimiter
+            if (!skip_token (RIGHT_CURLY))
+              {
+                // skip after somewhere?
+                return nullptr;
+              }
+
+            if (is_global)
+              return std::unique_ptr (
+                new AST::UseTreeList (AST::UseTreeList::GLOBAL,
+                                      AST::SimplePath::create_empty (),
+                                      std::move (use_trees), locus));
+            else
+              return std::unique_ptr (
+                new AST::UseTreeList (AST::UseTreeList::NO_PATH,
+                                      AST::SimplePath::create_empty (),
+                                      std::move (use_trees), locus));
+          }
+        case AS:
+          // this is not allowed
+          add_error (Error (
+            t->get_locus (),
+            "use declaration with rebind % requires a valid simple path - "
+            "none found"));
+
+          skip_after_semicolon ();
+          return nullptr;
+        default:
+          add_error (Error (t->get_locus (),
+                            "unexpected token %qs in use tree with "
+                            "no valid simple path (i.e. list"
+                            " or glob use tree)",
+                            t->get_token_description ()));
+
+          skip_after_semicolon ();
+          return nullptr;
+        }
+    }
+  else
+    {
+      // a simple path was parsed; the next token decides what kind of tree
+      const_TokenPtr t = lexer.peek_token ();
+
+      switch (t->get_id ())
+        {
+        case AS:
+          {
+            // rebind UseTree type
+            lexer.skip_token ();
+
+            const_TokenPtr t = lexer.peek_token ();
+            switch (t->get_id ())
+              {
+              case IDENTIFIER:
+                // skip lexer token
+                lexer.skip_token ();
+
+                return std::unique_ptr (
+                  new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER,
+                                          std::move (path.value ()), locus, t));
+              case UNDERSCORE:
+                // skip lexer token
+                lexer.skip_token ();
+
+                return std::unique_ptr (
+                  new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD,
+                                          std::move (path.value ()), locus,
+                                          {Values::Keywords::UNDERSCORE,
+                                           t->get_locus ()}));
+              default:
+                add_error (Error (
+                  t->get_locus (),
+                  "unexpected token %qs in use tree with as clause - expected "
+                  "identifier or %<_%>",
+                  t->get_token_description ()));
+
+                skip_after_semicolon ();
+                return nullptr;
+              }
+          }
+        case SEMICOLON:
+          // rebind UseTree type without rebinding - path only
+
+          // don't skip semicolon - handled by the caller (parse_use_decl)
+          // lexer.skip_token();
+          // deliberate fall through: same result as for COMMA / RIGHT_CURLY
+        case COMMA:
+        case RIGHT_CURLY:
+          // this may occur in recursive calls - assume it is ok and ignore it
+          return std::unique_ptr (
+            new AST::UseTreeRebind (AST::UseTreeRebind::NONE,
+                                    std::move (path.value ()), locus));
+        case SCOPE_RESOLUTION:
+          // keep going
+          break;
+        default:
+          add_error (Error (t->get_locus (),
+                            "unexpected token %qs in use tree with valid path",
+                            t->get_token_description ()));
+          return nullptr;
+        }
+
+      // only reached for SCOPE_RESOLUTION: consume the `::` and look at what
+      // follows the path prefix
+      skip_token ();
+      t = lexer.peek_token ();
+
+      switch (t->get_id ())
+        {
+        case ASTERISK:
+          // glob UseTree type
+          lexer.skip_token ();
+
+          return std::unique_ptr (
+            new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED,
+                                  std::move (path.value ()), locus));
+        case LEFT_CURLY:
+          {
+            // nested tree UseTree type
+            lexer.skip_token ();
+
+            std::vector> use_trees;
+
+            // TODO: think of better control structure
+            const_TokenPtr t = lexer.peek_token ();
+            while (t->get_id () != RIGHT_CURLY)
+              {
+                std::unique_ptr use_tree = parse_use_tree ();
+                if (use_tree == nullptr)
+                  {
+                    break;
+                  }
+
+                use_trees.push_back (std::move (use_tree));
+
+                if (lexer.peek_token ()->get_id () != COMMA)
+                  break;
+
+                lexer.skip_token ();
+                t = lexer.peek_token ();
+              }
+
+            // skip end curly delimiter
+            if (!skip_token (RIGHT_CURLY))
+              {
+                // skip after somewhere?
+                return nullptr;
+              }
+
+            return std::unique_ptr (
+              new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED,
+                                    std::move (path.value ()),
+                                    std::move (use_trees), locus));
+          }
+        default:
+          add_error (Error (t->get_locus (),
+                            "unexpected token %qs in use tree with valid path",
+                            t->get_token_description ()));
+
+          // skip_after_semicolon();
+          return nullptr;
+        }
+    }
+}
+
+// Parses a function (not a method).
+/* Order of parsing: qualifiers, `fn`, name, optional generic params in
+ * angles, parenthesised parameter list (an optional self param first),
+ * optional return type, optional where clause, then either `;` (no body) or
+ * a block expression.  IS_EXTERNAL is passed through to the AST::Function
+ * constructor.  Returns nullptr on failure.  */
+template
+std::unique_ptr
+Parser::parse_function (AST::Visibility vis,
+                         AST::AttrVec outer_attrs,
+                         bool is_external)
+{
+  location_t locus = lexer.peek_token ()->get_locus ();
+  // Get qualifiers for function if they exist
+  AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
+
+  // the result is not checked here
+  skip_token (FN_KW);
+
+  // Save function name token
+  const_TokenPtr function_name_tok = expect_token (IDENTIFIER);
+  if (function_name_tok == nullptr)
+    {
+      skip_after_next_block ();
+      return nullptr;
+    }
+  Identifier function_name{function_name_tok};
+
+  // parse generic params - if exist
+  std::vector> generic_params
+    = parse_generic_params_in_angles ();
+
+  if (!skip_token (LEFT_PAREN))
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "function declaration missing opening parentheses before "
+                   "parameter list");
+      add_error (std::move (error));
+
+      skip_after_next_block ();
+      return nullptr;
+    }
+
+  auto initial_param = parse_self_param ();
+
+  // NOT_SELF just means there is no self param; any other error is fatal
+  if (!initial_param.has_value ()
+      && initial_param.error () != ParseSelfError::NOT_SELF)
+    return nullptr;
+
+  // consume the comma separating the self param from what follows
+  if (initial_param.has_value () && lexer.peek_token ()->get_id () == COMMA)
+    skip_token ();
+
+  // parse function parameters (only if next token isn't right paren)
+  std::vector> function_params;
+
+  if (lexer.peek_token ()->get_id () != RIGHT_PAREN)
+    function_params
+      = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; });
+
+  // the self param, when present, becomes the first parameter
+  if (initial_param.has_value ())
+    function_params.insert (function_params.begin (),
+                            std::move (*initial_param));
+
+  if (!skip_token (RIGHT_PAREN))
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "function declaration missing closing parentheses after "
+                   "parameter list");
+      add_error (std::move (error));
+
+      skip_after_next_block ();
+      return nullptr;
+    }
+
+  // parse function return type - if exists
+  std::unique_ptr return_type = parse_function_return_type ();
+
+  // parse where clause - if exists
+  AST::WhereClause where_clause = parse_where_clause ();
+
+  // a semicolon means a declaration without body; otherwise a block is
+  // required
+  tl::optional> body = tl::nullopt;
+  if (lexer.peek_token ()->get_id () == SEMICOLON)
+    lexer.skip_token ();
+  else
+    {
+      std::unique_ptr block_expr = parse_block_expr ();
+      if (block_expr == nullptr)
+        return nullptr;
+      body = std::move (block_expr);
+    }
+
+  return std::unique_ptr (
+    new AST::Function (std::move (function_name), std::move (qualifiers),
+                       std::move (generic_params), std::move (function_params),
+                       std::move (return_type), std::move (where_clause),
+                       std::move (body), std::move (vis),
+                       std::move (outer_attrs), locus, false, is_external));
+}
+
+/* Parses function or method qualifiers (i.e. const, async, unsafe, and
+ * extern with an optional string literal ABI).  Never fails: qualifiers that
+ * are absent are simply left at their defaults.  */
+template
+AST::FunctionQualifiers
+Parser::parse_function_qualifiers ()
+{
+  Async async_status = Async::No;
+  Const const_status = Const::No;
+  Unsafety unsafe_status = Unsafety::Normal;
+  bool has_extern = false;
+  std::string abi;
+
+  const_TokenPtr t;
+  location_t locus;
+  /* Up to two leading `const` / `async` qualifiers are accepted, in either
+   * order; `unsafe` and `extern` are then checked in that order.
+   * NOTE(review): locus is overwritten on every iteration, so it ends up as
+   * the location of the token peeked in the second iteration rather than
+   * that of the first qualifier - confirm this is intended.  */
+  for (int i = 0; i < 2; i++)
+    {
+      t = lexer.peek_token ();
+      locus = t->get_locus ();
+
+      switch (t->get_id ())
+        {
+        case CONST:
+          lexer.skip_token ();
+          const_status = Const::Yes;
+          break;
+        case ASYNC:
+          lexer.skip_token ();
+          async_status = Async::Yes;
+          break;
+        default:
+          // neither const nor async: nothing is consumed
+          break;
+        }
+    }
+
+  if (lexer.peek_token ()->get_id () == UNSAFE)
+    {
+      lexer.skip_token ();
+      unsafe_status = Unsafety::Unsafe;
+    }
+
+  if (lexer.peek_token ()->get_id () == EXTERN_KW)
+    {
+      lexer.skip_token ();
+      has_extern = true;
+
+      // detect optional abi name
+      const_TokenPtr next_tok = lexer.peek_token ();
+      if (next_tok->get_id () == STRING_LITERAL)
+        {
+          lexer.skip_token ();
+          abi = next_tok->get_str ();
+        }
+    }
+
+  return AST::FunctionQualifiers (locus, async_status, const_status,
+                                  unsafe_status, has_extern, std::move (abi));
+}
+
+// Parses generic (lifetime or type) params inside angle brackets (optional).
+/* Returns an empty vector, consuming nothing, when the next token is not a
+ * left angle bracket.  An empty vector is also returned when the closing
+ * angle bracket cannot be skipped, in which case the parameters parsed so
+ * far are discarded.  */
+template
+std::vector>
+Parser::parse_generic_params_in_angles ()
+{
+  if (lexer.peek_token ()->get_id () != LEFT_ANGLE)
+    {
+      // seems to be no generic params, so exit with empty vector
+      return std::vector> ();
+    }
+  lexer.skip_token ();
+
+  // DEBUG:
+  rust_debug ("skipped left angle in generic param");
+
+  std::vector> generic_params
+    = parse_generic_params (Parse::Utils::is_right_angle_tok);
+
+  // DEBUG:
+  rust_debug ("finished parsing actual generic params (i.e. inside angles)");
+
+  if (!skip_generics_right_angle ())
+    {
+      // DEBUG
+      rust_debug ("failed to skip generics right angle - returning empty "
+                  "generic params");
+
+      return std::vector> ();
+    }
+
+  return generic_params;
+}
+
+/* Parses a single generic parameter, including its outer attributes.  The
+ * first token after the attributes selects the kind:
+ *   - LIFETIME: lifetime param with optional `:` bounds;
+ *   - IDENTIFIER: type param with optional `:` bounds and optional `=` type;
+ *   - CONST: const param `const NAME : TYPE` with optional `=` default.
+ * IS_END_TOKEN is only used to stop lifetime bound parsing (a comma stops it
+ * as well).  Returns nullptr on failure; a trailing comma is not consumed.  */
+template
+template
+std::unique_ptr
+Parser::parse_generic_param (EndTokenPred is_end_token)
+{
+  auto outer_attrs = parse_outer_attributes ();
+  std::unique_ptr param;
+  auto token = lexer.peek_token ();
+
+  switch (token->get_id ())
+    {
+    case LIFETIME:
+      {
+        auto lifetime = parse_lifetime (false);
+        if (!lifetime)
+          {
+            rust_error_at (
+              token->get_locus (),
+              "failed to parse lifetime in generic parameter list");
+            return nullptr;
+          }
+
+        std::vector lifetime_bounds;
+        if (lexer.peek_token ()->get_id () == COLON)
+          {
+            // consume the colon before handing over to the bounds parser
+            lexer.skip_token ();
+            // parse required bounds
+            lifetime_bounds
+              = parse_lifetime_bounds ([is_end_token] (TokenId id) {
+                  return is_end_token (id) || id == COMMA;
+                });
+          }
+
+        param = std::unique_ptr (new AST::LifetimeParam (
+          std::move (lifetime.value ()), std::move (lifetime_bounds),
+          std::move (outer_attrs), token->get_locus ()));
+        break;
+      }
+    case IDENTIFIER:
+      {
+        auto type_ident = token->get_str ();
+        lexer.skip_token ();
+
+        std::vector> type_param_bounds;
+        if (lexer.peek_token ()->get_id () == COLON)
+          {
+            lexer.skip_token ();
+
+            // parse optional type param bounds
+            type_param_bounds = parse_type_param_bounds ();
+          }
+
+        std::unique_ptr type = nullptr;
+        if (lexer.peek_token ()->get_id () == EQUAL)
+          {
+            lexer.skip_token ();
+
+            // parse required type
+            type = parse_type ();
+            if (!type)
+              {
+                rust_error_at (
+                  lexer.peek_token ()->get_locus (),
+                  "failed to parse type in type param in generic params");
+                return nullptr;
+              }
+          }
+
+        param = std::unique_ptr (
+          new AST::TypeParam (std::move (type_ident), token->get_locus (),
+                              std::move (type_param_bounds), std::move (type),
+                              std::move (outer_attrs)));
+        break;
+      }
+    case CONST:
+      {
+        lexer.skip_token ();
+
+        auto name_token = expect_token (IDENTIFIER);
+
+        // both the name and the colon before the type are required
+        if (!name_token || !expect_token (COLON))
+          return nullptr;
+
+        auto type = parse_type ();
+        if (!type)
+          return nullptr;
+
+        // optional default value
+        tl::optional default_expr = tl::nullopt;
+        if (lexer.peek_token ()->get_id () == EQUAL)
+          {
+            lexer.skip_token ();
+            // remember the first token of the default for the diagnostic
+            auto tok = lexer.peek_token ();
+            default_expr = parse_generic_arg ();
+
+            if (!default_expr)
+              {
+                rust_error_at (tok->get_locus (),
+                               "invalid token for start of default value for "
+                               "const generic parameter: expected %, "
+                               "% or %, got %qs",
+                               token_id_to_str (tok->get_id ()));
+                return nullptr;
+              }
+
+            // At this point, we *know* that we are parsing a const
+            // expression
+            if (default_expr.value ().get_kind ()
+                == AST::GenericArg::Kind::Either)
+              default_expr = default_expr.value ().disambiguate_to_const ();
+          }
+
+        param = std::unique_ptr (
+          new AST::ConstGenericParam (name_token->get_str (), std::move (type),
+                                      default_expr, std::move (outer_attrs),
+                                      token->get_locus ()));
+
+        break;
+      }
+    default:
+      // FIXME: Can we clean this last call with a method call?
+      rust_error_at (token->get_locus (),
+                     "unexpected token when parsing generic parameters: %qs",
+                     token->as_string ().c_str ());
+      return nullptr;
+    }
+
+  return param;
+}
+
+/* Parse generic (lifetime or type) params NOT INSIDE ANGLE BRACKETS!!! Almost
+ * always parse_generic_params_in_angles is what is wanted.
*/ +template +template +std::vector> +Parser::parse_generic_params (EndTokenPred is_end_token) +{ + std::vector> generic_params; + + /* can't parse lifetime and type params separately due to lookahead issues + * thus, parse them all here */ + + /* HACK: used to retain attribute data if a lifetime param is tentatively + * parsed but it turns out to be type param */ + AST::Attribute parsed_outer_attr = AST::Attribute::create_empty (); + + // Did we parse a generic type param yet + auto type_seen = false; + // Did we parse a const param with a default value yet + auto const_with_default_seen = false; + // Did the user write a lifetime parameter after a type one + auto order_error = false; + // Did the user write a const param with a default value after a type one + auto const_with_default_order_error = false; + + // parse lifetime params + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto param = parse_generic_param (is_end_token); + if (param) + { + if (param->get_kind () == AST::GenericParam::Kind::Type) + { + type_seen = true; + if (const_with_default_seen) + const_with_default_order_error = true; + } + else if (param->get_kind () == AST::GenericParam::Kind::Lifetime + && type_seen) + { + order_error = true; + if (const_with_default_seen) + const_with_default_order_error = true; + } + else if (param->get_kind () == AST::GenericParam::Kind::Const) + { + type_seen = true; + AST::ConstGenericParam *const_param + = static_cast (param.get ()); + if (const_param->has_default_value ()) + const_with_default_seen = true; + else if (const_with_default_seen) + const_with_default_order_error = true; + } + + generic_params.emplace_back (std::move (param)); + maybe_skip_token (COMMA); + } + else + break; + } + + // FIXME: Add reordering hint + if (order_error) + { + Error error (generic_params.front ()->get_locus (), + "invalid order for generic parameters: lifetime parameters " + "must be declared prior to type and const parameters"); + add_error (std::move 
(error)); + } + if (const_with_default_order_error) + { + Error error (generic_params.front ()->get_locus (), + "invalid order for generic parameters: generic parameters " + "with a default must be trailing"); + add_error (std::move (error)); + } + + generic_params.shrink_to_fit (); + return generic_params; +} + +/* Parses lifetime generic parameters (pointers). Will also consume any + * trailing comma. No extra checks for end token. */ +template +std::vector> +Parser::parse_lifetime_params () +{ + std::vector> lifetime_params; + + while (lexer.peek_token ()->get_id () != END_OF_FILE) + { + auto lifetime_param = parse_lifetime_param (); + + if (!lifetime_param) + { + // can't treat as error as only way to get out with trailing comma + break; + } + + lifetime_params.emplace_back ( + new AST::LifetimeParam (std::move (lifetime_param.value ()))); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (pointers). Will also consume any + * trailing comma. Has extra is_end_token predicate checking. */ +template +template +std::vector> +Parser::parse_lifetime_params (EndTokenPred is_end_token) +{ + std::vector> lifetime_params; + + // if end_token is not specified, it defaults to EOF, so should work fine + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto lifetime_param = parse_lifetime_param (); + + if (!lifetime_param) + { + /* TODO: is it worth throwing away all lifetime params just because + * one failed? 
*/ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime param in lifetime params"); + add_error (std::move (error)); + + return {}; + } + + lifetime_params.emplace_back ( + new AST::LifetimeParam (std::move (lifetime_param))); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (objects). Will also consume any + * trailing comma. No extra checks for end token. + * TODO: is this best solution? implements most of the same algorithm. + * TODO: seems to be unused, remove? */ +template +std::vector +Parser::parse_lifetime_params_objs () +{ + std::vector lifetime_params; + + // bad control structure as end token cannot be guaranteed + while (true) + { + auto lifetime_param = parse_lifetime_param (); + + if (!lifetime_param) + { + // not an error as only way to exit if trailing comma + break; + } + + lifetime_params.push_back (std::move (lifetime_param.value ())); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses lifetime generic parameters (objects). Will also consume any + * trailing comma. Has extra is_end_token predicate checking. + * TODO: is this best solution? implements most of the same algorithm. */ +template +template +std::vector +Parser::parse_lifetime_params_objs ( + EndTokenPred is_end_token) +{ + std::vector lifetime_params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto lifetime_param = parse_lifetime_param (); + + if (!lifetime_param) + { + /* TODO: is it worth throwing away all lifetime params just because + * one failed? 
*/ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime param in lifetime params"); + add_error (std::move (error)); + + return {}; + } + + lifetime_params.push_back (std::move (lifetime_param.value ())); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + lifetime_params.shrink_to_fit (); + + return lifetime_params; +} + +/* Parses a sequence of a certain grammar rule in object form (not pointer or + * smart pointer), delimited by commas and ending when 'is_end_token' is + * satisfied (templated). Will also consume any trailing comma. + * FIXME: this cannot be used due to member function pointer problems (i.e. + * parsing_function cannot be specified properly) */ +template +template +auto +Parser::parse_non_ptr_sequence ( + ParseFunction parsing_function, EndTokenPred is_end_token, + std::string error_msg) -> std::vector +{ + std::vector params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto param = parsing_function (); + + if (param.is_error ()) + { + // TODO: is it worth throwing away all params just because one + // failed? + Error error (lexer.peek_token ()->get_locus (), + std::move (error_msg)); + add_error (std::move (error)); + + return {}; + } + + params.push_back (std::move (param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + params.shrink_to_fit (); + + return params; +} + +/* Parses a single lifetime generic parameter (not including comma). 
*/ +template +tl::expected +Parser::parse_lifetime_param () +{ + // parse outer attributes, which are optional and may not exist + auto outer_attrs = parse_outer_attributes (); + + // save lifetime token - required + const_TokenPtr lifetime_tok = lexer.peek_token (); + if (lifetime_tok->get_id () != LIFETIME) + { + // if lifetime is missing, must not be a lifetime param, so return error + return tl::make_unexpected ({}); + } + lexer.skip_token (); + AST::Lifetime lifetime (AST::Lifetime::NAMED, lifetime_tok->get_str (), + lifetime_tok->get_locus ()); + + // parse lifetime bounds, if it exists + std::vector lifetime_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + // parse lifetime bounds + lifetime_bounds = parse_lifetime_bounds (); + // TODO: have end token passed in? + } + + return AST::LifetimeParam (std::move (lifetime), std::move (lifetime_bounds), + std::move (outer_attrs), + lifetime_tok->get_locus ()); +} + +// Parses type generic parameters. Will also consume any trailing comma. +template +std::vector> +Parser::parse_type_params () +{ + std::vector> type_params; + + // infinite loop with break on failure as no info on ending token + while (true) + { + std::unique_ptr type_param = parse_type_param (); + + if (type_param == nullptr) + { + // break if fails to parse + break; + } + + type_params.push_back (std::move (type_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + type_params.shrink_to_fit (); + return type_params; +} + +// Parses type generic parameters. Will also consume any trailing comma. 
+template +template +std::vector> +Parser::parse_type_params (EndTokenPred is_end_token) +{ + std::vector> type_params; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + std::unique_ptr type_param = parse_type_param (); + + if (type_param == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type param in type params"); + add_error (std::move (error)); + + return {}; + } + + type_params.push_back (std::move (type_param)); + + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip commas, including trailing commas + lexer.skip_token (); + } + + type_params.shrink_to_fit (); + return type_params; + /* TODO: this shares most code with parse_lifetime_params - good place to + * use template (i.e. parse_non_ptr_sequence if doable) */ +} + +/* Parses a single type (generic) parameter, not including commas. Returns + * null if the next token is not an identifier, or if a type following an + * equals token fails to parse. May change to return value. */ +template +std::unique_ptr +Parser::parse_type_param () +{ + // parse outer attributes, which are optional and may not exist + auto outer_attrs = parse_outer_attributes (); + + const_TokenPtr identifier_tok = lexer.peek_token (); + if (identifier_tok->get_id () != IDENTIFIER) + { + // return null (without reporting an error) as type param can't exist + // without this required identifier + return nullptr; + } + Identifier ident{identifier_tok}; + lexer.skip_token (); + + // parse type param bounds (if they exist) + std::vector> type_param_bounds; + if (lexer.peek_token ()->get_id () == COLON) + { + // skip the colon that introduces the bounds + lexer.skip_token (); + + // parse type param bounds, which may or may not exist + type_param_bounds = parse_type_param_bounds (); + } + + // parse type (if it exists) + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == EQUAL) + { + lexer.skip_token (); + + // parse type (now required) + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in type param"); + add_error (std::move (error)); + + return nullptr; + } + } + +
return std::unique_ptr ( + new AST::TypeParam (std::move (ident), identifier_tok->get_locus (), + std::move (type_param_bounds), std::move (type), + std::move (outer_attrs))); +} + +/* Parses regular (i.e. non-generic) parameters in functions or methods. Also + * has end token handling. */ +template +template +std::vector> +Parser::parse_function_params (EndTokenPred is_end_token) +{ + std::vector> params; + + /* an immediate end token means an empty parameter list */ + if (is_end_token (lexer.peek_token ()->get_id ())) + return params; + + auto initial_param = parse_function_param (); + + // Return empty parameter list if no parameter there + if (initial_param == nullptr) + { + // TODO: is this an error? + return params; + } + + params.push_back (std::move (initial_param)); + + // maybe think of a better control structure here - do-while with an initial + // error state? basically, loop through parameter list until can't find any + // more params + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + // skip comma if applies + lexer.skip_token (); + + // TODO: strictly speaking, shouldn't there be no trailing comma? + if (is_end_token (lexer.peek_token ()->get_id ())) + break; + + // now, as an end token would have broken out above, function param is + // required + auto param = parse_function_param (); + if (param == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse function param (in function params)"); + add_error (std::move (error)); + + // skip somewhere? + return std::vector> (); + } + + params.push_back (std::move (param)); + + t = lexer.peek_token (); + } + + params.shrink_to_fit (); + return params; +} + +/* Parses a single regular (i.e. non-generic) parameter in a function or + * method, i.e. the "name: type" bit. Also handles it not existing.
*/ +template +std::unique_ptr +Parser::parse_function_param () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // TODO: should saved location be at start of outer attributes or pattern? + location_t locus = lexer.peek_token ()->get_locus (); + + if (lexer.peek_token ()->get_id () == ELLIPSIS) // Unnamed variadic + { + lexer.skip_token (); // Skip ellipsis + return std::make_unique ( + AST::VariadicParam (std::move (outer_attrs), locus)); + } + + std::unique_ptr param_pattern = parse_pattern (); + + // without a pattern there is no function param here: return null + if (param_pattern == nullptr) + { + // skip after something + return nullptr; + } + + // a colon must follow the pattern + if (!skip_token (COLON)) + { + // skip after something + return nullptr; + } + + if (lexer.peek_token ()->get_id () == ELLIPSIS) // Named variadic + { + lexer.skip_token (); // Skip ellipsis + return std::make_unique ( + AST::VariadicParam (std::move (param_pattern), std::move (outer_attrs), + locus)); + } + else + { + // regular param: a type is required after the colon + std::unique_ptr param_type = parse_type (); + if (param_type == nullptr) + { + return nullptr; + } + return std::make_unique ( + AST::FunctionParam (std::move (param_pattern), std::move (param_type), + std::move (outer_attrs), locus)); + } +} + +/* Parses a function or method return type syntactical construction. Also + * handles a function return type not existing, in which case it returns + * null. */ +template +std::unique_ptr +Parser::parse_function_return_type () +{ + if (lexer.peek_token ()->get_id () != RETURN_TYPE) + return nullptr; + + // skip the RETURN_TYPE token, as a return type now obviously exists + lexer.skip_token (); + + std::unique_ptr type = parse_type (); + + return type; +} + +/* Parses a "where clause" (in a function, struct, method, etc.). Also handles + * a where clause not existing, in which it will return + * WhereClause::create_empty(), which can be checked via + * WhereClause::is_empty().
*/ +template +AST::WhereClause +Parser::parse_where_clause () +{ + const_TokenPtr where_tok = lexer.peek_token (); + if (where_tok->get_id () != WHERE) + { + // where clause doesn't exist, so create empty one + return AST::WhereClause::create_empty (); + } + + lexer.skip_token (); + + /* parse where clause items - this is not a separate rule in the reference + * so won't be here */ + std::vector> where_clause_items; + + /* `for` lifetimes are only looked for once, right after the where token; + * the result is handed to every item parsed below */ + std::vector for_lifetimes; + if (lexer.peek_token ()->get_id () == FOR) + for_lifetimes = parse_for_lifetimes (); + + /* HACK: where clauses end with a left curly or semicolon or equals in all + * uses currently */ + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != LEFT_CURLY && t->get_id () != SEMICOLON + && t->get_id () != EQUAL) + { + std::unique_ptr where_clause_item + = parse_where_clause_item (for_lifetimes); + + if (where_clause_item == nullptr) + { + Error error (t->get_locus (), "failed to parse where clause item"); + add_error (std::move (error)); + + return AST::WhereClause::create_empty (); + } + + where_clause_items.push_back (std::move (where_clause_item)); + + // also skip comma if it exists; no comma ends the clause + if (lexer.peek_token ()->get_id () != COMMA) + break; + + lexer.skip_token (); + t = lexer.peek_token (); + } + + where_clause_items.shrink_to_fit (); + return AST::WhereClause (std::move (where_clause_items)); +} + +/* Parses a where clause item (lifetime or type bound). Does not parse any + * commas. */ +template +std::unique_ptr +Parser::parse_where_clause_item ( + const std::vector &outer_for_lifetimes) +{ + // simple way of determining lifetime or type bound - test for a leading + // lifetime token + const_TokenPtr t = lexer.peek_token (); + + if (t->get_id () == LIFETIME) + return parse_lifetime_where_clause_item (); + else + return parse_type_bound_where_clause_item (outer_for_lifetimes); +} + +// Parses a lifetime where clause item.
+template +std::unique_ptr +Parser::parse_lifetime_where_clause_item () +{ + auto parsed_lifetime = parse_lifetime (false); + if (!parsed_lifetime) + { + // TODO: error here? + return nullptr; + } + auto lifetime = parsed_lifetime.value (); + + if (!skip_token (COLON)) + { + // TODO: skip after somewhere + return nullptr; + } + + std::vector lifetime_bounds = parse_lifetime_bounds (); + // TODO: have end token passed in? + + location_t locus = lifetime.get_locus (); + + return std::unique_ptr ( + new AST::LifetimeWhereClauseItem (std::move (lifetime), + std::move (lifetime_bounds), locus)); +} + +// Parses a type bound where clause item. +template +std::unique_ptr +Parser::parse_type_bound_where_clause_item ( + const std::vector &outer_for_lifetimes) +{ + std::vector for_lifetimes = outer_for_lifetimes; + + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + return nullptr; + } + + if (!skip_token (COLON)) + { + // TODO: skip after somewhere + return nullptr; + } + + if (lexer.peek_token ()->get_id () == FOR) + { + auto for_lifetimes_inner = parse_for_lifetimes (); + for_lifetimes.insert (for_lifetimes.end (), for_lifetimes_inner.begin (), + for_lifetimes_inner.end ()); + } + + // parse type param bounds if they exist + std::vector> type_param_bounds + = parse_type_param_bounds (); + + location_t locus = lexer.peek_token ()->get_locus (); + + return std::unique_ptr ( + new AST::TypeBoundWhereClauseItem (std::move (for_lifetimes), + std::move (type), + std::move (type_param_bounds), locus)); +} + +// Parses a for lifetimes clause, including the for keyword and angle +// brackets. +template +std::vector +Parser::parse_for_lifetimes () +{ + std::vector params; + + if (!skip_token (FOR)) + { + // skip after somewhere? + return params; + } + + if (!skip_token (LEFT_ANGLE)) + { + // skip after somewhere? 
+ return params; + } + + /* the end of the parameter list is detected with a predicate (older note: + * cannot specify end token due to parsing problems with '>' tokens being + * nested) */ + params = parse_lifetime_params_objs (Parse::Utils::is_right_angle_tok); + + if (!skip_generics_right_angle ()) + { + // DEBUG + // NOTE(review): the message mentions where clause items although this + // function parses a for lifetimes clause + rust_debug ("failed to skip generics right angle after (supposedly) " + "finished parsing where clause items"); + // ok, well this gets called. + + // skip after somewhere? + return params; + } + + return params; +} + +// Parses type parameter bounds in where clause or generic arguments. Bounds +// are separated by plus tokens; the result is empty if no bound is found. +template +std::vector> +Parser::parse_type_param_bounds () +{ + std::vector> type_param_bounds; + + std::unique_ptr initial_bound + = parse_type_param_bound (); + + // quick exit if null + if (initial_bound == nullptr) + { + /* error? type param bounds must have at least one term, but are bounds + * optional? */ + return type_param_bounds; + } + type_param_bounds.push_back (std::move (initial_bound)); + + while (lexer.peek_token ()->get_id () == PLUS) + { + lexer.skip_token (); + + std::unique_ptr bound = parse_type_param_bound (); + if (bound == nullptr) + { + /* not an error: bound is allowed to be null as trailing plus is + * allowed */ + return type_param_bounds; + } + + type_param_bounds.push_back (std::move (bound)); + } + + type_param_bounds.shrink_to_fit (); + return type_param_bounds; +} + +/* Parses type parameter bounds in where clause or generic arguments, with end + * token handling. */ +template +template +std::vector> +Parser::parse_type_param_bounds (EndTokenPred is_end_token) +{ + std::vector> type_param_bounds; + + std::unique_ptr initial_bound + = parse_type_param_bound (); + + // quick exit if null + if (initial_bound == nullptr) + { + /* error? type param bounds must have at least one term, but are bounds + * optional?
*/ + return type_param_bounds; + } + type_param_bounds.push_back (std::move (initial_bound)); + + while (lexer.peek_token ()->get_id () == PLUS) + { + lexer.skip_token (); + + // break if end token follows the plus + if (is_end_token (lexer.peek_token ()->get_id ())) + break; + + std::unique_ptr bound = parse_type_param_bound (); + if (bound == nullptr) + { + // TODO how wise is it to ditch all bounds if only one failed? + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type param bound in type param bounds"); + add_error (std::move (error)); + + return {}; + } + + type_param_bounds.push_back (std::move (bound)); + } + + type_param_bounds.shrink_to_fit (); + return type_param_bounds; +} + +/* Parses a single type parameter bound in a where clause or generic argument. + * Does not parse the '+' between arguments. Returns null, without reporting + * an error, if the next token cannot start a bound. */ +template +std::unique_ptr +Parser::parse_type_param_bound () +{ + // dispatch on the next token: a lifetime token gives a lifetime bound, the + // other tokens listed below are handed to the trait bound parser + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LIFETIME: + // the peeked token is a LIFETIME, which is the only thing + // parse_lifetime checks before producing a value + return std::unique_ptr ( + new AST::Lifetime (parse_lifetime (false).value ())); + case LEFT_PAREN: + case QUESTION_MARK: + case FOR: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + return parse_trait_bound (); + default: + // don't error - assume this is fine TODO + return nullptr; + } +} + +// Parses a trait bound type param bound. +template +std::unique_ptr +Parser::parse_trait_bound () +{ + bool has_parens = false; + bool has_question_mark = false; + + location_t locus = lexer.peek_token ()->get_locus (); + + /* parse optional `for lifetimes`.
*/ + std::vector for_lifetimes; + if (lexer.peek_token ()->get_id () == FOR) + for_lifetimes = parse_for_lifetimes (); + + // handle trait bound being in parentheses + if (lexer.peek_token ()->get_id () == LEFT_PAREN) + { + has_parens = true; + lexer.skip_token (); + } + + // handle having question mark (optional) + if (lexer.peek_token ()->get_id () == QUESTION_MARK) + { + has_question_mark = true; + lexer.skip_token (); + } + + // handle TypePath + AST::TypePath type_path = parse_type_path (); + + // handle closing parentheses, required if an opening one was seen + if (has_parens) + { + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + } + + return std::unique_ptr ( + new AST::TraitBound (std::move (type_path), locus, has_parens, + has_question_mark, std::move (for_lifetimes))); +} + +// Parses lifetime bounds: lifetimes separated by plus tokens. Starts directly +// at the first lifetime; no colon is consumed here. +template +std::vector +Parser::parse_lifetime_bounds () +{ + std::vector lifetime_bounds; + + while (true) + { + auto lifetime = parse_lifetime (false); + + // quick exit (without error) when no lifetime could be parsed + if (!lifetime) + break; + + lifetime_bounds.push_back (std::move (lifetime.value ())); + + /* plus is maybe not allowed at end - spec defines it weirdly, so + * assuming allowed at end */ + if (lexer.peek_token ()->get_id () != PLUS) + break; + + lexer.skip_token (); + } + + lifetime_bounds.shrink_to_fit (); + return lifetime_bounds; +} + +// Parses lifetime bounds, with added check for ending token. +template +template +std::vector +Parser::parse_lifetime_bounds (EndTokenPred is_end_token) +{ + std::vector lifetime_bounds; + + while (!is_end_token (lexer.peek_token ()->get_id ())) + { + auto lifetime = parse_lifetime (false); + + if (!lifetime) + { + /* TODO: is it worth throwing away all lifetime bound info just + * because one failed?
*/ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in lifetime bounds"); + add_error (std::move (error)); + + return {}; + } + + lifetime_bounds.push_back (std::move (lifetime.value ())); + + /* plus is maybe not allowed at end - spec defines it weirdly, so + * assuming allowed at end */ + if (lexer.peek_token ()->get_id () != PLUS) + break; + + lexer.skip_token (); + } + + lifetime_bounds.shrink_to_fit (); + return lifetime_bounds; +} + +/* Parses a lifetime token (named, 'static, or '_). Also handles lifetime not + * existing: in that case no token is consumed, and the result is an elided + * lifetime if allow_elided is set or an unexpected value otherwise. */ +template +tl::expected +Parser::parse_lifetime (bool allow_elided) +{ + const_TokenPtr lifetime_tok = lexer.peek_token (); + if (lifetime_tok->get_id () != LIFETIME) + { + if (allow_elided) + { + return AST::Lifetime::elided (); + } + else + { + return tl::make_unexpected ({}); + } + } + lexer.skip_token (); + + return lifetime_from_token (lifetime_tok); +} + +/* Builds an AST::Lifetime from a lifetime token, based on the token string: + * "static" gives a STATIC lifetime, "_" a WILDCARD one and anything else a + * NAMED lifetime carrying that string. */ +template +AST::Lifetime +Parser::lifetime_from_token (const_TokenPtr tok) +{ + location_t locus = tok->get_locus (); + std::string lifetime_ident = tok->get_str (); + + if (lifetime_ident == "static") + { + return AST::Lifetime (AST::Lifetime::STATIC, "", locus); + } + else if (lifetime_ident == "_") + { + // Explicitly and implicitly elided lifetimes follow the same rules.
+ return AST::Lifetime (AST::Lifetime::WILDCARD, "", locus); + } + else + { + return AST::Lifetime (AST::Lifetime::NAMED, std::move (lifetime_ident), + locus); + } +} + +/* Parses an external opaque type item: the TYPE token, an identifier and a + * terminating semicolon. Returns null if the identifier or the semicolon is + * missing. */ +template +std::unique_ptr +Parser::parse_external_type_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (TYPE); + + const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); + if (alias_name_tok == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse identifier in external opaque type"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (SEMICOLON)) + return nullptr; + + return std::unique_ptr ( + new AST::ExternalTypeItem (alias_name_tok->get_str (), std::move (vis), + std::move (outer_attrs), std::move (locus))); +} + +// Parses a "type alias" (typedef) item. +template +std::unique_ptr +Parser::parse_type_alias (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (TYPE); + + // TODO: use this token for identifier when finished that + const_TokenPtr alias_name_tok = expect_token (IDENTIFIER); + if (alias_name_tok == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse identifier in type alias"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + Identifier alias_name{alias_name_tok}; + + // parse generic params, which may not exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse where clause, which may not exist + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + // NOTE(review): the parsed type is not checked for null before use + std::unique_ptr type_to_alias = parse_type (); + + if (!skip_token (SEMICOLON)) + { + // should be skipping past this, not the next line + return nullptr; + } + + return std::unique_ptr ( + new AST::TypeAlias (std::move
(alias_name), std::move (generic_params), + std::move (where_clause), std::move (type_to_alias), + std::move (vis), std::move (outer_attrs), locus)); +} + +// Parses a struct item AST node (either a proper struct or a tuple struct). +template +std::unique_ptr +Parser::parse_struct (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + /* TODO: determine best way to parse the proper struct vs tuple struct - + * share most of initial constructs so lookahead might be impossible, and if + * not probably too expensive. Best way is probably unified parsing for the + * initial parts and then pass them in as params to more derived functions. + * Alternatively, just parse everything in this one function - do this if + * function not too long. */ + + /* Proper struct <- 'struct' IDENTIFIER generic_params? where_clause? ( '{' + * struct_fields? '}' | ';' ) */ + /* Tuple struct <- 'struct' IDENTIFIER generic_params? '(' tuple_fields? ')' + * where_clause? ';' */ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (STRUCT_KW); + + // parse struct name + const_TokenPtr name_tok = expect_token (IDENTIFIER); + if (name_tok == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse struct or tuple struct identifier"); + add_error (std::move (error)); + + // skip after somewhere? + return nullptr; + } + Identifier struct_name{name_tok}; + + // parse generic params, which may or may not exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // branch on next token - determines whether proper struct or tuple struct + if (lexer.peek_token ()->get_id () == LEFT_PAREN) + { + // tuple struct + + // skip left parenthesis + lexer.skip_token (); + + // parse tuple fields + std::vector tuple_fields; + // Might be empty tuple for unit tuple struct.
+ if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + tuple_fields = std::vector (); + else + tuple_fields = parse_tuple_fields (); + + // tuple parameters must have closing parenthesis + if (!skip_token (RIGHT_PAREN)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse where clause, which is optional + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (SEMICOLON)) + { + // can't skip after semicolon because it's meant to be here + return nullptr; + } + + return std::unique_ptr ( + new AST::TupleStruct (std::move (tuple_fields), std::move (struct_name), + std::move (generic_params), + std::move (where_clause), std::move (vis), + std::move (outer_attrs), locus)); + } + + // not a tuple struct (that branch has returned), so assume it is a proper + // struct being parsed from here on + + // parse where clause, which is optional + AST::WhereClause where_clause = parse_where_clause (); + + // branch on next token - determines whether struct is a unit struct + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_CURLY: + { + // struct with body + + // skip curly bracket + lexer.skip_token (); + + // parse struct fields, if any + std::vector struct_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere?
+ return nullptr; + } + + return std::unique_ptr (new AST::StructStruct ( + std::move (struct_fields), std::move (struct_name), + std::move (generic_params), std::move (where_clause), false, + std::move (vis), std::move (outer_attrs), locus)); + } + case SEMICOLON: + // unit struct declaration + + lexer.skip_token (); + + return std::unique_ptr ( + new AST::StructStruct (std::move (struct_name), + std::move (generic_params), + std::move (where_clause), std::move (vis), + std::move (outer_attrs), locus)); + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in struct declaration", + t->get_token_description ())); + + // skip somewhere? + return nullptr; + } +} + +// Parses struct fields in struct declarations. No end token handling: a field +// that fails to parse ends the list without an error being added here. +template +std::vector +Parser::parse_struct_fields () +{ + std::vector fields; + + AST::StructField initial_field = parse_struct_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + return fields; + + fields.push_back (std::move (initial_field)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + AST::StructField field = parse_struct_field (); + + if (field.is_error ()) + { + // would occur with trailing comma, so allowed + break; + } + + fields.push_back (std::move (field)); + } + + fields.shrink_to_fit (); + return fields; + // TODO: template if possible (parse_non_ptr_seq) +} + +// Parses struct fields in struct declarations, with end token handling.
+template +template +std::vector +Parser::parse_struct_fields (EndTokenPred is_end_tok) +{ + std::vector fields; + + AST::StructField initial_field = parse_struct_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + return fields; + + fields.push_back (std::move (initial_field)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + // a comma directly followed by the end token was a trailing comma + if (is_end_tok (lexer.peek_token ()->get_id ())) + break; + + AST::StructField field = parse_struct_field (); + if (field.is_error ()) + { + /* TODO: should every field be ditched just because one couldn't be + * parsed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse struct field in struct fields"); + add_error (std::move (error)); + + return {}; + } + + fields.push_back (std::move (field)); + } + + fields.shrink_to_fit (); + return fields; + // TODO: template if possible (parse_non_ptr_seq) +} + +// Parses a single struct field (in a struct definition). Does not parse +// commas. Failure is signalled by returning StructField::create_error (). +template +AST::StructField +Parser::parse_struct_field () +{ + // parse outer attributes, if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility, if it exists + auto vis = parse_visibility (); + if (!vis) + return AST::StructField::create_error (); + + location_t locus = lexer.peek_token ()->get_locus (); + + // parse field name + const_TokenPtr field_name_tok = lexer.peek_token (); + if (field_name_tok->get_id () != IDENTIFIER) + { + // if not identifier, assumes there is no struct field and exits - not + // necessarily error + return AST::StructField::create_error (); + } + Identifier field_name{field_name_tok}; + lexer.skip_token (); + + if (!skip_token (COLON)) + { + // skip after somewhere?
+ return AST::StructField::create_error (); + } + + // parse field type - this is required + std::unique_ptr field_type = parse_type (); + if (field_type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in struct field definition"); + add_error (std::move (error)); + + // skip after somewhere + return AST::StructField::create_error (); + } + + return AST::StructField (std::move (field_name), std::move (field_type), + std::move (vis.value ()), locus, + std::move (outer_attrs)); +} + +// Parses tuple fields in tuple/tuple struct declarations. +template +std::vector +Parser::parse_tuple_fields () +{ + std::vector fields; + + AST::TupleField initial_field = parse_tuple_field (); + + // Return empty field list if no field there + if (initial_field.is_error ()) + { + return fields; + } + + fields.push_back (std::move (initial_field)); + + // maybe think of a better control structure here - do-while with an initial + // error state? basically, loop through field list until can't find any more + // fields. HACK: all current syntax uses of tuple fields have them ending + // with a right paren token + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == COMMA) + { + // skip comma if applies - e.g. trailing comma + lexer.skip_token (); + + // break out due to right paren if it exists + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + { + break; + } + + AST::TupleField field = parse_tuple_field (); + if (field.is_error ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse tuple field in tuple fields"); + add_error (std::move (error)); + + return std::vector (); + } + + fields.push_back (std::move (field)); + + t = lexer.peek_token (); + } + + fields.shrink_to_fit (); + return fields; + + // TODO: this shares basically all code with function params and struct + // fields + // - templates? +} + +/* Parses a single tuple struct field in a tuple struct definition. Does not + * parse commas.
*/ +template +AST::TupleField +Parser::parse_tuple_field () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility if it exists + auto visibility = parse_visibility (); + if (!visibility) + return AST::TupleField::create_error (); + + location_t locus = lexer.peek_token ()->get_locus (); + + // parse type, which is required + std::unique_ptr field_type = parse_type (); + if (field_type == nullptr) + { + // report the missing type and return an error field + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in tuple struct field"); + add_error (std::move (error)); + + // skip after something + return AST::TupleField::create_error (); + } + + return AST::TupleField (std::move (field_type), + std::move (visibility.value ()), locus, + std::move (outer_attrs)); +} + +// Parses a Rust "enum" tagged union item definition. +template +std::unique_ptr +Parser::parse_enum (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (ENUM_KW); + + // parse enum name + const_TokenPtr enum_name_tok = expect_token (IDENTIFIER); + if (enum_name_tok == nullptr) + return nullptr; + + Identifier enum_name = {enum_name_tok}; + + // parse generic params (of enum container, not enum variants) if they exist + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse where clause if it exists + AST::WhereClause where_clause = parse_where_clause (); + + // the variant list must be opened by a left curly + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + // parse actual enum variant definitions, up to the closing right curly + std::vector> enum_items + = parse_enum_items ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + return std::unique_ptr ( + new AST::Enum (std::move (enum_name), std::move (vis), + std::move (generic_params), std::move (where_clause), + std::move (enum_items), std::move (outer_attrs),
locus)); +} + +// Parses the enum variants inside an enum definition. No end token handling: +// an item that fails to parse ends the list without an error being added here. +template +std::vector> +Parser::parse_enum_items () +{ + std::vector> items; + + std::unique_ptr initial_item = parse_enum_item (); + + // Return empty item list if no item there + if (initial_item == nullptr) + return items; + + items.push_back (std::move (initial_item)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + std::unique_ptr item = parse_enum_item (); + if (item == nullptr) + { + // this would occur with a trailing comma, which is allowed + break; + } + + items.push_back (std::move (item)); + } + + items.shrink_to_fit (); + return items; + + /* TODO: use template if doable (parse_non_ptr_sequence) */ +} + +// Parses the enum variants inside an enum definition, with end token handling. +template +template +std::vector> +Parser::parse_enum_items (EndTokenPred is_end_tok) +{ + std::vector> items; + + std::unique_ptr initial_item = parse_enum_item (); + + // Return empty item list if no item there + if (initial_item == nullptr) + return items; + + items.push_back (std::move (initial_item)); + + while (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + + // a comma directly followed by the end token was a trailing comma + if (is_end_tok (lexer.peek_token ()->get_id ())) + break; + + std::unique_ptr item = parse_enum_item (); + if (item == nullptr) + { + /* TODO should this ignore all successfully parsed enum items just + * because one failed? */ + Error error (lexer.peek_token ()->get_locus (), + "failed to parse enum item in enum items"); + add_error (std::move (error)); + + return {}; + } + + items.push_back (std::move (item)); + } + + items.shrink_to_fit (); + return items; + + /* TODO: use template if doable (parse_non_ptr_sequence) */ +} + +/* Parses a single enum variant item in an enum definition. Does not parse + * commas.
*/ +template +std::unique_ptr +Parser::parse_enum_item () +{ + // parse outer attributes if they exist + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse visibility, which may or may not exist + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); + + // parse name for enum item, which is required + const_TokenPtr item_name_tok = lexer.peek_token (); + if (item_name_tok->get_id () != IDENTIFIER) + { + // this may not be an error but it means there is no enum item here + return nullptr; + } + lexer.skip_token (); + Identifier item_name{item_name_tok}; + + // branch based on next token + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case LEFT_PAREN: + { + // tuple enum item + lexer.skip_token (); + + std::vector tuple_fields; + // Might be empty tuple for unit tuple enum variant. + if (lexer.peek_token ()->get_id () == RIGHT_PAREN) + tuple_fields = std::vector (); + else + tuple_fields = parse_tuple_fields (); + + if (!skip_token (RIGHT_PAREN)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr (new AST::EnumItemTuple ( + std::move (item_name), std::move (vis), std::move (tuple_fields), + std::move (outer_attrs), item_name_tok->get_locus ())); + } + case LEFT_CURLY: + { + // struct enum item + lexer.skip_token (); + + std::vector struct_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr (new AST::EnumItemStruct ( + std::move (item_name), std::move (vis), std::move (struct_fields), + std::move (outer_attrs), item_name_tok->get_locus ())); + } + case EQUAL: + { + // discriminant enum item + lexer.skip_token (); + + std::unique_ptr discriminant_expr = parse_expr (); + + return std::unique_ptr ( + new AST::EnumItemDiscriminant (std::move (item_name), std::move (vis), + std::move (discriminant_expr), + std::move 
(outer_attrs), + item_name_tok->get_locus ())); + } + default: + // regular enum with just an identifier + return std::unique_ptr ( + new AST::EnumItem (std::move (item_name), std::move (vis), + std::move (outer_attrs), + item_name_tok->get_locus ())); + } +} + +// Parses a C-style (and C-compat) untagged union declaration. +template +std::unique_ptr +Parser::parse_union (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + /* hack - "weak keyword" by finding identifier called "union" (lookahead in + * item switch) */ + const_TokenPtr union_keyword = expect_token (IDENTIFIER); + rust_assert (union_keyword->get_str () == Values::WeakKeywords::UNION); + location_t locus = union_keyword->get_locus (); + + // parse actual union name + const_TokenPtr union_name_tok = expect_token (IDENTIFIER); + if (union_name_tok == nullptr) + { + skip_after_next_block (); + return nullptr; + } + Identifier union_name{union_name_tok}; + + // parse optional generic parameters + std::vector> generic_params + = parse_generic_params_in_angles (); + + // parse optional where clause + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + /* parse union inner items as "struct fields" because hey, syntax reuse. + * Spec said so. */ + std::vector union_fields + = parse_struct_fields ([] (TokenId id) { return id == RIGHT_CURLY; }); + + if (!skip_token (RIGHT_CURLY)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::Union (std::move (union_name), std::move (vis), + std::move (generic_params), std::move (where_clause), + std::move (union_fields), std::move (outer_attrs), locus)); +} + +/* Parses a "constant item" (compile-time constant to maybe "inline" + * throughout the program - like constexpr). 
*/ +template +std::unique_ptr +Parser::parse_const_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (CONST); + + /* get constant identifier - this is either a proper identifier or the _ + * wildcard */ + const_TokenPtr ident_tok = lexer.peek_token (); + // make default identifier the underscore wildcard one + std::string ident (Values::Keywords::UNDERSCORE); + switch (ident_tok->get_id ()) + { + case IDENTIFIER: + ident = ident_tok->get_str (); + lexer.skip_token (); + break; + case UNDERSCORE: + // do nothing - identifier is already "_" + lexer.skip_token (); + break; + default: + add_error ( + Error (ident_tok->get_locus (), + "expected item name (identifier or %<_%>) in constant item " + "declaration - found %qs", + ident_tok->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse constant type (required) + std::unique_ptr type = parse_type (); + + // A const with no given expression value + if (lexer.peek_token ()->get_id () == SEMICOLON) + { + lexer.skip_token (); + return std::unique_ptr ( + new AST::ConstantItem (std::move (ident), std::move (vis), + std::move (type), std::move (outer_attrs), + locus)); + } + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse constant expression (required) + std::unique_ptr expr = parse_expr (); + + if (!skip_token (SEMICOLON)) + { + // skip somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ConstantItem (std::move (ident), std::move (vis), std::move (type), + std::move (expr), std::move (outer_attrs), locus)); +} + +// Parses a "static item" (static storage item, with 'static lifetime). 
+template +std::unique_ptr +Parser::parse_static_item (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (STATIC_KW); + + // determine whether static item is mutable + bool is_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + is_mut = true; + lexer.skip_token (); + } + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse static item type (required) + std::unique_ptr type = parse_type (); + + if (!skip_token (EQUAL)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse static item expression (required) + std::unique_ptr expr = parse_expr (); + + if (!skip_token (SEMICOLON)) + { + // skip after somewhere + return nullptr; + } + + return std::unique_ptr ( + new AST::StaticItem (std::move (ident), is_mut, std::move (type), + std::move (expr), std::move (vis), + std::move (outer_attrs), locus)); +} + +// Parses a trait definition item, including unsafe ones. 
+template +std::unique_ptr +Parser::parse_trait (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + bool is_unsafe = false; + bool is_auto_trait = false; + + if (lexer.peek_token ()->get_id () == UNSAFE) + { + is_unsafe = true; + lexer.skip_token (); + } + + if (lexer.peek_token ()->get_id () == AUTO) + { + is_auto_trait = true; + lexer.skip_token (); + } + + skip_token (TRAIT); + + // parse trait name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + // parse generic parameters (if they exist) + std::vector> generic_params + = parse_generic_params_in_angles (); + + // create placeholder type param bounds in case they don't exist + std::vector> type_param_bounds; + + // parse type param bounds (if they exist) + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + type_param_bounds = parse_type_param_bounds ( + [] (TokenId id) { return id == WHERE || id == LEFT_CURLY; }); + // type_param_bounds = parse_type_param_bounds (); + } + + // parse where clause (if it exists) + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + // parse inner attrs (if they exist) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse trait items + std::vector> trait_items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr trait_item = parse_trait_item (); + + if (trait_item == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse trait item in trait"); + add_error (std::move (error)); + + return nullptr; + } + trait_items.push_back (std::move (trait_item)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip after something + return nullptr; + } + + trait_items.shrink_to_fit (); + return std::unique_ptr ( 
+ new AST::Trait (std::move (ident), is_unsafe, is_auto_trait, + std::move (generic_params), std::move (type_param_bounds), + std::move (where_clause), std::move (trait_items), + std::move (vis), std::move (outer_attrs), + std::move (inner_attrs), locus)); +} + +// Parses a trait item used inside traits (not trait, the Item). +template +std::unique_ptr +Parser::parse_trait_item () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + + auto vis = vis_res.value (); + + // lookahead to determine what type of trait item to parse + const_TokenPtr tok = lexer.peek_token (); + switch (tok->get_id ()) + { + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: + // these seem to be SimplePath tokens, so this is a macro invocation + // semi + return parse_macro_invocation_semi (std::move (outer_attrs)); + case IDENTIFIER: + if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) + return parse_function (std::move (vis), std::move (outer_attrs)); + else + return parse_macro_invocation_semi (std::move (outer_attrs)); + case TYPE: + return parse_trait_type (std::move (outer_attrs), vis); + case CONST: + // disambiguate with function qualifier + if (lexer.peek_token (1)->get_id () == IDENTIFIER) + { + return parse_trait_const (std::move (outer_attrs)); + } + // else, fallthrough to function + // TODO: find out how to disable gcc "implicit fallthrough" error + gcc_fallthrough (); + case ASYNC: + case UNSAFE: + case EXTERN_KW: + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs)); + default: + break; + } + add_error (Error (tok->get_locus (), + "unrecognised token %qs for item in trait", + tok->get_token_description ())); + // skip? + return nullptr; +} + +// Parse a typedef trait item. 
+template +std::unique_ptr +Parser::parse_trait_type (AST::AttrVec outer_attrs, + AST::Visibility vis) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (TYPE); + + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + // Parse optional generic parameters for GATs (Generic Associated Types) + std::vector> generic_params; + if (lexer.peek_token ()->get_id () == LEFT_ANGLE) + { + generic_params = parse_generic_params_in_angles (); + } + + std::vector> bounds; + + // parse optional colon + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse optional type param bounds + bounds + = parse_type_param_bounds ([] (TokenId id) { return id == SEMICOLON; }); + // bounds = parse_type_param_bounds (); + } + + if (!skip_token (SEMICOLON)) + { + // skip? + return nullptr; + } + + return std::unique_ptr ( + new AST::TraitItemType (std::move (ident), std::move (generic_params), + std::move (bounds), std::move (outer_attrs), vis, + locus)); +} + +// Parses a constant trait item. +template +std::unique_ptr +Parser::parse_trait_const (AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (CONST); + + // parse constant item name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse constant trait item type + std::unique_ptr type = parse_type (); + + // parse constant trait body expression, if it exists + std::unique_ptr const_body = nullptr; + if (lexer.peek_token ()->get_id () == EQUAL) + { + lexer.skip_token (); + + // expression must exist, so parse it + const_body = parse_expr (); + } + + if (!skip_token (SEMICOLON)) + { + // skip after something? 
+ return nullptr; + } + + return std::unique_ptr (new AST::ConstantItem ( + std::move (ident), AST::Visibility::create_private (), std::move (type), + std::move (const_body), std::move (outer_attrs), locus)); +} + +/* Parses a struct "impl" item (both inherent impl and trait impl can be + * parsed here), */ +template +std::unique_ptr +Parser::parse_impl (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + /* Note that only trait impls are allowed to be unsafe. So if unsafe, it + * must be a trait impl. However, this isn't enough for full disambiguation, + * so don't branch here. */ + location_t locus = lexer.peek_token ()->get_locus (); + bool is_unsafe = false; + if (lexer.peek_token ()->get_id () == UNSAFE) + { + lexer.skip_token (); + is_unsafe = true; + } + + if (!skip_token (IMPL)) + { + skip_after_next_block (); + return nullptr; + } + + // parse generic params (shared by trait and inherent impls) + std::vector> generic_params + = parse_generic_params_in_angles (); + + // Again, trait impl-only feature, but optional one, so can be used for + // branching yet. + bool has_exclam = false; + if (lexer.peek_token ()->get_id () == EXCLAM) + { + lexer.skip_token (); + has_exclam = true; + } + + /* FIXME: code that doesn't look shit for TypePath. Also, make sure this + * doesn't parse too much and not work. 
*/ + AST::TypePath type_path = parse_type_path (); + if (type_path.is_error () || lexer.peek_token ()->get_id () != FOR) + { + /* cannot parse type path (or not for token next, at least), so must be + * inherent impl */ + + // hacky conversion of TypePath stack object to Type pointer + std::unique_ptr type = nullptr; + if (!type_path.is_error ()) + type = std::unique_ptr ( + new AST::TypePath (std::move (type_path))); + else + type = parse_type (); + + // Type is required, so error if null + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in inherent impl"); + add_error (std::move (error)); + + skip_after_next_block (); + return nullptr; + } + + // parse optional where clause + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + // TODO: does this still skip properly? + skip_after_end_block (); + return nullptr; + } + + // parse inner attributes (optional) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse inherent impl items + std::vector> impl_items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr impl_item + = parse_inherent_impl_item (); + + if (impl_item == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse inherent impl item in inherent impl"); + add_error (std::move (error)); + + return nullptr; + } + + impl_items.push_back (std::move (impl_item)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed inherent impl"); + + impl_items.shrink_to_fit (); + + return std::unique_ptr (new AST::InherentImpl ( + std::move (impl_items), std::move (generic_params), std::move (type), + std::move (where_clause), std::move (vis), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } + else + { + // type path must both be valid and next token is for, so 
trait impl + if (!skip_token (FOR)) + { + skip_after_next_block (); + return nullptr; + } + + // parse type + std::unique_ptr type = parse_type (); + // ensure type is included as it is required + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in trait impl"); + add_error (std::move (error)); + + skip_after_next_block (); + return nullptr; + } + + // parse optional where clause + AST::WhereClause where_clause = parse_where_clause (); + + if (!skip_token (LEFT_CURLY)) + { + // TODO: does this still skip properly? + skip_after_end_block (); + return nullptr; + } + + // parse inner attributes (optional) + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse trait impl items + std::vector> impl_items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr impl_item + = parse_trait_impl_item (); + + if (impl_item == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse trait impl item in trait impl"); + add_error (std::move (error)); + + return nullptr; + } + + impl_items.push_back (std::move (impl_item)); + + t = lexer.peek_token (); + + // DEBUG + rust_debug ("successfully parsed a trait impl item"); + } + // DEBUG + rust_debug ("successfully finished trait impl items"); + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere + return nullptr; + } + + // DEBUG + rust_debug ("successfully parsed trait impl"); + + impl_items.shrink_to_fit (); + + return std::unique_ptr ( + new AST::TraitImpl (std::move (type_path), is_unsafe, has_exclam, + std::move (impl_items), std::move (generic_params), + std::move (type), std::move (where_clause), + std::move (vis), std::move (inner_attrs), + std::move (outer_attrs), locus)); + } +} + +// Parses a single inherent impl item (item inside an inherent impl block). 
+template +std::unique_ptr +Parser::parse_inherent_impl_item () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // TODO: cleanup - currently an unreadable mess + + // branch on next token: + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + // FIXME: Arthur: Do we need to some lookahead here? + return parse_macro_invocation_semi (outer_attrs); + case SUPER: + case SELF: + case CRATE: + case PUB: + { + // visibility, so not a macro invocation semi - must be constant, + // function, or method + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); + + // TODO: is a recursive call to parse_inherent_impl_item better? + switch (lexer.peek_token ()->get_id ()) + { + case EXTERN_KW: + case UNSAFE: + case FN_KW: + // function or method + return parse_inherent_impl_function_or_method (std::move (vis), + std::move ( + outer_attrs)); + case CONST: + // lookahead to resolve production - could be function/method or + // const item + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + return parse_const_item (std::move (vis), + std::move (outer_attrs)); + case UNSAFE: + case EXTERN_KW: + case FN_KW: + return parse_inherent_impl_function_or_method (std::move (vis), + std::move ( + outer_attrs)); + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in some sort of const " + "item in inherent impl", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + default: + add_error ( + Error (t->get_locus (), + "unrecognised token %qs for item in inherent impl", + t->get_token_description ())); + // skip? 
+ return nullptr; + } + } + case ASYNC: + case EXTERN_KW: + case UNSAFE: + case FN_KW: + // function or method + return parse_inherent_impl_function_or_method ( + AST::Visibility::create_private (), std::move (outer_attrs)); + case CONST: + /* lookahead to resolve production - could be function/method or const + * item */ + t = lexer.peek_token (1); + + switch (t->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + return parse_const_item (AST::Visibility::create_private (), + std::move (outer_attrs)); + case UNSAFE: + case EXTERN_KW: + case FN_KW: + return parse_inherent_impl_function_or_method ( + AST::Visibility::create_private (), std::move (outer_attrs)); + default: + add_error (Error (t->get_locus (), + "unexpected token %qs in some sort of const item " + "in inherent impl", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + rust_unreachable (); + default: + add_error (Error (t->get_locus (), + "unrecognised token %qs for item in inherent impl", + t->get_token_description ())); + + // skip? + return nullptr; + } +} + +/* For internal use only by parse_inherent_impl_item() - splits giant method + * into smaller ones and prevents duplication of logic. Strictly, this parses + * a function or method item inside an inherent impl item block. */ +// TODO: make this a templated function with "return type" as type param - +// InherentImplItem is this specialisation of the template while TraitImplItem +// will be the other. 
+template +std::unique_ptr +Parser::parse_inherent_impl_function_or_method ( + AST::Visibility vis, AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + // parse function or method qualifiers + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + skip_token (FN_KW); + + // parse function or method name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + return nullptr; + + Identifier ident{ident_tok}; + + // parse generic params + std::vector> generic_params + = parse_generic_params_in_angles (); + + if (!skip_token (LEFT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + // now for function vs method disambiguation - method has opening "self" + // param + auto initial_param = parse_self_param (); + + if (!initial_param.has_value () + && initial_param.error () != ParseSelfError::NOT_SELF) + return nullptr; + + /* FIXME: ensure that self param doesn't accidently consume tokens for a + * function one idea is to lookahead up to 4 tokens to see whether self is + * one of them */ + bool is_method = false; + if (initial_param.has_value ()) + { + if ((*initial_param)->is_self ()) + is_method = true; + + /* skip comma so function and method regular params can be parsed in + * same way */ + if (lexer.peek_token ()->get_id () == COMMA) + lexer.skip_token (); + } + + // parse trait function params + std::vector> function_params + = parse_function_params ([] (TokenId id) { return id == RIGHT_PAREN; }); + + if (initial_param.has_value ()) + function_params.insert (function_params.begin (), + std::move (*initial_param)); + + if (!skip_token (RIGHT_PAREN)) + { + skip_after_end_block (); + return nullptr; + } + + // parse return type (optional) + std::unique_ptr return_type = parse_function_return_type (); + + // parse where clause (optional) + AST::WhereClause where_clause = parse_where_clause (); + + tl::optional> body = tl::nullopt; + if (lexer.peek_token ()->get_id () == 
SEMICOLON) + lexer.skip_token (); + else + { + auto result = parse_block_expr (); + + if (result == nullptr) + { + Error error ( + lexer.peek_token ()->get_locus (), + "could not parse definition in inherent impl %s definition", + is_method ? "method" : "function"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + body = std::move (result); + } + + return std::unique_ptr ( + new AST::Function (std::move (ident), std::move (qualifiers), + std::move (generic_params), std::move (function_params), + std::move (return_type), std::move (where_clause), + std::move (body), std::move (vis), + std::move (outer_attrs), locus)); +} + +// Parses a single trait impl item (item inside a trait impl block). +template +std::unique_ptr +Parser::parse_trait_impl_item () +{ + // parse outer attributes (if they exist) + AST::AttrVec outer_attrs = parse_outer_attributes (); + + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto visibility = vis_res.value (); + + // branch on next token: + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case SUPER: + case SELF: + case CRATE: + case DOLLAR_SIGN: + // these seem to be SimplePath tokens, so this is a macro invocation + // semi + return parse_macro_invocation_semi (std::move (outer_attrs)); + case IDENTIFIER: + if (lexer.peek_token ()->get_str () == Values::WeakKeywords::DEFAULT) + return parse_trait_impl_function_or_method (visibility, + std::move (outer_attrs)); + else + return parse_macro_invocation_semi (std::move (outer_attrs)); + case TYPE: + return parse_type_alias (visibility, std::move (outer_attrs)); + case EXTERN_KW: + case UNSAFE: + case FN_KW: + // function or method + return parse_trait_impl_function_or_method (visibility, + std::move (outer_attrs)); + case ASYNC: + return parse_async_item (visibility, std::move (outer_attrs)); + case CONST: + // lookahead to resolve production - could be function/method or const + // item + t = lexer.peek_token 
(1); + + switch (t->get_id ()) + { + case IDENTIFIER: + case UNDERSCORE: + return parse_const_item (visibility, std::move (outer_attrs)); + case UNSAFE: + case EXTERN_KW: + case FN_KW: + return parse_trait_impl_function_or_method (visibility, + std::move (outer_attrs)); + default: + add_error (Error ( + t->get_locus (), + "unexpected token %qs in some sort of const item in trait impl", + t->get_token_description ())); + + lexer.skip_token (1); // TODO: is this right thing to do? + return nullptr; + } + rust_unreachable (); + default: + break; + } + add_error (Error (t->get_locus (), + "unrecognised token %qs for item in trait impl", + t->get_token_description ())); + + // skip? + return nullptr; +} + +/* For internal use only by parse_trait_impl_item() - splits giant method into + * smaller ones and prevents duplication of logic. Strictly, this parses a + * function or method item inside a trait impl item block. */ +template +std::unique_ptr +Parser::parse_trait_impl_function_or_method ( + AST::Visibility vis, AST::AttrVec outer_attrs) +{ + // this shares virtually all logic with + // parse_inherent_impl_function_or_method + // - template? 
+ location_t locus = lexer.peek_token ()->get_locus (); + + auto is_default = false; + auto t = lexer.peek_token (); + if (t->get_id () == IDENTIFIER + && t->get_str () == Values::WeakKeywords::DEFAULT) + { + is_default = true; + lexer.skip_token (); + } + + // parse function or method qualifiers + AST::FunctionQualifiers qualifiers = parse_function_qualifiers (); + + skip_token (FN_KW); + + // parse function or method name + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + return nullptr; + } + Identifier ident{ident_tok}; + + // DEBUG: + rust_debug ( + "about to start parsing generic params in trait impl function or method"); + + // parse generic params + std::vector> generic_params + = parse_generic_params_in_angles (); + + // DEBUG: + rust_debug ( + "finished parsing generic params in trait impl function or method"); + + if (!skip_token (LEFT_PAREN)) + { + // skip after somewhere? + return nullptr; + } + + // now for function vs method disambiguation - method has opening "self" + // param + auto initial_param = parse_self_param (); + + if (!initial_param.has_value () + && initial_param.error () != ParseSelfError::NOT_SELF) + return nullptr; + + // FIXME: ensure that self param doesn't accidently consume tokens for a + // function + bool is_method = false; + if (initial_param.has_value ()) + { + if ((*initial_param)->is_self ()) + is_method = true; + + // skip comma so function and method regular params can be parsed in + // same way + if (lexer.peek_token ()->get_id () == COMMA) + { + lexer.skip_token (); + } + + // DEBUG + rust_debug ("successfully parsed self param in method trait impl item"); + } + + // DEBUG + rust_debug ( + "started to parse function params in function or method trait impl item"); + + // parse trait function params (only if next token isn't right paren) + std::vector> function_params; + if (lexer.peek_token ()->get_id () != RIGHT_PAREN) + { + function_params + = parse_function_params ([] (TokenId id) 
{ return id == RIGHT_PAREN; }); + + if (function_params.empty ()) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse function params in trait impl %s definition", + is_method ? "method" : "function"); + add_error (std::move (error)); + + skip_after_next_block (); + return nullptr; + } + } + + if (initial_param.has_value ()) + function_params.insert (function_params.begin (), + std::move (*initial_param)); + + // DEBUG + rust_debug ("successfully parsed function params in function or method " + "trait impl item"); + + if (!skip_token (RIGHT_PAREN)) + { + skip_after_next_block (); + return nullptr; + } + + // parse return type (optional) + std::unique_ptr return_type = parse_function_return_type (); + + // DEBUG + rust_debug ( + "successfully parsed return type in function or method trait impl item"); + + // parse where clause (optional) + AST::WhereClause where_clause = parse_where_clause (); + + // DEBUG + rust_debug ( + "successfully parsed where clause in function or method trait impl item"); + + // parse function definition (in block) - semicolon not allowed + tl::optional> body = tl::nullopt; + + if (lexer.peek_token ()->get_id () == SEMICOLON) + lexer.skip_token (); + else + { + auto result = parse_block_expr (); + if (result == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse definition in trait impl %s definition", + is_method ? "method" : "function"); + add_error (std::move (error)); + + skip_after_end_block (); + return nullptr; + } + body = std::move (result); + } + + return std::unique_ptr ( + new AST::Function (std::move (ident), std::move (qualifiers), + std::move (generic_params), std::move (function_params), + std::move (return_type), std::move (where_clause), + std::move (body), std::move (vis), + std::move (outer_attrs), locus, is_default)); +} + +// Parses an extern block of declarations. 
+template +std::unique_ptr +Parser::parse_extern_block (AST::Visibility vis, + AST::AttrVec outer_attrs) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (EXTERN_KW); + + // detect optional abi name + std::string abi; + const_TokenPtr next_tok = lexer.peek_token (); + if (next_tok->get_id () == STRING_LITERAL) + { + lexer.skip_token (); + abi = next_tok->get_str (); + } + + if (!skip_token (LEFT_CURLY)) + { + skip_after_end_block (); + return nullptr; + } + + AST::AttrVec inner_attrs = parse_inner_attributes (); + + // parse declarations inside extern block + std::vector> extern_items; + + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () != RIGHT_CURLY) + { + std::unique_ptr extern_item = parse_external_item (); + + if (extern_item == nullptr) + { + Error error (t->get_locus (), + "failed to parse external item despite not reaching " + "end of extern block"); + add_error (std::move (error)); + + return nullptr; + } + + extern_items.push_back (std::move (extern_item)); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_CURLY)) + { + // skip somewhere + return nullptr; + } + + extern_items.shrink_to_fit (); + + return std::unique_ptr ( + new AST::ExternBlock (std::move (abi), std::move (extern_items), + std::move (vis), std::move (inner_attrs), + std::move (outer_attrs), locus)); +} + +// Parses a single extern block item (static or function declaration). 
+template +std::unique_ptr +Parser::parse_external_item () +{ + // parse optional outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + location_t locus = lexer.peek_token ()->get_locus (); + + // parse optional visibility + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case IDENTIFIER: + return parse_macro_invocation_semi (outer_attrs); + case STATIC_KW: + { + // parse extern static item + lexer.skip_token (); + + // parse mut (optional) + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + lexer.skip_token (); + has_mut = true; + } + + // parse identifier + const_TokenPtr ident_tok = expect_token (IDENTIFIER); + if (ident_tok == nullptr) + { + skip_after_semicolon (); + return nullptr; + } + Identifier ident{ident_tok}; + + if (!skip_token (COLON)) + { + skip_after_semicolon (); + return nullptr; + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in external static item"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + if (!skip_token (SEMICOLON)) + { + // skip after somewhere? + return nullptr; + } + + return std::unique_ptr ( + new AST::ExternalStaticItem (std::move (ident), std::move (type), + has_mut, std::move (vis), + std::move (outer_attrs), locus)); + } + case FN_KW: + return parse_function (std::move (vis), std::move (outer_attrs), true); + + case TYPE: + return parse_external_type_item (std::move (vis), + std::move (outer_attrs)); + default: + // error + add_error ( + Error (t->get_locus (), + "unrecognised token %qs in extern block item declaration", + t->get_token_description ())); + + skip_after_semicolon (); + return nullptr; + } +} + +// Parses a statement (will further disambiguate any statement). 
+template +std::unique_ptr +Parser::parse_stmt (ParseRestrictions restrictions) +{ + // quick exit for empty statement + // FIXME: Can we have empty statements without semicolons? Just nothing? + const_TokenPtr t = lexer.peek_token (); + if (t->get_id () == SEMICOLON) + { + lexer.skip_token (); + return std::unique_ptr ( + new AST::EmptyStmt (t->get_locus ())); + } + + // parse outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parsing this will be annoying because of the many different possibilities + /* best may be just to copy paste in parse_item switch, and failing that try + * to parse outer attributes, and then pass them in to either a let + * statement or (fallback) expression statement. */ + // FIXME: think of a way to do this without such a large switch? + t = lexer.peek_token (); + switch (t->get_id ()) + { + case LET: + // let statement + return parse_let_stmt (std::move (outer_attrs), restrictions); + case PUB: + case MOD: + case EXTERN_KW: + case USE: + case FN_KW: + case TYPE: + case STRUCT_KW: + case ENUM_KW: + case CONST: + case STATIC_KW: + case AUTO: + case TRAIT: + case IMPL: + case MACRO: + /* TODO: implement union keyword but not really because of + * context-dependence crappy hack way to parse a union written below to + * separate it from the good code. 
*/ + // case UNION: + case UNSAFE: // maybe - unsafe traits are a thing + /* if any of these (should be all possible VisItem prefixes), parse a + * VisItem can't parse item because would require reparsing outer + * attributes */ + // may also be unsafe block + if (lexer.peek_token (1)->get_id () == LEFT_CURLY) + { + return parse_expr_stmt (std::move (outer_attrs), restrictions); + } + else + { + return parse_vis_item (std::move (outer_attrs)); + } + break; + // crappy hack to do union "keyword" + case IDENTIFIER: + if (t->get_str () == Values::WeakKeywords::UNION + && lexer.peek_token (1)->get_id () == IDENTIFIER) + { + return parse_vis_item (std::move (outer_attrs)); + // or should this go straight to parsing union? + } + else if (is_macro_rules_def (t)) + { + // macro_rules! macro item + return parse_macro_rules_def (std::move (outer_attrs)); + } + gcc_fallthrough (); + // TODO: find out how to disable gcc "implicit fallthrough" warning + default: + // fallback: expression statement + return parse_expr_stmt (std::move (outer_attrs), restrictions); + break; + } +} + +// Parses a let statement. 
+template +std::unique_ptr +Parser::parse_let_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + location_t locus = lexer.peek_token ()->get_locus (); + skip_token (LET); + + // parse pattern (required) + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in let statement"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + // parse type declaration (optional) + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + // must have a type declaration + lexer.skip_token (); + + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in let statement"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + } + + // parse expression to set variable to (optional) + std::unique_ptr expr = nullptr; + if (lexer.peek_token ()->get_id () == EQUAL) + { + // must have an expression + lexer.skip_token (); + + expr = parse_expr (); + if (expr == nullptr) + { + skip_after_semicolon (); + return nullptr; + } + } + + tl::optional> else_expr = tl::nullopt; + if (maybe_skip_token (ELSE)) + else_expr = parse_block_expr (); + + if (restrictions.consume_semi) + { + // `stmt` macro variables are parsed without a semicolon, but should be + // parsed as a full statement when interpolated. This should be handled + // by having the interpolated statement be distinguishable from normal + // tokens, e.g. by NT tokens. 
+ if (restrictions.allow_close_after_expr_stmt) + maybe_skip_token (SEMICOLON); + else if (!skip_token (SEMICOLON)) + return nullptr; + } + + return std::unique_ptr ( + new AST::LetStmt (std::move (pattern), std::move (expr), std::move (type), + std::move (else_expr), std::move (outer_attrs), locus)); +} + +template +tl::optional +Parser::parse_generic_arg () +{ + auto tok = lexer.peek_token (); + std::unique_ptr expr = nullptr; + + switch (tok->get_id ()) + { + case IDENTIFIER: + { + // This is a bit of a weird situation: With an identifier token, we + // could either have a valid type or a macro (FIXME: anything else?). So + // we need one bit of lookahead to differentiate if this is really + auto next_tok = lexer.peek_token (1); + if (next_tok->get_id () == LEFT_ANGLE + || next_tok->get_id () == SCOPE_RESOLUTION + || next_tok->get_id () == EXCLAM) + { + auto type = parse_type (); + if (type) + return AST::GenericArg::create_type (std::move (type)); + else + return tl::nullopt; + } + else if (next_tok->get_id () == COLON) + { + lexer.skip_token (); // skip ident + lexer.skip_token (); // skip colon + + auto tok = lexer.peek_token (); + std::vector> bounds + = parse_type_param_bounds (); + + auto type = std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), tok->get_locus (), + false)); + if (type) + return AST::GenericArg::create_type (std::move (type)); + else + return tl::nullopt; + } + lexer.skip_token (); + return AST::GenericArg::create_ambiguous (tok->get_str (), + tok->get_locus ()); + } + case LEFT_CURLY: + expr = parse_block_expr (); + break; + case MINUS: + case STRING_LITERAL: + case CHAR_LITERAL: + case INT_LITERAL: + case FLOAT_LITERAL: + case TRUE_LITERAL: + case FALSE_LITERAL: + expr = parse_literal_expr (); + break; + // FIXME: Because of this, error reporting is garbage for const generic + // parameter's default values + default: + { + auto type = parse_type (); + // FIXME: Find a better way to do this? 
+ if (type) + return AST::GenericArg::create_type (std::move (type)); + else + return tl::nullopt; + } + } + + if (!expr) + return tl::nullopt; + + return AST::GenericArg::create_const (std::move (expr)); +} + +// Parses the generic arguments in each path segment. +template +AST::GenericArgs +Parser::parse_path_generic_args () +{ + if (lexer.peek_token ()->get_id () == LEFT_SHIFT) + lexer.split_current_token (LEFT_ANGLE, LEFT_ANGLE); + + if (!skip_token (LEFT_ANGLE)) + { + // skip after somewhere? + return AST::GenericArgs::create_empty (); + } + + // We need to parse all lifetimes, then parse types and const generics in + // any order. + + // try to parse lifetimes first + std::vector lifetime_args; + + const_TokenPtr t = lexer.peek_token (); + location_t locus = t->get_locus (); + while (!Parse::Utils::is_right_angle_tok (t->get_id ())) + { + auto lifetime = parse_lifetime (false); + if (!lifetime) + { + // not necessarily an error + break; + } + + lifetime_args.push_back (std::move (lifetime.value ())); + + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + { + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // try to parse types and const generics second + std::vector generic_args; + + // TODO: think of better control structure + t = lexer.peek_token (); + while (!Parse::Utils::is_right_angle_tok (t->get_id ())) + { + // FIXME: Is it fine to break if there is one binding? Can't there be + // bindings in between types? + + // ensure not binding being parsed as type accidently + if (t->get_id () == IDENTIFIER + && lexer.peek_token (1)->get_id () == EQUAL) + break; + + auto arg = parse_generic_arg (); + if (arg) + { + generic_args.emplace_back (std::move (arg.value ())); + } + + // FIXME: Do we need to break if we encounter an error? 
+ + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + break; + + // skip comma + lexer.skip_token (); + t = lexer.peek_token (); + } + + // try to parse bindings third + std::vector binding_args; + + // TODO: think of better control structure + t = lexer.peek_token (); + while (!Parse::Utils::is_right_angle_tok (t->get_id ())) + { + AST::GenericArgsBinding binding = parse_generic_args_binding (); + if (binding.is_error ()) + { + // not necessarily an error + break; + } + + binding_args.push_back (std::move (binding)); + + // if next token isn't comma, then it must be end of list + if (lexer.peek_token ()->get_id () != COMMA) + { + break; + } + // skip comma + lexer.skip_token (); + + t = lexer.peek_token (); + } + + // skip any trailing commas + if (lexer.peek_token ()->get_id () == COMMA) + lexer.skip_token (); + + if (!skip_generics_right_angle ()) + return AST::GenericArgs::create_empty (); + + lifetime_args.shrink_to_fit (); + generic_args.shrink_to_fit (); + binding_args.shrink_to_fit (); + + return AST::GenericArgs (std::move (lifetime_args), std::move (generic_args), + std::move (binding_args), locus); +} + +// Parses a binding in a generic args path segment. +template +AST::GenericArgsBinding +Parser::parse_generic_args_binding () +{ + const_TokenPtr ident_tok = lexer.peek_token (); + if (ident_tok->get_id () != IDENTIFIER) + { + // allow non error-inducing use + // skip somewhere? + return AST::GenericArgsBinding::create_error (); + } + lexer.skip_token (); + Identifier ident{ident_tok}; + + if (!skip_token (EQUAL)) + { + // skip after somewhere? + return AST::GenericArgsBinding::create_error (); + } + + // parse type (required) + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + // skip somewhere? + return AST::GenericArgsBinding::create_error (); + } + + return AST::GenericArgsBinding (std::move (ident), std::move (type), + ident_tok->get_locus ()); +} + +// Parses a self param. 
Also handles self param not existing. +template +tl::expected, ParseSelfError> +Parser::parse_self_param () +{ + bool has_reference = false; + AST::Lifetime lifetime = AST::Lifetime::elided (); + + location_t locus = lexer.peek_token ()->get_locus (); + + // TODO: Feels off, find a better way to clearly express this + std::vector> ptrs + = {{ASTERISK, SELF} /* *self */, + {ASTERISK, CONST, SELF} /* *const self */, + {ASTERISK, MUT, SELF} /* *mut self */}; + + for (auto &s : ptrs) + { + size_t i = 0; + for (i = 0; i < s.size (); i++) + if (lexer.peek_token (i)->get_id () != s[i]) + break; + if (i == s.size ()) + { + rust_error_at (lexer.peek_token ()->get_locus (), + "cannot pass % by raw pointer"); + return tl::make_unexpected (ParseSelfError::SELF_PTR); + } + } + + // Trying to find those patterns: + // + // &'lifetime mut self + // &'lifetime self + // & mut self + // & self + // mut self + // self + // + // If not found, it is probably a function, exit and let function parsing + // handle it. + bool is_self = false; + for (size_t i = 0; i < 5; i++) + if (lexer.peek_token (i)->get_id () == SELF) + is_self = true; + + if (!is_self) + return tl::make_unexpected (ParseSelfError::NOT_SELF); + + // test if self is a reference parameter + if (lexer.peek_token ()->get_id () == AMP) + { + has_reference = true; + lexer.skip_token (); + + // now test whether it has a lifetime + if (lexer.peek_token ()->get_id () == LIFETIME) + { + // something went wrong somehow + if (auto parsed_lifetime = parse_lifetime (true)) + { + lifetime = parsed_lifetime.value (); + } + else + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse lifetime in self param"); + add_error (std::move (error)); + + // skip after somewhere? 
+ return tl::make_unexpected (ParseSelfError::PARSING); + } + } + } + + // test for mut + bool has_mut = false; + if (lexer.peek_token ()->get_id () == MUT) + { + has_mut = true; + lexer.skip_token (); + } + + // skip self token + const_TokenPtr self_tok = lexer.peek_token (); + if (self_tok->get_id () != SELF) + { + // skip after somewhere? + return tl::make_unexpected (ParseSelfError::NOT_SELF); + } + lexer.skip_token (); + + // parse optional type + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // type is now required + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "could not parse type in self param"); + add_error (std::move (error)); + + // skip after somewhere? + return tl::make_unexpected (ParseSelfError::PARSING); + } + } + + // ensure that cannot have both type and reference + if (type != nullptr && has_reference) + { + Error error ( + lexer.peek_token ()->get_locus (), + "cannot have both a reference and a type specified in a self param"); + add_error (std::move (error)); + + // skip after somewhere? + return tl::make_unexpected (ParseSelfError::PARSING); + } + + if (has_reference) + { + return std::make_unique (std::move (lifetime), has_mut, + locus); + } + else + { + // note that type may be nullptr here and that's fine + return std::make_unique (std::move (type), has_mut, + locus); + } +} + +/* Parses an expression or macro statement. 
*/ +template +std::unique_ptr +Parser::parse_expr_stmt (AST::AttrVec outer_attrs, + ParseRestrictions restrictions) +{ + location_t locus = lexer.peek_token ()->get_locus (); + + std::unique_ptr expr; + + switch (lexer.peek_token ()->get_id ()) + { + case IDENTIFIER: + case CRATE: + case SUPER: + case SELF: + case SELF_ALIAS: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + { + AST::PathInExpression path = parse_path_in_expression (); + std::unique_ptr null_denotation; + + if (lexer.peek_token ()->get_id () == EXCLAM) + { + std::unique_ptr invoc + = parse_macro_invocation_partial (std::move (path), + std::move (outer_attrs)); + + if (restrictions.consume_semi && maybe_skip_token (SEMICOLON)) + { + invoc->add_semicolon (); + // Macro invocation with semicolon. + return invoc; + } + + TokenId after_macro = lexer.peek_token ()->get_id (); + + if (restrictions.allow_close_after_expr_stmt + && (after_macro == RIGHT_PAREN || after_macro == RIGHT_CURLY + || after_macro == RIGHT_SQUARE)) + return invoc; + + if (invoc->get_invoc_data ().get_delim_tok_tree ().get_delim_type () + == AST::CURLY + && after_macro != DOT && after_macro != QUESTION_MARK) + { + rust_debug ("braced macro statement"); + return invoc; + } + + null_denotation = std::move (invoc); + } + else + { + null_denotation + = null_denotation_path (std::move (path), {}, restrictions); + } + + expr = left_denotations (std::move (null_denotation), LBP_LOWEST, + std::move (outer_attrs), restrictions); + break; + } + default: + restrictions.expr_can_be_stmt = true; + expr = parse_expr (std::move (outer_attrs), restrictions); + break; + } + + if (expr == nullptr) + { + // expr is required, error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse expr in expr statement"); + add_error (std::move (error)); + + skip_after_semicolon (); + return nullptr; + } + + bool has_semi = false; + + if (restrictions.consume_semi) + { + if (maybe_skip_token (SEMICOLON)) + { + has_semi = true; + } + else if 
(expr->is_expr_without_block ()) + { + if (restrictions.allow_close_after_expr_stmt) + { + TokenId id = lexer.peek_token ()->get_id (); + if (id != RIGHT_PAREN && id != RIGHT_CURLY && id != RIGHT_SQUARE) + { + expect_token (SEMICOLON); + return nullptr; + } + } + else + { + expect_token (SEMICOLON); + return nullptr; + } + } + } + + return std::unique_ptr ( + new AST::ExprStmt (std::move (expr), locus, has_semi)); +} + +// Parses a loop label used in loop expressions. +template +tl::expected +Parser::parse_loop_label (const_TokenPtr tok) +{ + // parse lifetime - if doesn't exist, assume no label + if (tok->get_id () != LIFETIME) + { + // not necessarily an error + return tl::unexpected ( + ParseLoopLabelError::NOT_LOOP_LABEL); + } + /* FIXME: check for named lifetime requirement here? or check in semantic + * analysis phase? */ + AST::Lifetime label = lifetime_from_token (tok); + + if (!skip_token (COLON)) + { + // skip somewhere? + return tl::unexpected ( + ParseLoopLabelError::MISSING_COLON); + } + + return tl::expected ( + AST::LoopLabel (std::move (label), tok->get_locus ())); +} + +// Parses the "pattern" part of the match arm (the 'case x:' equivalent). +template +AST::MatchArm +Parser::parse_match_arm () +{ + // parse optional outer attributes + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // DEBUG + rust_debug ("about to start parsing match arm patterns"); + + // break early if find right curly + if (lexer.peek_token ()->get_id () == RIGHT_CURLY) + { + // not an error + return AST::MatchArm::create_error (); + } + + // parse match arm patterns - at least 1 is required + std::vector> match_arm_patterns + = parse_match_arm_patterns (RIGHT_CURLY); + if (match_arm_patterns.empty ()) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse any patterns in match arm"); + add_error (std::move (error)); + + // skip somewhere? 
+ return AST::MatchArm::create_error (); + } + + // DEBUG + rust_debug ("successfully parsed match arm patterns"); + + // parse match arm guard expr if it exists + std::unique_ptr guard_expr = nullptr; + if (lexer.peek_token ()->get_id () == IF) + { + lexer.skip_token (); + + guard_expr = parse_expr (); + if (guard_expr == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse guard expression in match arm"); + add_error (std::move (error)); + + // skip somewhere? + return AST::MatchArm::create_error (); + } + } + + // DEBUG + rust_debug ("successfully parsed match arm"); + + return AST::MatchArm (std::move (match_arm_patterns), + lexer.peek_token ()->get_locus (), + std::move (guard_expr), std::move (outer_attrs)); +} + +/* Parses the patterns used in a match arm. End token id is the id of the + * token that would exist after the patterns are done (e.g. '}' for match + * expr, '=' for if let and while let). */ +template +std::vector> +Parser::parse_match_arm_patterns (TokenId end_token_id) +{ + // skip optional leading '|' + if (lexer.peek_token ()->get_id () == PIPE) + lexer.skip_token (); + /* TODO: do I even need to store the result of this? can't be used. + * If semantically different, I need a wrapped "match arm patterns" object + * for this. */ + + std::vector> patterns; + + // quick break out if end_token_id + if (lexer.peek_token ()->get_id () == end_token_id) + return patterns; + + // parse required pattern - if doesn't exist, return empty + std::unique_ptr initial_pattern = parse_pattern (); + if (initial_pattern == nullptr) + { + // FIXME: should this be an error? 
+ return patterns; + } + patterns.push_back (std::move (initial_pattern)); + + // DEBUG + rust_debug ("successfully parsed initial match arm pattern"); + + // parse new patterns as long as next char is '|' + const_TokenPtr t = lexer.peek_token (); + while (t->get_id () == PIPE) + { + // skip pipe token + lexer.skip_token (); + + // break if hit end token id + if (lexer.peek_token ()->get_id () == end_token_id) + break; + + // parse pattern + std::unique_ptr pattern = parse_pattern (); + if (pattern == nullptr) + { + // this is an error + Error error (lexer.peek_token ()->get_locus (), + "failed to parse pattern in match arm patterns"); + add_error (std::move (error)); + + // skip somewhere? + return {}; + } + + patterns.push_back (std::move (pattern)); + + t = lexer.peek_token (); + } + + patterns.shrink_to_fit (); + + return patterns; +} + +// Parses a single parameter used in a closure definition. +template +AST::ClosureParam +Parser::parse_closure_param () +{ + AST::AttrVec outer_attrs = parse_outer_attributes (); + + // parse pattern (which is required) + std::unique_ptr pattern = parse_pattern_no_alt (); + if (pattern == nullptr) + { + // not necessarily an error + return AST::ClosureParam::create_error (); + } + + // parse optional type of param + std::unique_ptr type = nullptr; + if (lexer.peek_token ()->get_id () == COLON) + { + lexer.skip_token (); + + // parse type, which is now required + type = parse_type (); + if (type == nullptr) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse type in closure parameter"); + add_error (std::move (error)); + + // skip somewhere? + return AST::ClosureParam::create_error (); + } + } + + location_t loc = pattern->get_locus (); + return AST::ClosureParam (std::move (pattern), loc, std::move (type), + std::move (outer_attrs)); +} + +// Parses a type (will further disambiguate any type). +template +std::unique_ptr +Parser::parse_type (bool save_errors) +{ + /* rules for all types: + * NeverType: '!' 
+ * SliceType: '[' Type ']' + * InferredType: '_' + * MacroInvocation: SimplePath '!' DelimTokenTree + * ParenthesisedType: '(' Type ')' + * ImplTraitType: 'impl' TypeParamBounds + * TypeParamBounds (not type) TypeParamBound ( '+' TypeParamBound )* '+'? + * TypeParamBound Lifetime | TraitBound + * ImplTraitTypeOneBound: 'impl' TraitBound + * TraitObjectType: 'dyn'? TypeParamBounds + * TraitObjectTypeOneBound: 'dyn'? TraitBound + * TraitBound '?'? ForLifetimes? TypePath | '(' '?'? + * ForLifetimes? TypePath ')' BareFunctionType: ForLifetimes? + * FunctionQualifiers 'fn' etc. ForLifetimes (not type) 'for' '<' + * LifetimeParams '>' FunctionQualifiers ( 'async' | 'const' )? + * 'unsafe'? + * ('extern' abi?)? QualifiedPathInType: '<' Type ( 'as' TypePath )? '>' + * ( + * '::' TypePathSegment )+ TypePath: '::'? TypePathSegment ( + * '::' TypePathSegment)* ArrayType: '[' Type ';' Expr ']' + * ReferenceType: '&' Lifetime? 'mut'? TypeNoBounds + * RawPointerType: '*' ( 'mut' | 'const' ) TypeNoBounds + * TupleType: '(' Type etc. - regular tuple stuff. Also + * regular tuple vs parenthesised precedence + * + * Disambiguate between macro and type path via type path being parsed, and + * then if '!' found, convert type path to simple path for macro. Usual + * disambiguation for tuple vs parenthesised. For ImplTraitType and + * TraitObjectType individual disambiguations, they seem more like "special + * cases", so probably just try to parse the more general ImplTraitType or + * TraitObjectType and return OneBound versions if they satisfy those + * criteria. 
*/ + + const_TokenPtr t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + // never type - can't be macro as no path beforehand + lexer.skip_token (); + return std::unique_ptr ( + new AST::NeverType (t->get_locus ())); + case LEFT_SQUARE: + // slice type or array type - requires further disambiguation + return parse_slice_or_array_type (); + case LEFT_SHIFT: + case LEFT_ANGLE: + { + // qualified path in type + AST::QualifiedPathInType path = parse_qualified_path_in_type (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse qualified path in type"); + add_error (std::move (error)); + } + + return nullptr; + } + return std::unique_ptr ( + new AST::QualifiedPathInType (std::move (path))); + } + case UNDERSCORE: + // inferred type + lexer.skip_token (); + return std::unique_ptr ( + new AST::InferredType (t->get_locus ())); + case ASTERISK: + // raw pointer type + return parse_raw_pointer_type (); + case AMP: // does this also include AMP_AMP? + case LOGICAL_AND: + // reference type + return parse_reference_type (); + case LIFETIME: + { + /* probably a lifetime bound, so probably type param bounds in + * TraitObjectType */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + false)); + } + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + { + // macro invocation or type path - requires further disambiguation. + /* for parsing path component of each rule, perhaps parse it as a + * typepath and attempt conversion to simplepath if a trailing '!' is + * found */ + /* Type path also includes TraitObjectTypeOneBound BUT if it starts + * with it, it is exactly the same as a TypePath syntactically, so + * this is a syntactical ambiguity. As such, the parser will parse it + * as a TypePath. 
This, however, does not prevent TraitObjectType from + * starting with a typepath. */ + + // parse path as type path + AST::TypePath path = parse_type_path (); + if (path.is_error ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse path as first component of type"); + add_error (std::move (error)); + } + + return nullptr; + } + location_t locus = path.get_locus (); + + // branch on next token + t = lexer.peek_token (); + switch (t->get_id ()) + { + case EXCLAM: + { + // macro invocation + // convert to simple path + AST::SimplePath macro_path = path.as_simple_path (); + if (macro_path.is_empty ()) + { + if (save_errors) + { + Error error (t->get_locus (), + "failed to parse simple path in macro " + "invocation (for type)"); + add_error (std::move (error)); + } + + return nullptr; + } + + lexer.skip_token (); + + auto tok_tree = parse_delim_token_tree (); + if (!tok_tree) + return nullptr; + + return AST::MacroInvocation::Regular ( + AST::MacroInvocData (std::move (macro_path), + std::move (tok_tree.value ())), + {}, locus); + } + case PLUS: + { + // type param bounds + std::vector> bounds; + + // convert type path to trait bound + std::unique_ptr path_bound ( + new AST::TraitBound (std::move (path), locus, false, false)); + bounds.push_back (std::move (path_bound)); + + /* parse rest of bounds - FIXME: better way to find when to stop + * parsing */ + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists - if not, assume end of sequence + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), locus, false)); + } + default: + // assume that this is a type path and not an error + return std::unique_ptr ( + new AST::TypePath (std::move (path))); + } + } + case LEFT_PAREN: + /* tuple type or parenthesised type - requires 
further disambiguation + * (the usual). ok apparently can be a parenthesised TraitBound too, so + * could be TraitObjectTypeOneBound or TraitObjectType */ + return parse_paren_prefixed_type (); + case FOR: + // TraitObjectTypeOneBound or BareFunctionType + return parse_for_prefixed_type (); + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_KW: + case FN_KW: + // bare function type (with no for lifetimes) + return parse_bare_function_type (std::vector ()); + case IMPL: + lexer.skip_token (); + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), t->get_locus ())); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error (lexer.peek_token ()->get_locus (), + "failed to parse ImplTraitType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + location_t locus = t->get_locus (); + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + return std::unique_ptr ( + new AST::ImplTraitTypeOneBound (std::move (initial_bound), + locus)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::ImplTraitType (std::move (bounds), locus)); + } + case DYN: + case QUESTION_MARK: + { + // either TraitObjectType or TraitObjectTypeOneBound + bool has_dyn = false; + if 
(t->get_id () == DYN) + { + lexer.skip_token (); + has_dyn = true; + } + + if (lexer.peek_token ()->get_id () == LIFETIME) + { + /* cannot be one bound because lifetime prevents it from being + * traitbound */ + std::vector> bounds + = parse_type_param_bounds (); + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + else + { + // should be trait bound, so parse trait bound + std::unique_ptr initial_bound + = parse_trait_bound (); + if (initial_bound == nullptr) + { + if (save_errors) + { + Error error ( + lexer.peek_token ()->get_locus (), + "failed to parse TraitObjectType initial bound"); + add_error (std::move (error)); + } + + return nullptr; + } + + // short cut if next token isn't '+' + t = lexer.peek_token (); + if (t->get_id () != PLUS) + { + // convert trait bound to value object + AST::TraitBound value_bound (*initial_bound); + + // DEBUG: removed as unique ptr, so should auto delete + // delete initial_bound; + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (std::move (value_bound), + t->get_locus (), has_dyn)); + } + + // parse additional type param bounds + std::vector> bounds; + bounds.push_back (std::move (initial_bound)); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // parse bound if it exists + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error as trailing plus may exist + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), t->get_locus (), + has_dyn)); + } + } + default: + if (save_errors) + add_error (Error (t->get_locus (), "unrecognised token %qs in type", + t->get_token_description ())); + + return nullptr; + } +} + +/* Parses a type that has '(' as its first character. Returns a tuple type, + * parenthesised type, TraitObjectTypeOneBound, or TraitObjectType depending + * on following characters. 
*/ +template +std::unique_ptr +Parser::parse_paren_prefixed_type () +{ + /* NOTE: Syntactical ambiguity of a parenthesised trait bound is considered + * a trait bound, not a parenthesised type, so that it can still be used in + * type param bounds. */ + + /* NOTE: this implementation is really shit but I couldn't think of a better + * one. It requires essentially breaking polymorphism and downcasting via + * virtual method abuse, as it was copied from the rustc implementation (in + * which types are reified due to tagged union), after a more OOP attempt by + * me failed. */ + location_t left_delim_locus = lexer.peek_token ()->get_locus (); + + // skip left delim + lexer.skip_token (); + /* while next token isn't close delim, parse comma-separated types, saving + * whether trailing comma happens */ + const_TokenPtr t = lexer.peek_token (); + bool trailing_comma = true; + std::vector> types; + + while (t->get_id () != RIGHT_PAREN) + { + std::unique_ptr type = parse_type (); + if (type == nullptr) + { + Error error (t->get_locus (), + "failed to parse type inside parentheses (probably " + "tuple or parenthesised)"); + add_error (std::move (error)); + + return nullptr; + } + types.push_back (std::move (type)); + + t = lexer.peek_token (); + if (t->get_id () != COMMA) + { + trailing_comma = false; + break; + } + lexer.skip_token (); + + t = lexer.peek_token (); + } + + if (!skip_token (RIGHT_PAREN)) + { + return nullptr; + } + + // if only one type and no trailing comma, then not a tuple type + if (types.size () == 1 && !trailing_comma) + { + // must be a TraitObjectType (with more than one bound) + if (lexer.peek_token ()->get_id () == PLUS) + { + // create type param bounds vector + std::vector> bounds; + + // HACK: convert type to traitbound and add to bounds + std::unique_ptr released_ptr = std::move (types[0]); + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + Error error ( + lexer.peek_token 
()->get_locus (), + "failed to hackily converted parsed type to trait bound"); + add_error (std::move (error)); + + return nullptr; + } + bounds.push_back (std::move (converted_bound)); + + t = lexer.peek_token (); + while (t->get_id () == PLUS) + { + lexer.skip_token (); + + // attempt to parse typeparambound + std::unique_ptr bound + = parse_type_param_bound (); + if (bound == nullptr) + { + // not an error if null + break; + } + bounds.push_back (std::move (bound)); + + t = lexer.peek_token (); + } + + return std::unique_ptr ( + new AST::TraitObjectType (std::move (bounds), left_delim_locus, + false)); + } + else + { + // release vector pointer + std::unique_ptr released_ptr = std::move (types[0]); + /* HACK: attempt to convert to trait bound. if fails, parenthesised + * type */ + std::unique_ptr converted_bound ( + released_ptr->to_trait_bound (true)); + if (converted_bound == nullptr) + { + // parenthesised type + return std::unique_ptr ( + new AST::ParenthesisedType (std::move (released_ptr), + left_delim_locus)); + } + else + { + // trait object type (one bound) + + // get value semantics trait bound + AST::TraitBound value_bound (*converted_bound); + + return std::unique_ptr ( + new AST::TraitObjectTypeOneBound (value_bound, + left_delim_locus)); + } + } + } + else + { + return std::unique_ptr ( + new AST::TupleType (std::move (types), left_delim_locus)); + } + /* TODO: ensure that this ensures that dynamic dispatch for traits is not + * lost somehow */ +} + +/* Parses a type that has 'for' as its first character. This means it has a + * "for lifetimes", so returns either a BareFunctionType, TraitObjectType, or + * TraitObjectTypeOneBound depending on following characters. 
*/
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::Type>
+Parser<ManagedTokenSource>::parse_for_prefixed_type ()
+{
+  location_t for_locus = lexer.peek_token ()->get_locus ();
+  // parse for lifetimes in type
+  std::vector<AST::LifetimeParam> for_lifetimes = parse_for_lifetimes ();
+
+  // branch on next token - either function or a trait type
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case ASYNC:
+    case CONST:
+    case UNSAFE:
+    case EXTERN_KW:
+    case FN_KW:
+      return parse_bare_function_type (std::move (for_lifetimes));
+    case SCOPE_RESOLUTION:
+    case IDENTIFIER:
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case DOLLAR_SIGN:
+      {
+        // path, so trait type
+
+        // parse type path to finish parsing trait bound
+        AST::TypePath path = parse_type_path ();
+
+        t = lexer.peek_token ();
+        if (t->get_id () != PLUS)
+          {
+            // must be one-bound trait type
+            // create trait bound value object
+            AST::TraitBound bound (std::move (path), for_locus, false, false,
+                                   std::move (for_lifetimes));
+
+            return std::unique_ptr<AST::TraitObjectTypeOneBound> (
+              new AST::TraitObjectTypeOneBound (std::move (bound), for_locus));
+          }
+
+        /* more than one bound trait type (or at least parsed as it - could be
+         * trailing '+') create trait bound pointer and bounds.  The for
+         * lifetimes are only moved here because the one-bound path above has
+         * already returned.  */
+        std::unique_ptr<AST::TraitBound> initial_bound (
+          new AST::TraitBound (std::move (path), for_locus, false, false,
+                               std::move (for_lifetimes)));
+        std::vector<std::unique_ptr<AST::TypeParamBound>> bounds;
+        bounds.push_back (std::move (initial_bound));
+
+        while (t->get_id () == PLUS)
+          {
+            lexer.skip_token ();
+
+            // parse type param bound if it exists
+            std::unique_ptr<AST::TypeParamBound> bound
+              = parse_type_param_bound ();
+            if (bound == nullptr)
+              {
+                /* not an error - e.g. trailing plus.  Stop collecting and
+                 * keep the bounds parsed so far, as
+                 * parse_paren_prefixed_type does, instead of discarding the
+                 * whole type.  */
+                break;
+              }
+            bounds.push_back (std::move (bound));
+
+            t = lexer.peek_token ();
+          }
+
+        return std::unique_ptr<AST::TraitObjectType> (
+          new AST::TraitObjectType (std::move (bounds), for_locus, false));
+      }
+    default:
+      // error
+      add_error (Error (t->get_locus (),
+                        "unrecognised token %qs in bare function type or trait "
+                        "object type or trait object type one bound",
+                        t->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+/* Parses a maybe named param used in bare function types.  The given outer
+ * attributes are moved into the resulting param.  Returns an error param
+ * (see AST::MaybeNamedParam::create_error) if the type cannot be parsed.  */
+template <typename ManagedTokenSource>
+AST::MaybeNamedParam
+Parser<ManagedTokenSource>::parse_maybe_named_param (AST::AttrVec outer_attrs)
+{
+  /* Basically guess that param is named if first token is identifier or
+   * underscore and second token is colon. This should probably have no
+   * exceptions. rustc uses backtracking to parse these, but at the time of
+   * writing gccrs has no backtracking capabilities. */
+  const_TokenPtr current = lexer.peek_token ();
+  const_TokenPtr next = lexer.peek_token (1);
+
+  Identifier name;
+  AST::MaybeNamedParam::ParamKind kind = AST::MaybeNamedParam::UNNAMED;
+
+  if (current->get_id () == IDENTIFIER && next->get_id () == COLON)
+    {
+      // named param
+      name = {current};
+      kind = AST::MaybeNamedParam::IDENTIFIER;
+      // skips both the name and the colon
+      lexer.skip_token (1);
+    }
+  else if (current->get_id () == UNDERSCORE && next->get_id () == COLON)
+    {
+      // wildcard param
+      name = {Values::Keywords::UNDERSCORE, current->get_locus ()};
+      kind = AST::MaybeNamedParam::WILDCARD;
+      // skips both the underscore and the colon
+      lexer.skip_token (1);
+    }
+
+  // parse type (required)
+  std::unique_ptr<AST::Type> type = parse_type ();
+  if (type == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "failed to parse type in maybe named param");
+      add_error (std::move (error));
+
+      return AST::MaybeNamedParam::create_error ();
+    }
+
+  return AST::MaybeNamedParam (std::move (name), kind, std::move (type),
+                               std::move (outer_attrs), current->get_locus ());
+}
+
+/* Parses a bare function type (with the given for lifetimes for convenience -
+ * does not parse them itself).
*/
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::BareFunctionType>
+Parser<ManagedTokenSource>::parse_bare_function_type (
+  std::vector<AST::LifetimeParam> for_lifetimes)
+{
+  // TODO: pass in for lifetime location as param
+  location_t best_try_locus = lexer.peek_token ()->get_locus ();
+
+  AST::FunctionQualifiers qualifiers = parse_function_qualifiers ();
+
+  if (!skip_token (FN_KW))
+    return nullptr;
+
+  if (!skip_token (LEFT_PAREN))
+    return nullptr;
+
+  // parse function params, if they exist
+  std::vector<AST::MaybeNamedParam> params;
+  bool is_variadic = false;
+  AST::AttrVec variadic_attrs;
+
+  const_TokenPtr t = lexer.peek_token ();
+  while (t->get_id () != RIGHT_PAREN)
+    {
+      AST::AttrVec temp_attrs = parse_outer_attributes ();
+
+      if (lexer.peek_token ()->get_id () == ELLIPSIS)
+        {
+          // the attributes just parsed belong to the variadic marker
+          lexer.skip_token ();
+          is_variadic = true;
+          variadic_attrs = std::move (temp_attrs);
+
+          t = lexer.peek_token ();
+
+          // the variadic marker must be the last thing in the list
+          if (t->get_id () != RIGHT_PAREN)
+            {
+              Error error (t->get_locus (),
+                           "expected right parentheses after variadic in maybe "
+                           "named function "
+                           "parameters, found %qs",
+                           t->get_token_description ());
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          break;
+        }
+
+      AST::MaybeNamedParam param
+        = parse_maybe_named_param (std::move (temp_attrs));
+      if (param.is_error ())
+        {
+          Error error (
+            lexer.peek_token ()->get_locus (),
+            "failed to parse maybe named param in bare function type");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      params.push_back (std::move (param));
+
+      if (lexer.peek_token ()->get_id () != COMMA)
+        break;
+
+      lexer.skip_token ();
+      t = lexer.peek_token ();
+    }
+
+  if (!skip_token (RIGHT_PAREN))
+    return nullptr;
+
+  // bare function return type, if exists
+  std::unique_ptr<AST::TypeNoBounds> return_type = nullptr;
+  if (lexer.peek_token ()->get_id () == RETURN_TYPE)
+    {
+      lexer.skip_token ();
+
+      // parse required TypeNoBounds
+      return_type = parse_type_no_bounds ();
+      if (return_type == nullptr)
+        {
+          Error error (lexer.peek_token ()->get_locus (),
+                       "failed to parse return type (type no bounds) in bare "
+                       "function type");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+    }
+
+  return std::unique_ptr<AST::BareFunctionType> (
+    new AST::BareFunctionType (std::move (for_lifetimes),
+                               std::move (qualifiers), std::move (params),
+                               is_variadic, std::move (variadic_attrs),
+                               std::move (return_type), best_try_locus));
+}
+
+/* Parses what follows the '&' of a reference type: an optional lifetime, an
+ * optional 'mut' and the required referenced type.  LOCUS is used as the
+ * location of the resulting reference type.  Returns nullptr (after adding
+ * an error) if the lifetime or the referenced type fails to parse.  */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::ReferenceType>
+Parser<ManagedTokenSource>::parse_reference_type_inner (location_t locus)
+{
+  // parse optional lifetime
+  AST::Lifetime lifetime = AST::Lifetime::elided ();
+  if (lexer.peek_token ()->get_id () == LIFETIME)
+    {
+      auto parsed_lifetime = parse_lifetime (true);
+      if (parsed_lifetime)
+        {
+          lifetime = parsed_lifetime.value ();
+        }
+      else
+        {
+          Error error (lexer.peek_token ()->get_locus (),
+                       "failed to parse lifetime in reference type");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+    }
+
+  bool is_mut = false;
+  if (lexer.peek_token ()->get_id () == MUT)
+    {
+      lexer.skip_token ();
+      is_mut = true;
+    }
+
+  // parse type no bounds, which is required
+  std::unique_ptr<AST::TypeNoBounds> type = parse_type_no_bounds ();
+  if (type == nullptr)
+    {
+      Error error (lexer.peek_token ()->get_locus (),
+                   "failed to parse referenced type in reference type");
+      add_error (std::move (error));
+
+      return nullptr;
+    }
+
+  return std::unique_ptr<AST::ReferenceType> (
+    new AST::ReferenceType (is_mut, std::move (type), locus,
+                            std::move (lifetime)));
+}
+
+/* Parses a reference type (mutable or immutable, with optional lifetime).
+ * Must be called with '&' or '&&' as the current token; '&&' is handled as
+ * an immutable reference wrapping the reference parsed after it.  */
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::ReferenceType>
+Parser<ManagedTokenSource>::parse_reference_type ()
+{
+  auto t = lexer.peek_token ();
+  auto locus = t->get_locus ();
+
+  switch (t->get_id ())
+    {
+    case AMP:
+      skip_token (AMP);
+      return parse_reference_type_inner (locus);
+    case LOGICAL_AND:
+      {
+        skip_token (LOGICAL_AND);
+
+        /* Do not wrap a failed inner parse: that would build a reference
+         * type around a null referenced type.  The inner parse has already
+         * added an error.  */
+        auto inner = parse_reference_type_inner (locus);
+        if (inner == nullptr)
+          return nullptr;
+
+        return std::unique_ptr<AST::ReferenceType> (
+          new AST::ReferenceType (false, std::move (inner), locus));
+      }
+    default:
+      rust_unreachable ();
+    }
+}
+
+// Parses a raw (unsafe) pointer type.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::RawPointerType>
+Parser<ManagedTokenSource>::parse_raw_pointer_type ()
+{
+  // the location of the leading '*' becomes the location of the type
+  location_t star_locus = lexer.peek_token ()->get_locus ();
+  skip_token (ASTERISK);
+
+  // a qualifier is mandatory after the asterisk: either 'mut' or 'const'
+  const_TokenPtr qualifier = lexer.peek_token ();
+  AST::RawPointerType::PointerType ptr_kind = AST::RawPointerType::CONST;
+
+  if (qualifier->get_id () == MUT)
+    {
+      ptr_kind = AST::RawPointerType::MUT;
+    }
+  else if (qualifier->get_id () != CONST)
+    {
+      Error error (qualifier->get_locus (),
+                   "unrecognised token %qs in raw pointer type",
+                   qualifier->get_token_description ());
+      add_error (std::move (error));
+
+      return nullptr;
+    }
+  lexer.skip_token ();
+
+  // the pointee is a required type no bounds
+  std::unique_ptr<AST::TypeNoBounds> pointee = parse_type_no_bounds ();
+  if (pointee == nullptr)
+    {
+      add_error (Error (lexer.peek_token ()->get_locus (),
+                        "failed to parse pointed type of raw pointer type"));
+
+      return nullptr;
+    }
+
+  return std::unique_ptr<AST::RawPointerType> (
+    new AST::RawPointerType (ptr_kind, std::move (pointee), star_locus));
+}
+
+/* Parses a slice or array type, depending on following arguments (as
+ * lookahead is not possible).
*/
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeNoBounds>
+Parser<ManagedTokenSource>::parse_slice_or_array_type ()
+{
+  // the location of the opening '[' becomes the location of the type
+  location_t open_locus = lexer.peek_token ()->get_locus ();
+  skip_token (LEFT_SQUARE);
+
+  // both forms start with a required element type
+  std::unique_ptr<AST::Type> elem_type = parse_type ();
+  if (elem_type == nullptr)
+    {
+      add_error (Error (lexer.peek_token ()->get_locus (),
+                        "failed to parse inner type in slice or array type"));
+
+      return nullptr;
+    }
+
+  // the token after the element type decides between slice and array
+  const_TokenPtr separator = lexer.peek_token ();
+
+  if (separator->get_id () == RIGHT_SQUARE)
+    {
+      // "[T]": slice type
+      lexer.skip_token ();
+
+      return std::unique_ptr<AST::SliceType> (
+        new AST::SliceType (std::move (elem_type), open_locus));
+    }
+
+  if (separator->get_id () != SEMICOLON)
+    {
+      Error error (
+        separator->get_locus (),
+        "unrecognised token %qs in slice or array type after inner type",
+        separator->get_token_description ());
+      add_error (std::move (error));
+
+      return nullptr;
+    }
+
+  // "[T; N]": array type, with a required size expression
+  lexer.skip_token ();
+
+  auto length = parse_anon_const ();
+  if (!length)
+    {
+      add_error (Error (lexer.peek_token ()->get_locus (),
+                        "failed to parse size expression in array type"));
+
+      return nullptr;
+    }
+
+  if (!skip_token (RIGHT_SQUARE))
+    return nullptr;
+
+  return std::unique_ptr<AST::ArrayType> (
+    new AST::ArrayType (std::move (elem_type), std::move (*length),
+                        open_locus));
+}
+
+// Parses a type, taking into account type boundary disambiguation.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeNoBounds>
+Parser<ManagedTokenSource>::parse_type_no_bounds ()
+{
+  /* Dispatches on the first token of the type.  Returns nullptr on failure;
+   * the arms below that return nullptr directly add an error first.  */
+  const_TokenPtr t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case EXCLAM:
+      // never type - can't be macro as no path beforehand
+      lexer.skip_token ();
+      return std::unique_ptr<AST::NeverType> (
+        new AST::NeverType (t->get_locus ()));
+    case LEFT_SQUARE:
+      // slice type or array type - requires further disambiguation
+      return parse_slice_or_array_type ();
+    case LEFT_SHIFT:
+    case LEFT_ANGLE:
+      {
+        // qualified path in type
+        AST::QualifiedPathInType path = parse_qualified_path_in_type ();
+        if (path.is_error ())
+          {
+            Error error (t->get_locus (),
+                         "failed to parse qualified path in type");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        return std::unique_ptr<AST::QualifiedPathInType> (
+          new AST::QualifiedPathInType (std::move (path)));
+      }
+    case UNDERSCORE:
+      // inferred type
+      lexer.skip_token ();
+      return std::unique_ptr<AST::InferredType> (
+        new AST::InferredType (t->get_locus ()));
+    case ASTERISK:
+      // raw pointer type
+      return parse_raw_pointer_type ();
+    case AMP:
+    case LOGICAL_AND:
+      // reference type - '&&' arrives as a single LOGICAL_AND token
+      return parse_reference_type ();
+    case LIFETIME:
+      /* probably a lifetime bound, so probably type param bounds in
+       * TraitObjectType. this is not allowed, but detection here for error
+       * message */
+      add_error (Error (t->get_locus (),
+                        "lifetime bounds (i.e. in type param bounds, in "
+                        "TraitObjectType) are not allowed as TypeNoBounds"));
+
+      return nullptr;
+    case IDENTIFIER:
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case DOLLAR_SIGN:
+    case SCOPE_RESOLUTION:
+      {
+        // macro invocation or type path - requires further disambiguation.
+        /* for parsing path component of each rule, perhaps parse it as a
+         * typepath and attempt conversion to simplepath if a trailing '!' is
+         * found */
+        /* Type path also includes TraitObjectTypeOneBound BUT if it starts
+         * with it, it is exactly the same as a TypePath syntactically, so
+         * this is a syntactical ambiguity. As such, the parser will parse it
+         * as a TypePath. This, however, does not prevent TraitObjectType from
+         * starting with a typepath. */
+
+        // parse path as type path
+        AST::TypePath path = parse_type_path ();
+        if (path.is_error ())
+          {
+            Error error (
+              t->get_locus (),
+              "failed to parse path as first component of type no bounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+        location_t locus = path.get_locus ();
+
+        // branch on next token
+        t = lexer.peek_token ();
+        switch (t->get_id ())
+          {
+          case EXCLAM:
+            {
+              // macro invocation
+              // convert to simple path
+              AST::SimplePath macro_path = path.as_simple_path ();
+              if (macro_path.is_empty ())
+                {
+                  Error error (t->get_locus (),
+                               "failed to parse simple path in macro "
+                               "invocation (for type)");
+                  add_error (std::move (error));
+
+                  return nullptr;
+                }
+
+              lexer.skip_token ();
+
+              auto tok_tree = parse_delim_token_tree ();
+              if (!tok_tree)
+                return nullptr;
+
+              return AST::MacroInvocation::Regular (
+                AST::MacroInvocData (std::move (macro_path),
+                                     std::move (tok_tree.value ())),
+                {}, locus);
+            }
+          default:
+            // assume that this is a type path and not an error
+            return std::unique_ptr<AST::TypePath> (
+              new AST::TypePath (std::move (path)));
+          }
+      }
+    case LEFT_PAREN:
+      /* tuple type or parenthesised type - requires further disambiguation
+       * (the usual). ok apparently can be a parenthesised TraitBound too, so
+       * could be TraitObjectTypeOneBound */
+      return parse_paren_prefixed_type_no_bounds ();
+    /* NOTE(review): FOR is routed to parse_bare_function_type with no for
+     * lifetimes, and that function does not parse them itself - confirm
+     * whether a 'for'-prefixed type no bounds can succeed through here.  */
+    case FOR:
+    case ASYNC:
+    case CONST:
+    case UNSAFE:
+    case EXTERN_KW:
+    case FN_KW:
+      // bare function type (with no for lifetimes)
+      return parse_bare_function_type (std::vector<AST::LifetimeParam> ());
+    case IMPL:
+      lexer.skip_token ();
+      if (lexer.peek_token ()->get_id () == LIFETIME)
+        {
+          /* cannot be one bound because lifetime prevents it from being
+           * traitbound not allowed as type no bounds, only here for error
+           * message */
+          Error error (
+            lexer.peek_token ()->get_locus (),
+            "lifetime (probably lifetime bound, in type param "
+            "bounds, in ImplTraitType) is not allowed in TypeNoBounds");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      else
+        {
+          // should be trait bound, so parse trait bound
+          std::unique_ptr<AST::TraitBound> initial_bound = parse_trait_bound ();
+          if (initial_bound == nullptr)
+            {
+              Error error (lexer.peek_token ()->get_locus (),
+                           "failed to parse ImplTraitTypeOneBound bound");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          // t still refers to the 'impl' token here
+          location_t locus = t->get_locus ();
+
+          // ensure not a trait with multiple bounds
+          t = lexer.peek_token ();
+          if (t->get_id () == PLUS)
+            {
+              Error error (t->get_locus (),
+                           "plus after trait bound means an ImplTraitType, "
+                           "which is not allowed as a TypeNoBounds");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+
+          return std::unique_ptr<AST::ImplTraitTypeOneBound> (
+            new AST::ImplTraitTypeOneBound (std::move (initial_bound), locus));
+        }
+    case DYN:
+    case QUESTION_MARK:
+      {
+        // either TraitObjectTypeOneBound
+        bool has_dyn = false;
+        if (t->get_id () == DYN)
+          {
+            lexer.skip_token ();
+            has_dyn = true;
+          }
+
+        if (lexer.peek_token ()->get_id () == LIFETIME)
+          {
+            /* means that cannot be TraitObjectTypeOneBound - so here for
+             * error message */
+            Error error (lexer.peek_token ()->get_locus (),
+                         "lifetime as bound in TraitObjectTypeOneBound "
+                         "is not allowed, so cannot be TypeNoBounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // should be trait bound, so parse trait bound
+        std::unique_ptr<AST::TraitBound> initial_bound = parse_trait_bound ();
+        if (initial_bound == nullptr)
+          {
+            Error error (
+              lexer.peek_token ()->get_locus (),
+              "failed to parse TraitObjectTypeOneBound initial bound");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // t still refers to the first token ('dyn' or '?') here
+        location_t locus = t->get_locus ();
+
+        // detect error with plus as next token
+        t = lexer.peek_token ();
+        if (t->get_id () == PLUS)
+          {
+            Error error (t->get_locus (),
+                         "plus after trait bound means a TraitObjectType, "
+                         "which is not allowed as a TypeNoBounds");
+            add_error (std::move (error));
+
+            return nullptr;
+          }
+
+        // convert trait bound to value object
+        AST::TraitBound value_bound (*initial_bound);
+
+        return std::unique_ptr<AST::TraitObjectTypeOneBound> (
+          new AST::TraitObjectTypeOneBound (std::move (value_bound), locus,
+                                            has_dyn));
+      }
+    default:
+      add_error (Error (t->get_locus (),
+                        "unrecognised token %qs in type no bounds",
+                        t->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+// Parses a type no bounds beginning with '('.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TypeNoBounds>
+Parser<ManagedTokenSource>::parse_paren_prefixed_type_no_bounds ()
+{
+  /* NOTE: this could probably be parsed without the HACK solution of
+   * parse_paren_prefixed_type, but I was lazy. So FIXME for future.*/
+
+  /* NOTE: again, syntactical ambiguity of a parenthesised trait bound is
+   * considered a trait bound, not a parenthesised type, so that it can still
+   * be used in type param bounds. */
+
+  location_t left_paren_locus = lexer.peek_token ()->get_locus ();
+
+  // skip left delim
+  lexer.skip_token ();
+  /* while next token isn't close delim, parse comma-separated types, saving
+   * whether trailing comma happens */
+  const_TokenPtr t = lexer.peek_token ();
+  bool trailing_comma = true;
+  std::vector<std::unique_ptr<AST::Type>> types;
+
+  while (t->get_id () != RIGHT_PAREN)
+    {
+      std::unique_ptr<AST::Type> type = parse_type ();
+      if (type == nullptr)
+        {
+          Error error (t->get_locus (),
+                       "failed to parse type inside parentheses (probably "
+                       "tuple or parenthesised)");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      types.push_back (std::move (type));
+
+      t = lexer.peek_token ();
+      if (t->get_id () != COMMA)
+        {
+          trailing_comma = false;
+          break;
+        }
+      lexer.skip_token ();
+
+      t = lexer.peek_token ();
+    }
+
+  if (!skip_token (RIGHT_PAREN))
+    {
+      return nullptr;
+    }
+
+  // if only one type and no trailing comma, then not a tuple type
+  if (types.size () == 1 && !trailing_comma)
+    {
+      // must be a TraitObjectType (with more than one bound)
+      if (lexer.peek_token ()->get_id () == PLUS)
+        {
+          // error - this is not allowed for type no bounds
+          Error error (lexer.peek_token ()->get_locus (),
+                       "plus (implying TraitObjectType as type param "
+                       "bounds) is not allowed in type no bounds");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      else
+        {
+          // release vector pointer
+          std::unique_ptr<AST::Type> released_ptr = std::move (types[0]);
+          /* HACK: attempt to convert to trait bound. if fails, parenthesised
+           * type */
+          std::unique_ptr<AST::TraitBound> converted_bound (
+            released_ptr->to_trait_bound (true));
+          if (converted_bound == nullptr)
+            {
+              // parenthesised type
+              return std::unique_ptr<AST::ParenthesisedType> (
+                new AST::ParenthesisedType (std::move (released_ptr),
+                                            left_paren_locus));
+            }
+          else
+            {
+              // trait object type (one bound)
+
+              // get value semantics trait bound
+              AST::TraitBound value_bound (*converted_bound);
+
+              return std::unique_ptr<AST::TraitObjectTypeOneBound> (
+                new AST::TraitObjectTypeOneBound (value_bound,
+                                                  left_paren_locus));
+            }
+        }
+    }
+  else
+    {
+      return std::unique_ptr<AST::TupleType> (
+        new AST::TupleType (std::move (types), left_paren_locus));
+    }
+  /* TODO: ensure that this ensures that dynamic dispatch for traits is not
+   * lost somehow */
+}
+
+// Parses tuple struct items if they exist. Does not parse parentheses.
+template <typename ManagedTokenSource>
+std::unique_ptr<AST::TupleStructItems>
+Parser<ManagedTokenSource>::parse_tuple_struct_items ()
+{
+  std::vector<std::unique_ptr<AST::Pattern>> lower_patterns;
+
+  // DEBUG
+  rust_debug ("started parsing tuple struct items");
+
+  // check for '..' at front
+  if (lexer.peek_token ()->get_id () == DOT_DOT)
+    {
+      // only parse upper patterns
+      lexer.skip_token ();
+
+      // DEBUG
+      rust_debug ("'..' at front in tuple struct items detected");
+
+      std::vector<std::unique_ptr<AST::Pattern>> upper_patterns;
+
+      const_TokenPtr t = lexer.peek_token ();
+      while (t->get_id () == COMMA)
+        {
+          lexer.skip_token ();
+
+          // break if right paren
+          if (lexer.peek_token ()->get_id () == RIGHT_PAREN)
+            break;
+
+          // parse pattern, which is now required
+          std::unique_ptr<AST::Pattern> pattern = parse_pattern ();
+          if (pattern == nullptr)
+            {
+              Error error (lexer.peek_token ()->get_locus (),
+                           "failed to parse pattern in tuple struct items");
+              add_error (std::move (error));
+
+              return nullptr;
+            }
+          upper_patterns.push_back (std::move (pattern));
+
+          t = lexer.peek_token ();
+        }
+
+      // DEBUG
+      rust_debug (
+        "finished parsing tuple struct items ranged (upper/none only)");
+
+      return std::unique_ptr<AST::TupleStructItemsHasRest> (
+        new AST::TupleStructItemsHasRest (std::move (lower_patterns),
+                                          std::move (upper_patterns)));
+    }
+
+  // has at least some lower patterns
+  const_TokenPtr t = lexer.peek_token ();
+  while (t->get_id () != RIGHT_PAREN && t->get_id () != DOT_DOT)
+    {
+      // DEBUG
+      rust_debug ("about to parse pattern in tuple struct items");
+
+      // parse pattern, which is required
+      std::unique_ptr<AST::Pattern> pattern = parse_pattern ();
+      if (pattern == nullptr)
+        {
+          Error error (t->get_locus (),
+                       "failed to parse pattern in tuple struct items");
+          add_error (std::move (error));
+
+          return nullptr;
+        }
+      lower_patterns.push_back (std::move (pattern));
+
+      // DEBUG
+      rust_debug ("successfully parsed pattern in tuple struct items");
+
+      if (lexer.peek_token ()->get_id () != COMMA)
+        {
+          // DEBUG
+          rust_debug ("broke out of parsing patterns in tuple struct "
+                      "items as no comma");
+
+          break;
+        }
+      lexer.skip_token ();
+      t = lexer.peek_token ();
+    }
+
+  // branch on next token
+  t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case RIGHT_PAREN:
+      return std::unique_ptr<AST::TupleStructItemsNoRest> (
+        new AST::TupleStructItemsNoRest (std::move (lower_patterns)));
+    case DOT_DOT:
+      {
+        // has an upper range that must be parsed separately
+        lexer.skip_token ();
+
+        std::vector<std::unique_ptr<AST::Pattern>> upper_patterns;
+
+        t = lexer.peek_token ();
+        while (t->get_id () == COMMA)
+          {
+            lexer.skip_token ();
+
+            // break if next token is right paren
+            if (lexer.peek_token ()->get_id () == RIGHT_PAREN)
+              break;
+
+            // parse pattern, which is required
+            std::unique_ptr<AST::Pattern> pattern = parse_pattern ();
+            if (pattern == nullptr)
+              {
+                Error error (lexer.peek_token ()->get_locus (),
+                             "failed to parse pattern in tuple struct items");
+                add_error (std::move (error));
+
+                return nullptr;
+              }
+            upper_patterns.push_back (std::move (pattern));
+
+            t = lexer.peek_token ();
+          }
+
+        return std::unique_ptr<AST::TupleStructItemsHasRest> (
+          new AST::TupleStructItemsHasRest (std::move (lower_patterns),
+                                            std::move (upper_patterns)));
+      }
+    default:
+      // error
+      add_error (Error (t->get_locus (),
+                        "unexpected token %qs in tuple struct items",
+                        t->get_token_description ()));
+
+      return nullptr;
+    }
+}
+
+/* Parses a statement or expression (depending on whether a trailing semicolon
+ * exists). Useful for block expressions where it cannot be determined through
+ * lookahead whether it is a statement or expression to be parsed. */
+template <typename ManagedTokenSource>
+ExprOrStmt
+Parser<ManagedTokenSource>::parse_stmt_or_expr ()
+{
+  // quick exit for empty statement
+  const_TokenPtr t = lexer.peek_token ();
+  if (t->get_id () == SEMICOLON)
+    {
+      lexer.skip_token ();
+      std::unique_ptr<AST::EmptyStmt> stmt (
+        new AST::EmptyStmt (t->get_locus ()));
+      return ExprOrStmt (std::move (stmt));
+    }
+
+  // parse outer attributes
+  AST::AttrVec outer_attrs = parse_outer_attributes ();
+  ParseRestrictions restrictions;
+  restrictions.expr_can_be_stmt = true;
+  std::unique_ptr<AST::Expr> expr;
+
+  // parsing this will be annoying because of the many different possibilities
+  /* best may be just to copy paste in parse_item switch, and failing that try
+   * to parse outer attributes, and then pass them in to either a let
+   * statement or (fallback) expression statement. */
+  // FIXME: think of a way to do this without such a large switch?
+
+  /* FIXME: for expressions at least, the only way that they can really be
+   * parsed properly in this way is if they don't support operators on them.
+   * They must be pratt-parsed otherwise. As such due to composability, only
+   * explicit statements will have special cases here. This should roughly
+   * correspond to "expr-with-block", but this warning is here in case it
+   * isn't the case. */
+  t = lexer.peek_token ();
+  switch (t->get_id ())
+    {
+    case LET:
+      {
+        // let statement
+        std::unique_ptr<AST::LetStmt> stmt (
+          parse_let_stmt (std::move (outer_attrs)));
+        return ExprOrStmt (std::move (stmt));
+      }
+    case PUB:
+    case MOD:
+    case EXTERN_KW:
+    case USE:
+    case FN_KW:
+    case TYPE:
+    case STRUCT_KW:
+    case ENUM_KW:
+    case CONST:
+    case STATIC_KW:
+    case AUTO:
+    case TRAIT:
+    case IMPL:
+      {
+        std::unique_ptr<AST::VisItem> item (
+          parse_vis_item (std::move (outer_attrs)));
+        return ExprOrStmt (std::move (item));
+      }
+    /* TODO: implement union keyword but not really because of
+     * context-dependence crappy hack way to parse a union written below to
+     * separate it from the good code. */
+    // case UNION:
+    case UNSAFE:
+      { // maybe - unsafe traits are a thing
+        /* if any of these (should be all possible VisItem prefixes), parse a
+         * VisItem - can't parse item because would require reparsing outer
+         * attributes */
+        const_TokenPtr t2 = lexer.peek_token (1);
+        switch (t2->get_id ())
+          {
+          case LEFT_CURLY:
+            {
+              // unsafe block: parse as expression
+              expr = parse_expr (std::move (outer_attrs), restrictions);
+              break;
+            }
+          case AUTO:
+          case TRAIT:
+            {
+              // unsafe trait
+              std::unique_ptr<AST::VisItem> item (
+                parse_vis_item (std::move (outer_attrs)));
+              return ExprOrStmt (std::move (item));
+            }
+          case EXTERN_KW:
+          case FN_KW:
+            {
+              // unsafe function
+              std::unique_ptr<AST::VisItem> item (
+                parse_vis_item (std::move (outer_attrs)));
+              return ExprOrStmt (std::move (item));
+            }
+          case IMPL:
+            {
+              // unsafe trait impl
+              std::unique_ptr<AST::VisItem> item (
+                parse_vis_item (std::move (outer_attrs)));
+              return ExprOrStmt (std::move (item));
+            }
+          default:
+            /* Describe t2, the token after 'unsafe' that was not
+             * recognised: the error is located there, and t would always
+             * be reported as 'unsafe'.  */
+            add_error (Error (t2->get_locus (),
+                              "unrecognised token %qs after parsing unsafe - "
+                              "expected beginning of expression or statement",
+                              t2->get_token_description ()));
+
+            // skip somewhere?
+            return ExprOrStmt::create_error ();
+          }
+        break;
+      }
+      /* FIXME: this is either a macro invocation or macro invocation semi.
+       * start parsing to determine which one it is. */
+      // FIXME: old code there
+
+    // crappy hack to do union "keyword"
+    case IDENTIFIER:
+      if (t->get_str () == Values::WeakKeywords::UNION
+          && lexer.peek_token (1)->get_id () == IDENTIFIER)
+        {
+          std::unique_ptr<AST::VisItem> item (
+            parse_vis_item (std::move (outer_attrs)));
+          return ExprOrStmt (std::move (item));
+          // or should this go straight to parsing union?
+        }
+      else if (t->get_str () == Values::WeakKeywords::MACRO_RULES
+               && lexer.peek_token (1)->get_id () == EXCLAM)
+        {
+          // macro_rules! macro item
+          std::unique_ptr<AST::Item> item (
+            parse_macro_rules_def (std::move (outer_attrs)));
+          return ExprOrStmt (std::move (item));
+        }
+      gcc_fallthrough ();
+    case SUPER:
+    case SELF:
+    case SELF_ALIAS:
+    case CRATE:
+    case SCOPE_RESOLUTION:
+    case DOLLAR_SIGN:
+      {
+        AST::PathInExpression path = parse_path_in_expression ();
+        std::unique_ptr<AST::Expr> null_denotation;
+
+        if (lexer.peek_token ()->get_id () == EXCLAM)
+          {
+            std::unique_ptr<AST::MacroInvocation> invoc
+              = parse_macro_invocation_partial (std::move (path),
+                                                std::move (outer_attrs));
+            if (invoc == nullptr)
+              return ExprOrStmt::create_error ();
+
+            if (restrictions.consume_semi && maybe_skip_token (SEMICOLON))
+              {
+                invoc->add_semicolon ();
+                // Macro invocation with semicolon.
+                return ExprOrStmt (
+                  std::unique_ptr<AST::Stmt> (std::move (invoc)));
+              }
+
+            TokenId after_macro = lexer.peek_token ()->get_id ();
+
+            AST::DelimType delim_type = invoc->get_invoc_data ()
+                                          .get_delim_tok_tree ()
+                                          .get_delim_type ();
+
+            /* a braced invocation is returned as a statement unless it is
+             * followed by '.' or '?' */
+            if (delim_type == AST::CURLY && after_macro != DOT
+                && after_macro != QUESTION_MARK)
+              {
+                rust_debug ("braced macro statement");
+                return ExprOrStmt (
+                  std::unique_ptr<AST::Stmt> (std::move (invoc)));
+              }
+
+            null_denotation = std::move (invoc);
+          }
+        else
+          {
+            null_denotation
+              = null_denotation_path (std::move (path), {}, restrictions);
+          }
+
+        expr = left_denotations (std::move (null_denotation), LBP_LOWEST,
+                                 std::move (outer_attrs), restrictions);
+        break;
+      }
+    default:
+      /* expression statement or expression itself - parse
+       * expression then make it statement if semi afterwards */
+      expr = parse_expr (std::move (outer_attrs), restrictions);
+      break;
+    }
+
+  const_TokenPtr after_expr = lexer.peek_token ();
+  if (after_expr->get_id () == SEMICOLON)
+    {
+      // must be expression statement
+      lexer.skip_token ();
+
+      if (expr)
+        {
+          std::unique_ptr<AST::ExprStmt> stmt (
+            new AST::ExprStmt (std::move (expr), t->get_locus (), true));
+          return ExprOrStmt (std::move (stmt));
+        }
+      else
+        {
+          return ExprOrStmt::create_error ();
+        }
+    }
+
+  if (expr && !expr->is_expr_without_block ()
+      && after_expr->get_id () != RIGHT_CURLY)
+    {
+      // block expression statement.
+      std::unique_ptr<AST::ExprStmt> stmt (
+        new AST::ExprStmt (std::move (expr), t->get_locus (), false));
+      return ExprOrStmt (std::move (stmt));
+    }
+
+  // return expression
+  return ExprOrStmt (std::move (expr));
+}
+
+} // namespace Rust
+
+#include "rust-parse-impl-utils.hxx"
+#include "rust-parse-impl-attribute.hxx"
+#include "rust-parse-impl-ttree.hxx"
+#include "rust-parse-impl-macro.hxx"
+#include "rust-parse-impl-path.hxx"
+#include "rust-parse-impl-pattern.hxx"
+#include "rust-parse-impl-expr.hxx"
diff --git a/gcc/rust/parse/rust-parse-utils.h b/gcc/rust/parse/rust-parse-utils.h
index 1791f6e839f..9d937206e69 100644
--- a/gcc/rust/parse/rust-parse-utils.h
+++ b/gcc/rust/parse/rust-parse-utils.h
@@ -65,6 +65,60 @@ is_simple_path_segment (TokenId id)
     }
 }
 
+/* Returns whether the token id is (or is likely to be) a right angle bracket.
+ * i.e. '>', '>>', '>=' and '>>=' tokens. */
+inline bool
+is_right_angle_tok (TokenId id)
+{
+  switch (id)
+    {
+    case RIGHT_ANGLE:
+    case RIGHT_SHIFT:
+    case GREATER_OR_EQUAL:
+    case RIGHT_SHIFT_EQ:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Returns whether the token can start a type (i.e. there is a valid type
+ * beginning with the token).
*/ +inline bool +can_tok_start_type (TokenId id) +{ + switch (id) + { + case EXCLAM: + case LEFT_SQUARE: + case LEFT_ANGLE: + case UNDERSCORE: + case ASTERISK: + case AMP: + case LIFETIME: + case IDENTIFIER: + case SUPER: + case SELF: + case SELF_ALIAS: + case CRATE: + case DOLLAR_SIGN: + case SCOPE_RESOLUTION: + case LEFT_PAREN: + case FOR: + case ASYNC: + case CONST: + case UNSAFE: + case EXTERN_KW: + case FN_KW: + case IMPL: + case DYN: + case QUESTION_MARK: + return true; + default: + return false; + } +} + } // namespace Utils } // namespace Parse diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index 19b280af965..27c8185433b 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -55,6 +55,106 @@ enum class ParseSelfError NOT_SELF, }; +// Left binding powers of operations. +enum binding_powers +{ + // Highest priority + LBP_HIGHEST = 100, + + LBP_PATH = 95, + + LBP_METHOD_CALL = 90, + + LBP_FIELD_EXPR = 85, + + LBP_FUNCTION_CALL = 80, + LBP_ARRAY_REF = LBP_FUNCTION_CALL, + + LBP_QUESTION_MARK = 75, // unary postfix - counts as left + + LBP_UNARY_PLUS = 70, // Used only when the null denotation is + + LBP_UNARY_MINUS = LBP_UNARY_PLUS, // Used only when the null denotation is - + LBP_UNARY_ASTERISK = LBP_UNARY_PLUS, // deref operator - unary prefix + LBP_UNARY_EXCLAM = LBP_UNARY_PLUS, + LBP_UNARY_AMP = LBP_UNARY_PLUS, + LBP_UNARY_AMP_MUT = LBP_UNARY_PLUS, + + LBP_AS = 65, + + LBP_MUL = 60, + LBP_DIV = LBP_MUL, + LBP_MOD = LBP_MUL, + + LBP_PLUS = 55, + LBP_MINUS = LBP_PLUS, + + LBP_L_SHIFT = 50, + LBP_R_SHIFT = LBP_L_SHIFT, + + LBP_AMP = 45, + + LBP_CARET = 40, + + LBP_PIPE = 35, + + LBP_EQUAL = 30, + LBP_NOT_EQUAL = LBP_EQUAL, + LBP_SMALLER_THAN = LBP_EQUAL, + LBP_SMALLER_EQUAL = LBP_EQUAL, + LBP_GREATER_THAN = LBP_EQUAL, + LBP_GREATER_EQUAL = LBP_EQUAL, + + LBP_LOGICAL_AND = 25, + + LBP_LOGICAL_OR = 20, + + LBP_DOT_DOT = 15, + LBP_DOT_DOT_EQ = LBP_DOT_DOT, + + // TODO: note all these assig operators are RIGHT 
associative! + LBP_ASSIG = 10, + LBP_PLUS_ASSIG = LBP_ASSIG, + LBP_MINUS_ASSIG = LBP_ASSIG, + LBP_MULT_ASSIG = LBP_ASSIG, + LBP_DIV_ASSIG = LBP_ASSIG, + LBP_MOD_ASSIG = LBP_ASSIG, + LBP_AMP_ASSIG = LBP_ASSIG, + LBP_PIPE_ASSIG = LBP_ASSIG, + LBP_CARET_ASSIG = LBP_ASSIG, + LBP_L_SHIFT_ASSIG = LBP_ASSIG, + LBP_R_SHIFT_ASSIG = LBP_ASSIG, + + // return, break, and closures as lowest priority? + LBP_RETURN = 5, + LBP_BREAK = LBP_RETURN, + LBP_CLOSURE = LBP_RETURN, // unary prefix operators + +#if 0 + // rust precedences + // used for closures + PREC_CLOSURE = -40, + // used for break, continue, return, and yield + PREC_JUMP = -30, + // used for range (although weird comment in rustc about this) + PREC_RANGE = -10, + // used for binary operators mentioned below - also cast, colon (type), + // assign, assign_op + PREC_BINOP = FROM_ASSOC_OP, + // used for box, address_of, let, unary (again, weird comment on let) + PREC_PREFIX = 50, + // used for await, call, method call, field, index, try, + // inline asm, macro invocation + PREC_POSTFIX = 60, + // used for array, repeat, tuple, literal, path, paren, if, + // while, for, 'loop', match, block, try block, async, struct + PREC_PAREN = 99, + PREC_FORCE_PAREN = 100, +#endif + + // lowest priority + LBP_LOWEST = 0 +}; + /* HACK: used to resolve the expression-or-statement problem at the end of a * block by allowing either to be returned (technically). Tagged union would * probably take up the same amount of space. */ @@ -244,8 +344,7 @@ template class Parser parse_outer_attribute (); tl::expected, Parse::Error::AttrInput> parse_attr_input (); - std::tuple, location_t> - parse_doc_comment (); + Parse::AttributeBody parse_doc_comment (); // Path-related tl::expected parse_simple_path ();