From f075d236fad3932e8652444d002dfcc6542d0c5a Mon Sep 17 00:00:00 2001
From: Devon Tingley
Date: Sat, 4 Mar 2023 07:53:02 -0500
Subject: [PATCH] Ch2 (up to if expressions)

---
 examples/basic.ms        |   8 +-
 src/lexer/error.rs       |  16 ++
 src/lexer/mod.rs         |  64 +++++--
 src/lexer/tokens.rs      | 170 +++++++++++++++++--
 src/main.rs              |  13 +-
 src/parser/ast.rs        | 154 +++++++++++++++++
 src/parser/error.rs      |  32 ++++
 src/parser/mod.rs        | 354 +++++++++++++++++++++++++++++++++++++++
 src/parser/precedence.rs |  30 ++++
 9 files changed, 801 insertions(+), 40 deletions(-)
 create mode 100644 src/lexer/error.rs
 create mode 100644 src/parser/ast.rs
 create mode 100644 src/parser/error.rs
 create mode 100644 src/parser/mod.rs
 create mode 100644 src/parser/precedence.rs

diff --git a/examples/basic.ms b/examples/basic.ms
index a36f43e..00f69d4 100644
--- a/examples/basic.ms
+++ b/examples/basic.ms
@@ -1,7 +1 @@
-print(5 + 5);
-
-if (5 == 5) {
-    print(true);
-} else {
-    print(false);
-}
+let x = 5 + 6 + 7;
diff --git a/src/lexer/error.rs b/src/lexer/error.rs
new file mode 100644
index 0000000..4bdd46c
--- /dev/null
+++ b/src/lexer/error.rs
@@ -0,0 +1,16 @@
+#[derive(Debug)]
+pub enum LexerError {
+    IllegalToken,
+    InvalidToken,
+}
+
+impl std::error::Error for LexerError {}
+
+impl std::fmt::Display for LexerError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LexerError::IllegalToken => write!(f, "illegal token found; only ASCII is valid"),
+            LexerError::InvalidToken => write!(f, "improper token type"),
+        }
+    }
+}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 5794c50..3aa0801 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -1,27 +1,36 @@
+mod error;
 mod tokens;

 use std::{iter::Peekable, str::Chars};

-use tokens::Token;
+pub use error::LexerError;
+pub use tokens::{InfixOperator, Keyword, PrefixOperator, Term, Token, Tokens};

-pub fn tokenize(input: &str) -> Vec<Token> {
+pub fn tokenize(input: &str) -> Result<Tokens, LexerError> {
     let mut input = input.chars().into_iter().peekable();

-    let mut toks = Vec::new();
-    while let Some(tok) = next_token(&mut input) {
-        toks.push(tok)
+    let mut tokens = Vec::new();
+    while let Some(tok) = next_token(&mut input)? {
+        tokens.push(tok)
     }

-    toks
+    Ok(tokens.into_iter().peekable())
 }

-fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
-    let tok = match input.next()? {
+fn next_token(input: &mut Peekable<Chars>) -> Result<Option<Token>, LexerError> {
+    let tok = match input.next() {
+        Some(tok) => tok,
+        None => return Ok(None),
+    };
+
+    let tok = match tok {
         '+' => Token::Plus,
         '-' => Token::Minus,
         '*' => Token::Asterisk,
         '/' => Token::ForwardSlash,

+        // For each of these we also consume the following character when it
+        // completes a two-character operator such as `!=` or `<=`
         '!' => {
             if input.peek() == Some(&'=') {
                 input.next();
@@ -39,14 +48,29 @@ fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
             }
         }

-        '<' => Token::LessThan,
-        '>' => Token::GreaterThan,
+        '<' => {
+            if input.peek() == Some(&'=') {
+                input.next();
+                Token::LessThanEqual
+            } else {
+                Token::LessThan
+            }
+        }
+        '>' => {
+            if input.peek() == Some(&'=') {
+                input.next();
+                Token::GreaterThanEqual
+            } else {
+                Token::GreaterThan
+            }
+        }

         ',' => Token::Comma,
         ';' => Token::Semicolon,

         '(' => Token::LeftParenthesis,
         ')' => Token::RightParenthesis,
+
         '{' => Token::LeftBrace,
         '}' => Token::RightBrace,

@@ -55,12 +79,15 @@ fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
         tok if tok.is_ascii_digit() => read_int(input, tok),

         // Skip whitespace
-        tok if tok.is_ascii_whitespace() => next_token(input)?,
+        tok if tok.is_ascii_whitespace() => match next_token(input)? {
+            Some(tok) => tok,
+            None => return Ok(None),
+        },

-        _ => Token::Illegal,
+        _ => return Err(LexerError::IllegalToken),
     };

-    Some(tok)
+    Ok(Some(tok))
 }

 fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
@@ -78,14 +105,14 @@ fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
     // Check if our ident is a keyword
     let ident = toks.iter().cloned().collect::<String>();
     match ident.as_str() {
-        "true" => Token::True,
-        "false" => Token::False,
         "fn" => Token::Function,
         "let" => Token::Let,
         "if" => Token::If,
         "else" => Token::Else,
         "return" => Token::Return,

+        "true" => Token::True,
+        "false" => Token::False,
         ident => Token::Ident(ident.to_owned()),
     }
 }
@@ -107,6 +134,7 @@ fn read_int(input: &mut Peekable<Chars>, first: char) -> Token {
         .collect::<String>()
         .parse::<i64>()
         .unwrap();
+
     Token::Int(int)
 }

@@ -228,8 +256,8 @@ mod tests {
         Token::RightParenthesis,
         Token::Semicolon,
     ])]
-    fn test_next_token(#[case] input: &str, #[case] expected: Vec<Token>) {
-        let res = tokenize(input);
-        assert_eq!(res, expected);
+    fn test_lexer(#[case] input: &str, #[case] expected: Vec<Token>) {
+        let res = tokenize(input).unwrap();
+        assert_eq!(res.collect::<Vec<_>>(), expected);
     }
 }
diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs
index d426e22..6c2e205 100644
--- a/src/lexer/tokens.rs
+++ b/src/lexer/tokens.rs
@@ -1,15 +1,28 @@
-#[derive(Debug, PartialEq, PartialOrd)]
-pub enum Token {
-    Illegal,
+use std::{iter::Peekable, vec::IntoIter};

-    // Ident + Literals
+use super::LexerError;
+
+pub type Tokens = Peekable<IntoIter<Token>>;
+
+#[derive(Debug, PartialEq, PartialOrd, Clone)]
+pub enum Token {
+    Assign,
+
+    // Keywords
+    Function,
+    Let,
+    If,
+    Else,
+    Return,
+
+    // Terms
     Ident(String),
     Int(i64),
+    True,
+    False,

-    // Operators
+    //Operators
     Bang,
-
-    Assign,
     Plus,
     Minus,
     Asterisk,
@@ -19,23 +32,158 @@ pub enum Token {
     NotEqual,
     LessThan,
     GreaterThan,
+    LessThanEqual,
+    GreaterThanEqual,

     // Delimiters
     Comma,
     Semicolon,

+    // Expression groupers
     LeftParenthesis,
     RightParenthesis,
     LeftBrace,
     RightBrace,
+}

-    // Keywords
+// ==================== Token Types ====================
+// Terms
+#[derive(Debug, PartialEq, PartialOrd, Clone)]
+pub enum Term {
+    Ident(String),
+    Int(i64),
     True,
     False,
+}

-    Function,
-    Let,
+impl Term {
+    pub fn is(tok: &Token) -> bool {
+        match tok {
+            Token::Ident(_) | Token::Int(_) | Token::True | Token::False => true,
+            _ => false,
+        }
+    }
+}
+
+impl TryFrom<Token> for Term {
+    type Error = LexerError;
+
+    fn try_from(token: Token) -> Result<Self, Self::Error> {
+        let term = match token {
+            Token::Ident(val) => Term::Ident(val),
+            Token::Int(val) => Term::Int(val),
+            Token::True => Term::True,
+            Token::False => Term::False,
+
+            _ => return Err(LexerError::InvalidToken),
+        };
+
+        Ok(term)
+    }
+}
+
+// Prefix Operators
+#[derive(Debug)]
+pub enum PrefixOperator {
+    Bang,
+    Minus,
     If,
-    Else,
+}
+
+impl TryFrom<&Token> for PrefixOperator {
+    type Error = LexerError;
+
+    fn try_from(token: &Token) -> Result<Self, Self::Error> {
+        let term = match token {
+            Token::Bang => Self::Bang,
+            Token::Minus => Self::Minus,
+            Token::If => Self::If,
+
+            _ => return Err(LexerError::InvalidToken),
+        };
+
+        Ok(term)
+    }
+}
+
+impl PrefixOperator {
+    pub fn is(tok: &Token) -> bool {
+        Self::try_from(tok).is_ok()
+    }
+}
+
+// Infix Operators
+#[derive(Debug, PartialEq, PartialOrd, Clone)]
+pub enum InfixOperator {
+    Plus,
+    Minus,
+    Asterisk,
+    ForwardSlash,
+
+    Equal,
+    NotEqual,
+    LessThan,
+    GreaterThan,
+    LessThanEqual,
+    GreaterThanEqual,
+}
+
+impl InfixOperator {
+    pub fn is(tok: &Token) -> bool {
+        Self::try_from(tok).is_ok()
+    }
+}
+
+impl TryFrom<&Token> for InfixOperator {
+    type Error = LexerError;
+
+    fn try_from(token: &Token) -> Result<Self, Self::Error> {
+        let term = match token {
+            Token::Plus => Self::Plus,
+            Token::Minus => Self::Minus,
+            Token::Asterisk => Self::Asterisk,
+            Token::ForwardSlash => Self::ForwardSlash,
+
+            Token::Equal => Self::Equal,
+            Token::NotEqual => Self::NotEqual,
+            Token::LessThan => Self::LessThan,
+            Token::LessThanEqual => Self::LessThanEqual,
+            Token::GreaterThan => Self::GreaterThan,
+            Token::GreaterThanEqual => Self::GreaterThanEqual,
+
+            _ => return Err(LexerError::InvalidToken),
+        };
+
+        Ok(term)
+    }
+}
+
+// Keywords
+#[derive(Debug, PartialEq, PartialOrd, Clone)]
+pub enum Keyword {
+    Let,
+    Function,
     Return,
 }
+
+impl Keyword {
+    pub fn is(tok: &Token) -> bool {
+        Self::try_from(tok).is_ok()
+    }
+}
+
+impl TryFrom<&Token> for Keyword {
+    type Error = LexerError;
+
+    fn try_from(token: &Token) -> Result<Self, Self::Error> {
+        let term = match token {
+            Token::Let => Self::Let,
+            Token::Function => Self::Function,
+            Token::Return => Self::Return,
+
+            _ => return Err(LexerError::InvalidToken),
+        };
+
+        Ok(term)
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 942d417..2f64cef 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ use std::io::{self, Write};
 use clap::{CommandFactory, Parser};

 mod lexer;
+mod parser;

 #[derive(Parser)]
 #[command(author, version, about)]
@@ -18,8 +19,9 @@ fn main() {
     match args.path {
         Some(path) => {
             let input = fs::read_to_string(&path).unwrap();
-            let tokens = lexer::tokenize(&input);
-            println!("{:?}", tokens);
+            let tokens = lexer::tokenize(&input).unwrap();
+            let ast = parser::parse(tokens);
+            println!("{}", ast);
         }

         None => start_repl(cmd.get_version().unwrap()),
@@ -37,7 +39,10 @@ fn start_repl(version: &str) {
             .read_line(&mut input)
             .expect("failed to read from stdin");

-        let tokens = lexer::tokenize(&input);
-        println!("{:?}", tokens);
+        let tokens = lexer::tokenize(&input).unwrap();
+        let ast = parser::parse(tokens);
+        println!("{}", ast);
+
+        input.clear();
     }
 }
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
new file mode 100644
index 0000000..6b1e23e
--- /dev/null
+++ b/src/parser/ast.rs
@@ -0,0 +1,154 @@
+use std::fmt::Display;
+
+// Program
+pub struct Program(Vec<Node>);
+
+impl Program {
+    pub fn new(vec: Vec<Node>) -> Program {
+        Program(vec)
+    }
+}
+
+impl Display for Program {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            self.0
+                .iter()
+                .map(|stmt| stmt.to_string())
+                .collect::<Vec<String>>()
+                .join("\n")
+        )
+    }
+}
+
+// Statements
+#[derive(Debug)]
+pub enum Node {
+    Let(String, Expression),
+    Return(Expression),
+    Block(Vec<Node>),
+    Expression(Expression),
+}
+
+impl Display for Node {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Node::Let(name, val) => write!(f, "let {} be {}", name, val),
+            Node::Return(val) => write!(f, "returning {}", val),
+            Node::Block(vec) => write!(
+                f,
+                "{{ {} }}",
+                vec.iter()
+                    .map(|node| node.to_string())
+                    .collect::<Vec<String>>()
+                    .join("\n")
+            ),
+            Node::Expression(val) => write!(f, "{}", val),
+        }
+    }
+}
+
+// Expressions
+#[derive(Debug)]
+pub enum Expression {
+    Identifier(String),
+
+    // Literals
+    Integer(i64),
+    Bool(bool),
+
+    // Prefix Operators
+    Not(Box<Expression>),
+    Negative(Box<Expression>),
+
+    // Infix Operators
+    Add(Box<Expression>, Box<Expression>),
+    Subtract(Box<Expression>, Box<Expression>),
+    Multiply(Box<Expression>, Box<Expression>),
+    Divide(Box<Expression>, Box<Expression>),
+
+    Equal(Box<Expression>, Box<Expression>),
+    NotEqual(Box<Expression>, Box<Expression>),
+    LessThan(Box<Expression>, Box<Expression>),
+    GreaterThan(Box<Expression>, Box<Expression>),
+    LessThanEqual(Box<Expression>, Box<Expression>),
+    GreaterThanEqual(Box<Expression>, Box<Expression>),
+
+    If {
+        condition: Box<Expression>,
+        consequence: Box<Node>,
+        alternative: Option<Box<Node>>,
+    },
+}
+
+impl Expression {
+    pub fn is_bool(&self) -> bool {
+        match *self {
+            Expression::Identifier(_)
+            | Expression::Not(_)
+            | Expression::Bool(_)
+            | Expression::Equal(_, _)
+            | Expression::NotEqual(_, _)
+            | Expression::LessThan(_, _)
+            | Expression::GreaterThan(_, _)
+            | Expression::LessThanEqual(_, _)
+            | Expression::GreaterThanEqual(_, _) => true,
+            _ => false,
+        }
+    }
+
+    pub fn is_numeric(&self) -> bool {
+        match *self {
+            Expression::Identifier(_)
+            | Expression::Negative(_)
+            | Expression::Integer(_)
+            | Expression::Add(_, _)
+            | Expression::Subtract(_, _)
+            | Expression::Multiply(_, _)
+            | Expression::Divide(_, _) => true,
+            _ => false,
+        }
+    }
+}
+
+impl Display for Expression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Expression::Identifier(val) => write!(f, "{}", val),
+            Expression::Integer(val) => write!(f, "{}", val),
+            Expression::Bool(val) => write!(f, "{}", val),
+
+            Expression::Not(expr) => write!(f, "(!{})", expr),
+            Expression::Negative(expr) => write!(f, "(-{})", expr),
+
+            Expression::Add(lhs, rhs) => write!(f, "({} + {})", lhs, rhs),
+            Expression::Subtract(lhs, rhs) => write!(f, "({} - {})", lhs, rhs),
+            Expression::Multiply(lhs, rhs) => write!(f, "({} * {})", lhs, rhs),
+            Expression::Divide(lhs, rhs) => write!(f, "({} / {})", lhs, rhs),
+
+            Expression::Equal(lhs, rhs) => write!(f, "({} == {})", lhs, rhs),
+            Expression::NotEqual(lhs, rhs) => write!(f, "({} != {})", lhs, rhs),
+            Expression::LessThan(lhs, rhs) => write!(f, "({} < {})", lhs, rhs),
+            Expression::GreaterThan(lhs, rhs) => write!(f, "({} > {})", lhs, rhs),
+            Expression::LessThanEqual(lhs, rhs) => write!(f, "({} <= {})", lhs, rhs),
+            Expression::GreaterThanEqual(lhs, rhs) => write!(f, "({} >= {})", lhs, rhs),
+
+            Expression::If {
+                condition,
+                consequence,
+                alternative,
+            } => write!(
+                f,
+                "if {} then {} else {}",
+                condition,
+                consequence,
+                match alternative {
+                    Some(expr) => expr.to_string(),
+                    None => "N/A".to_string(),
+                }
+            ),
+        }
+    }
+}
diff --git a/src/parser/error.rs b/src/parser/error.rs
new file mode 100644
index 0000000..f91f6c3
--- /dev/null
+++ b/src/parser/error.rs
@@ -0,0 +1,32 @@
+use std::fmt::Display;
+
+use crate::lexer::Token;
+
+#[derive(Debug)]
+pub enum ParserError {
+    EOF,
+    UnexpectedToken(&'static str, Token),
+    ExpectedExpression,
+    ExpectedBoolean,
+    ExpectedNumeric,
+    ExpectedBlock,
+    ExpectedRightParenthesis,
+}
+
+impl Display for ParserError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ParserError::EOF => write!(f, "unexpected EOF"),
+            ParserError::UnexpectedToken(expected, got) => {
+                write!(f, "invalid token; expected {} got {:?}", expected, got)
+            }
+            ParserError::ExpectedExpression => write!(f, "expected expression"),
+            ParserError::ExpectedBoolean => write!(f, "expected boolean expression"),
+            ParserError::ExpectedNumeric => write!(f, "expected numeric expression"),
+            ParserError::ExpectedBlock => write!(f, "expected block"),
+            ParserError::ExpectedRightParenthesis => write!(f, "expected right parenthesis"),
+        }
+    }
+}
+
+impl std::error::Error for ParserError {}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000..c653da7
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,354 @@
+use crate::lexer::{InfixOperator, Keyword, PrefixOperator, Term, Token, Tokens};
+
+mod ast;
+mod error;
+mod precedence;
+
+pub use ast::{Expression, Node, Program};
+pub use error::ParserError;
+
+use self::precedence::{get_precedence, Precedence};
+
+pub fn parse(tokens: Tokens) -> Program {
+    let mut tokens = tokens;
+    let mut ast = Vec::new();
+    while let Some(node) = next_node(&mut tokens) {
+        match node {
+            Ok(node) => ast.push(node),
+            // TODO: Handle this more gracefully than a panic
+            Err(err) => panic!("{}", err),
+        }
+    }
+
+    Program::new(ast)
+}
+
+fn next_node<'a>(tokens: &mut Tokens) -> Option<Result<Node, ParserError>> {
+    let node = match tokens.peek()? {
+        tok if Keyword::is(tok) => {
+            let keyword = Keyword::try_from(tok).unwrap();
+
+            match keyword {
+                Keyword::Let => parse_let_statement(tokens),
+                Keyword::Return => parse_return_statement(tokens),
+                _ => panic!("not implemented"),
+            }
+        }
+
+        // Parse Valid expression
+        tok if PrefixOperator::is(tok) || Term::is(tok) || tok == &Token::LeftParenthesis => {
+            parse_expression_statement(tokens)
+        }
+
+        Token::LeftBrace => parse_block_statement(tokens),
+
+        Token::Semicolon => {
+            // Eat ;
+            tokens.next();
+            next_node(tokens)?
+        }
+
+        tok => panic!("not implemented: {:?}", tok),
+    };
+
+    Some(node)
+}
+
+// Statement parsing
+fn parse_let_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
+    // Get rid of `let`
+    tokens.next();
+
+    let ident = match tokens.next() {
+        Some(Token::Ident(ident)) => ident,
+        Some(tok) => return Err(ParserError::UnexpectedToken("identifier", tok)),
+        None => return Err(ParserError::EOF),
+    };
+
+    // Ensure `=` follows the identifier
+    match tokens.next() {
+        Some(tok) if tok != Token::Assign => return Err(ParserError::UnexpectedToken("=", tok)),
+        _ => {}
+    }
+
+    let val = parse_expression(tokens, None, Precedence::Lowest)?;
+
+    Ok(Node::Let(ident, val))
+}
+
+fn parse_return_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
+    // Get rid of `return`
+    tokens.next();
+
+    let val = parse_expression(tokens, None, Precedence::Lowest)?;
+    Ok(Node::Return(val))
+}
+
+fn parse_expression_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
+    let val = parse_expression(tokens, None, Precedence::Lowest)?;
+    Ok(Node::Expression(val))
+}
+
+fn parse_block_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
+    let mut statements: Vec<Node> = Vec::new();
+
+    // Ensure block starts with {
+    if tokens.next() != Some(Token::LeftBrace) {
+        return Err(ParserError::ExpectedBlock);
+    };
+
+    while tokens.peek() != Some(&Token::RightBrace) {
+        match next_node(tokens) {
+            Some(Ok(stmt)) => statements.push(stmt),
+            Some(Err(err)) => return Err(err),
+            None => return Err(ParserError::EOF),
+        }
+    }
+
+    // Eat }
+    tokens.next();
+
+    Ok(Node::Block(statements))
+}
+
+// Expression parsing
+fn parse_expression(
+    tokens: &mut Tokens,
+    lhs: Option<Expression>,
+    precedence: Precedence,
+) -> Result<Expression, ParserError> {
+    // If LHS exists, then unwrap it. Otherwise, parse the next token to determine what LHS is
+    let lhs = match lhs {
+        Some(lhs) => lhs,
+        None => match tokens.next() {
+            // Prefix operators
+            Some(operator) if PrefixOperator::is(&operator) => {
+                parse_prefix_operator(tokens, PrefixOperator::try_from(&operator).unwrap())?
+            }
+
+            // Grouped expressions
+            Some(Token::LeftParenthesis) => {
+                let res = parse_expression(tokens, None, Precedence::Lowest)?;
+
+                if tokens.next() != Some(Token::RightParenthesis) {
+                    return Err(ParserError::ExpectedRightParenthesis);
+                }
+
+                res
+            }
+
+            // Parse terms
+            Some(term) if Term::is(&term) => parse_term(term.try_into().unwrap())?,
+
+            Some(_) => return Err(ParserError::ExpectedExpression),
+            None => return Err(ParserError::EOF),
+        },
+    };
+
+    let expr = match tokens.peek() {
+        None
+        | Some(Token::RightParenthesis)
+        | Some(Token::LeftBrace)
+        | Some(Token::RightBrace)
+        | Some(Token::Semicolon) => return Ok(lhs),
+
+        Some(tok) => match tok {
+            // Infix Operator
+            tok if InfixOperator::is(tok) => {
+                let operator = InfixOperator::try_from(tok).unwrap();
+                if precedence >= get_precedence(&operator) {
+                    return Ok(lhs);
+                }
+
+                parse_infix_operator(tokens, lhs)?
+            }
+
+            // Prefix Operator
+            // Since `-` is a prefix and infix operator, we give way to InfixOperator::Minus first
+            tok if PrefixOperator::is(tok) => {
+                let operator = tok.try_into().unwrap();
+                parse_prefix_operator(tokens, operator)?
+            }
+
+            // Term
+            tok if Term::is(tok) => {
+                let term = tok.clone().try_into().unwrap();
+                parse_term(term)?
+            }
+
+            // Invalid tokens
+            _ => return Err(ParserError::ExpectedExpression),
+        },
+    };
+
+    parse_expression(tokens, Some(expr), precedence)
+}
+
+fn parse_term(token: Term) -> Result<Expression, ParserError> {
+    let res = match token {
+        // Variables, functions, etc.
+        Term::Ident(val) => Expression::Identifier(val),
+
+        // Literals
+        Term::Int(val) => Expression::Integer(val),
+        Term::True => Expression::Bool(true),
+        Term::False => Expression::Bool(false),
+    };
+
+    Ok(res)
+}
+
+fn parse_prefix_operator(
+    tokens: &mut Tokens,
+    operator: PrefixOperator,
+) -> Result<Expression, ParserError> {
+    let expr = match operator {
+        // Not
+        PrefixOperator::Bang => match parse_expression(tokens, None, Precedence::Prefix)? {
+            expr if expr.is_bool() => Expression::Not(Box::new(expr)),
+            _ => return Err(ParserError::ExpectedBoolean),
+        },
+
+        // Negative
+        PrefixOperator::Minus => match parse_expression(tokens, None, Precedence::Prefix)? {
+            expr if expr.is_numeric() => {
+                let val = Box::new(expr);
+                Expression::Negative(val)
+            }
+            _ => return Err(ParserError::ExpectedNumeric),
+        },
+
+        PrefixOperator::If => {
+            let condition = parse_expression(tokens, None, Precedence::Lowest)?;
+            let consequence = parse_block_statement(tokens)?;
+
+            let alternative = if tokens.peek() == Some(&Token::Else) {
+                // Eat else
+                tokens.next();
+
+                Some(Box::new(parse_block_statement(tokens)?))
+            } else {
+                None
+            };
+
+            Expression::If {
+                condition: Box::new(condition),
+                consequence: Box::new(consequence),
+                alternative,
+            }
+        }
+    };
+
+    Ok(expr)
+}
+
+fn parse_infix_operator(tokens: &mut Tokens, lhs: Expression) -> Result<Expression, ParserError> {
+    let operator = match tokens.next() {
+        Some(operator) if InfixOperator::is(&operator) => {
+            InfixOperator::try_from(&operator).unwrap()
+        }
+        Some(tok) => return Err(ParserError::UnexpectedToken("infix operator", tok)),
+        None => return Err(ParserError::EOF),
+    };
+
+    let lhs = Box::new(lhs);
+    let rhs = parse_expression(tokens, None, get_precedence(&operator))?;
+    let rhs = Box::new(rhs);
+
+    let res = match operator {
+        InfixOperator::Plus => Expression::Add(lhs, rhs),
+        InfixOperator::Minus => Expression::Subtract(lhs, rhs),
+
+        InfixOperator::Asterisk => Expression::Multiply(lhs, rhs),
+        InfixOperator::ForwardSlash => Expression::Divide(lhs, rhs),
+
+        InfixOperator::Equal => Expression::Equal(lhs, rhs),
+        InfixOperator::NotEqual => Expression::NotEqual(lhs, rhs),
+
+        InfixOperator::GreaterThan => Expression::GreaterThan(lhs, rhs),
+        InfixOperator::LessThan => Expression::LessThan(lhs, rhs),
+        InfixOperator::GreaterThanEqual => Expression::GreaterThanEqual(lhs, rhs),
+        InfixOperator::LessThanEqual => Expression::LessThanEqual(lhs, rhs),
+    };
+
+    Ok(res)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::lexer;
+    use rstest::rstest;
+
+    #[rstest]
+    #[case("let int = 5", "let int be 5")]
+    #[case("return 7", "returning 7")]
+    #[case("let x = 5 + 6", "let x be (5 + 6)")]
+    #[case("return 5 + 6", "returning (5 + 6)")]
+    #[case("5 + 6; 7+3", "(5 + 6)\n(7 + 3)")]
+    #[case("(5 + 5) * 3; 2 + 2", "((5 + 5) * 3)\n(2 + 2)")]
+    fn test_parser<'a>(#[case] input: &str, #[case] expected: &str) {
+        let tokens = lexer::tokenize(input).unwrap();
+        let res = parse(tokens);
+
+        assert_eq!(&res.to_string(), expected);
+    }
+
+    #[rstest]
+    #[case("let x 7")]
+    #[case("return")]
+    #[case("let = 8")]
+    #[should_panic]
+    fn test_parser_failure(#[case] input: &str) {
+        let tokens = lexer::tokenize(input).unwrap();
+        parse(tokens);
+    }
+
+    #[rstest]
+    // Terms
+    #[case("5", "5")]
+    #[case("uwu", "uwu")]
+    #[case("true", "true")]
+    #[case("false", "false")]
+    // Prefix operators
+    #[case("!true", "(!true)")]
+    #[case("!false", "(!false)")]
+    #[case("-5", "(-5)")]
+    // Infix operators
+    #[case("5 + 6", "(5 + 6)")]
+    #[case("5 - 6", "(5 - 6)")]
+    #[case("5 * 6", "(5 * 6)")]
+    #[case("5 / 6", "(5 / 6)")]
+    #[case("5 == 6", "(5 == 6)")]
+    #[case("5 != 6", "(5 != 6)")]
+    #[case("5 < 6", "(5 < 6)")]
+    #[case("5 > 6", "(5 > 6)")]
+    #[case("5 <= 6", "(5 <= 6)")]
+    #[case("5 >= 6", "(5 >= 6)")]
+    // Boolean and numeric operators
+    #[case("3 < 5 == true", "((3 < 5) == true)")]
+    // Operator associativity
+    #[case("5 + 6 + 7", "((5 + 6) + 7)")]
+    #[case("a + b - c", "((a + b) - c)")]
+    // Operator precedence
+    #[case("5 + 6 * 8", "(5 + (6 * 8))")]
+    #[case("5 < 7 == 4 > 3", "((5 < 7) == (4 > 3))")]
+    #[case("5 - 6 * 7 + 2", "((5 - (6 * 7)) + 2)")]
+    #[case("1 + (2 + 3) + 4", "((1 + (2 + 3)) + 4)")]
+    #[case("(5 + 5) * 2", "((5 + 5) * 2)")]
+    fn test_parse_expression(#[case] input: &str, #[case] expected: &str) {
+        let mut tokens = lexer::tokenize(input).unwrap();
+        let res = parse_expression(&mut tokens, None, Precedence::Lowest).unwrap();
+        assert_eq!(&res.to_string(), expected);
+    }
+
+    #[rstest]
+    #[case("if true { 5 + 5 };", "if true then { (5 + 5) } else N/A")]
+    #[case("if x > y { x }", "if (x > y) then { x } else N/A")]
+    #[case("if x > y { x } else { y }", "if (x > y) then { x } else { y }")]
+    fn test_if_expression(#[case] input: &str, #[case] expected: &str) {
+        let mut tokens = lexer::tokenize(input).unwrap();
+        let res = parse_expression(&mut tokens, None, Precedence::Lowest).unwrap();
+        assert_eq!(&res.to_string(), expected);
+    }
+}
diff --git a/src/parser/precedence.rs b/src/parser/precedence.rs
new file mode 100644
index 0000000..202e872
--- /dev/null
+++ b/src/parser/precedence.rs
@@ -0,0 +1,30 @@
+use crate::lexer::InfixOperator;
+
+#[derive(Debug, PartialEq, PartialOrd)]
+pub enum Precedence {
+    Lowest,
+    Equals,
+    Ordering,
+    Sum,
+    Product,
+    Prefix,
+    // Call,
+}
+
+pub(super) fn get_precedence(tok: &InfixOperator) -> Precedence {
+    match *tok {
+        InfixOperator::Equal => Precedence::Equals,
+        InfixOperator::NotEqual => Precedence::Equals,
+
+        InfixOperator::LessThan => Precedence::Ordering,
+        InfixOperator::GreaterThan => Precedence::Ordering,
+        InfixOperator::LessThanEqual => Precedence::Ordering,
+        InfixOperator::GreaterThanEqual => Precedence::Ordering,
+
+        InfixOperator::Plus => Precedence::Sum,
+        InfixOperator::Minus => Precedence::Sum,
+
+        InfixOperator::Asterisk => Precedence::Product,
+        InfixOperator::ForwardSlash => Precedence::Product,
+    }
+}
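
Usage sketch: a minimal example of driving the new lexer and parser end to end, assuming it sits
next to the `mod lexer;` / `mod parser;` declarations in src/main.rs (the `demo` name is only for
illustration); the input string and the expected rendering mirror the if-expression test cases in
src/parser/mod.rs.

    // Tokenize and parse one line of input, then pretty-print the resulting AST,
    // roughly what main() and start_repl() do above.
    fn demo() {
        let input = "if x > y { x } else { y }";

        // tokenize() now returns Result<Tokens, LexerError>, so the error has to be
        // unwrapped or propagated.
        let tokens = lexer::tokenize(input).expect("lexer error");

        // parse() returns a Program; it currently panics on a ParserError
        // (see the TODO in parse()).
        let program = parser::parse(tokens);

        // Display for Program prints one statement per line; this prints:
        // if (x > y) then { x } else { y }
        println!("{}", program);
    }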