Ch2 (up to if expressions)
This commit is contained in:
parent
0fd41b6890
commit
f075d236fa
9 changed files with 801 additions and 40 deletions
|
@ -1,7 +1 @@
|
|||
print(5 + 5);
|
||||
|
||||
if (5 == 5) {
|
||||
print(true);
|
||||
} else {
|
||||
print(false);
|
||||
}
|
||||
let x = 5 + 6 + 7;
|
||||
|
|
16
src/lexer/error.rs
Normal file
16
src/lexer/error.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
/// Errors reported by the lexer and by the token-category conversions.
#[derive(Debug)]
pub enum LexerError {
    /// The input contained a character that no token rule accepts.
    IllegalToken,
    /// A token was converted into a category it does not belong to.
    InvalidToken,
}

impl std::fmt::Display for LexerError {
    /// Writes the fixed, human-readable message for each error variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::IllegalToken => "illegal token found; only ascii is valid",
            Self::InvalidToken => "improper token type",
        };

        f.write_str(message)
    }
}

impl std::error::Error for LexerError {}
|
|
@ -1,27 +1,36 @@
|
|||
mod error;
|
||||
mod tokens;
|
||||
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
use tokens::Token;
|
||||
pub use error::LexerError;
|
||||
pub use tokens::{InfixOperator, Keyword, PrefixOperator, Term, Token, Tokens};
|
||||
|
||||
pub fn tokenize(input: &str) -> Vec<Token> {
|
||||
pub fn tokenize(input: &str) -> Result<Tokens, LexerError> {
|
||||
let mut input = input.chars().into_iter().peekable();
|
||||
|
||||
let mut toks = Vec::new();
|
||||
while let Some(tok) = next_token(&mut input) {
|
||||
toks.push(tok)
|
||||
let mut tokens = Vec::new();
|
||||
while let Some(tok) = next_token(&mut input)? {
|
||||
tokens.push(tok)
|
||||
}
|
||||
|
||||
toks
|
||||
Ok(tokens.into_iter().peekable())
|
||||
}
|
||||
|
||||
fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
|
||||
let tok = match input.next()? {
|
||||
fn next_token(input: &mut Peekable<Chars>) -> Result<Option<Token>, LexerError> {
|
||||
let tok = match input.next() {
|
||||
Some(tok) => tok,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let tok = match tok {
|
||||
'+' => Token::Plus,
|
||||
'-' => Token::Minus,
|
||||
'*' => Token::Asterisk,
|
||||
'/' => Token::ForwardSlash,
|
||||
|
||||
// We increment the token count for each of these if they match
|
||||
// their duo since the duo takes up characters
|
||||
'!' => {
|
||||
if input.peek() == Some(&'=') {
|
||||
input.next();
|
||||
|
@ -39,14 +48,29 @@ fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
|
|||
}
|
||||
}
|
||||
|
||||
'<' => Token::LessThan,
|
||||
'>' => Token::GreaterThan,
|
||||
'<' => {
|
||||
if input.peek() == Some(&'=') {
|
||||
input.next();
|
||||
Token::LessThanEqual
|
||||
} else {
|
||||
Token::LessThan
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
if input.peek() == Some(&'=') {
|
||||
input.next();
|
||||
Token::GreaterThanEqual
|
||||
} else {
|
||||
Token::GreaterThan
|
||||
}
|
||||
}
|
||||
|
||||
',' => Token::Comma,
|
||||
';' => Token::Semicolon,
|
||||
|
||||
'(' => Token::LeftParenthesis,
|
||||
')' => Token::RightParenthesis,
|
||||
|
||||
'{' => Token::LeftBrace,
|
||||
'}' => Token::RightBrace,
|
||||
|
||||
|
@ -55,12 +79,15 @@ fn next_token(input: &mut Peekable<Chars>) -> Option<Token> {
|
|||
tok if tok.is_ascii_digit() => read_int(input, tok),
|
||||
|
||||
// Skip whitespace
|
||||
tok if tok.is_ascii_whitespace() => next_token(input)?,
|
||||
tok if tok.is_ascii_whitespace() => match next_token(input)? {
|
||||
Some(tok) => tok,
|
||||
None => return Ok(None),
|
||||
},
|
||||
|
||||
_ => Token::Illegal,
|
||||
_ => return Err(LexerError::IllegalToken),
|
||||
};
|
||||
|
||||
Some(tok)
|
||||
Ok(Some(tok))
|
||||
}
|
||||
|
||||
fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
|
||||
|
@ -78,14 +105,14 @@ fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
|
|||
// Check if our ident is a keyword
|
||||
let ident = toks.iter().cloned().collect::<String>();
|
||||
match ident.as_str() {
|
||||
"true" => Token::True,
|
||||
"false" => Token::False,
|
||||
"fn" => Token::Function,
|
||||
"let" => Token::Let,
|
||||
"if" => Token::If,
|
||||
"else" => Token::Else,
|
||||
"return" => Token::Return,
|
||||
|
||||
"true" => Token::True,
|
||||
"false" => Token::False,
|
||||
ident => Token::Ident(ident.to_owned()),
|
||||
}
|
||||
}
|
||||
|
@ -107,6 +134,7 @@ fn read_int(input: &mut Peekable<Chars>, first: char) -> Token {
|
|||
.collect::<String>()
|
||||
.parse::<i64>()
|
||||
.unwrap();
|
||||
|
||||
Token::Int(int)
|
||||
}
|
||||
|
||||
|
@ -228,8 +256,8 @@ mod tests {
|
|||
Token::RightParenthesis,
|
||||
Token::Semicolon,
|
||||
])]
|
||||
fn test_next_token(#[case] input: &str, #[case] expected: Vec<Token>) {
|
||||
let res = tokenize(input);
|
||||
assert_eq!(res, expected);
|
||||
fn test_lexer(#[case] input: &str, #[case] expected: Vec<Token>) {
|
||||
let res = tokenize(input).unwrap();
|
||||
assert_eq!(res.collect::<Vec<Token>>(), expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,28 @@
|
|||
#[derive(Debug, PartialEq, PartialOrd)]
|
||||
pub enum Token {
|
||||
Illegal,
|
||||
use std::{iter::Peekable, vec::IntoIter};
|
||||
|
||||
// Ident + Literals
|
||||
use super::LexerError;
|
||||
|
||||
pub type Tokens = Peekable<IntoIter<Token>>;
|
||||
|
||||
#[derive(Debug, PartialEq, PartialOrd, Clone)]
|
||||
pub enum Token {
|
||||
Assign,
|
||||
|
||||
// Keywords
|
||||
Function,
|
||||
Let,
|
||||
If,
|
||||
Else,
|
||||
Return,
|
||||
|
||||
// Terms
|
||||
Ident(String),
|
||||
Int(i64),
|
||||
True,
|
||||
False,
|
||||
|
||||
// Operators
|
||||
Bang,
|
||||
|
||||
Assign,
|
||||
Plus,
|
||||
Minus,
|
||||
Asterisk,
|
||||
|
@ -19,23 +32,158 @@ pub enum Token {
|
|||
NotEqual,
|
||||
LessThan,
|
||||
GreaterThan,
|
||||
LessThanEqual,
|
||||
GreaterThanEqual,
|
||||
|
||||
// Delimiters
|
||||
Comma,
|
||||
Semicolon,
|
||||
|
||||
// Expression groupers
|
||||
LeftParenthesis,
|
||||
RightParenthesis,
|
||||
LeftBrace,
|
||||
RightBrace,
|
||||
}
|
||||
|
||||
// Keywords
|
||||
// ==================== Token Types ====================
|
||||
// Terms
|
||||
/// Token category for values that stand on their own: identifiers and literals.
#[derive(Debug, PartialEq, PartialOrd, Clone)]
pub enum Term {
    /// An identifier, carrying its name.
    Ident(String),
    /// An integer literal.
    Int(i64),
    /// The `true` literal.
    True,
    /// The `false` literal.
    False,
}
|
||||
|
||||
Function,
|
||||
Let,
|
||||
impl Term {
|
||||
pub fn is(tok: &Token) -> bool {
|
||||
match tok {
|
||||
Token::Ident(_) | Token::Int(_) | Token::True | Token::False => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Token> for Term {
|
||||
type Error = LexerError;
|
||||
|
||||
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||
let term = match token {
|
||||
Token::Ident(val) => Term::Ident(val),
|
||||
Token::Int(val) => Term::Int(val),
|
||||
Token::True => Term::True,
|
||||
Token::False => Term::False,
|
||||
|
||||
_ => return Err(LexerError::InvalidToken),
|
||||
};
|
||||
|
||||
Ok(term)
|
||||
}
|
||||
}
|
||||
|
||||
// Prefix Operators
|
||||
#[derive(Debug)]
|
||||
pub enum PrefixOperator {
|
||||
Bang,
|
||||
Minus,
|
||||
If,
|
||||
Else,
|
||||
}
|
||||
|
||||
impl TryFrom<&Token> for PrefixOperator {
|
||||
type Error = LexerError;
|
||||
|
||||
fn try_from(token: &Token) -> Result<Self, Self::Error> {
|
||||
let term = match token {
|
||||
Token::Bang => Self::Bang,
|
||||
Token::Minus => Self::Minus,
|
||||
Token::If => Self::If,
|
||||
|
||||
_ => return Err(LexerError::InvalidToken),
|
||||
};
|
||||
|
||||
Ok(term)
|
||||
}
|
||||
}
|
||||
|
||||
impl PrefixOperator {
|
||||
pub fn is(tok: &Token) -> bool {
|
||||
Self::try_from(tok).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
// Infix Operators
|
||||
#[derive(Debug, PartialEq, PartialOrd, Clone)]
|
||||
pub enum InfixOperator {
|
||||
Plus,
|
||||
Minus,
|
||||
Asterisk,
|
||||
ForwardSlash,
|
||||
|
||||
Equal,
|
||||
NotEqual,
|
||||
LessThan,
|
||||
GreaterThan,
|
||||
LessThanEqual,
|
||||
GreaterThanEqual,
|
||||
}
|
||||
|
||||
impl InfixOperator {
|
||||
pub fn is(tok: &Token) -> bool {
|
||||
Self::try_from(tok).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Token> for InfixOperator {
|
||||
type Error = LexerError;
|
||||
|
||||
fn try_from(token: &Token) -> Result<Self, Self::Error> {
|
||||
let term = match token {
|
||||
Token::Plus => Self::Plus,
|
||||
Token::Minus => Self::Minus,
|
||||
Token::Asterisk => Self::Asterisk,
|
||||
Token::ForwardSlash => Self::ForwardSlash,
|
||||
|
||||
Token::Equal => Self::Equal,
|
||||
Token::NotEqual => Self::NotEqual,
|
||||
Token::LessThan => Self::LessThan,
|
||||
Token::LessThanEqual => Self::LessThanEqual,
|
||||
Token::GreaterThan => Self::GreaterThan,
|
||||
Token::GreaterThanEqual => Self::GreaterThanEqual,
|
||||
|
||||
_ => return Err(LexerError::InvalidToken),
|
||||
};
|
||||
|
||||
Ok(term)
|
||||
}
|
||||
}
|
||||
|
||||
// Keywords
|
||||
#[derive(Debug, PartialEq, PartialOrd, Clone)]
|
||||
pub enum Keyword {
|
||||
Let,
|
||||
Function,
|
||||
Return,
|
||||
}
|
||||
|
||||
impl Keyword {
|
||||
pub fn is(tok: &Token) -> bool {
|
||||
Self::try_from(tok).is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Token> for Keyword {
|
||||
type Error = LexerError;
|
||||
|
||||
fn try_from(token: &Token) -> Result<Self, Self::Error> {
|
||||
let term = match token {
|
||||
Token::Let => Self::Let,
|
||||
Token::Function => Self::Function,
|
||||
Token::Return => Self::Return,
|
||||
|
||||
_ => return Err(LexerError::InvalidToken),
|
||||
};
|
||||
|
||||
Ok(term)
|
||||
}
|
||||
}
|
||||
|
|
13
src/main.rs
13
src/main.rs
|
@ -4,6 +4,7 @@ use std::io::{self, Write};
|
|||
use clap::{CommandFactory, Parser};
|
||||
|
||||
mod lexer;
|
||||
mod parser;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(author, version, about)]
|
||||
|
@ -18,8 +19,9 @@ fn main() {
|
|||
match args.path {
|
||||
Some(path) => {
|
||||
let input = fs::read_to_string(&path).unwrap();
|
||||
let tokens = lexer::tokenize(&input);
|
||||
println!("{:?}", tokens);
|
||||
let tokens = lexer::tokenize(&input).unwrap();
|
||||
let ast = parser::parse(tokens);
|
||||
println!("{}", ast);
|
||||
}
|
||||
|
||||
None => start_repl(cmd.get_version().unwrap()),
|
||||
|
@ -37,7 +39,10 @@ fn start_repl(version: &str) {
|
|||
.read_line(&mut input)
|
||||
.expect("failed to read from stdin");
|
||||
|
||||
let tokens = lexer::tokenize(&input);
|
||||
println!("{:?}", tokens);
|
||||
let tokens = lexer::tokenize(&input).unwrap();
|
||||
let ast = parser::parse(tokens);
|
||||
println!("{}", ast);
|
||||
|
||||
input.clear();
|
||||
}
|
||||
}
|
||||
|
|
154
src/parser/ast.rs
Normal file
154
src/parser/ast.rs
Normal file
|
@ -0,0 +1,154 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
// Program
|
||||
pub struct Program(Vec<Node>);
|
||||
|
||||
impl Program {
|
||||
pub fn new(vec: Vec<Node>) -> Program {
|
||||
Program(vec)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Program {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
self.0
|
||||
.iter()
|
||||
.map(|stmt| stmt.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Statements
|
||||
#[derive(Debug)]
|
||||
pub enum Node {
|
||||
Let(String, Expression),
|
||||
Return(Expression),
|
||||
Block(Vec<Node>),
|
||||
Expression(Expression),
|
||||
}
|
||||
|
||||
impl Display for Node {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Node::Let(name, val) => write!(f, "let {} be {}", name, val),
|
||||
Node::Return(val) => write!(f, "returning {}", val),
|
||||
Node::Block(vec) => write!(
|
||||
f,
|
||||
"{{ {} }}",
|
||||
vec.iter()
|
||||
.map(|node| node.to_string())
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
),
|
||||
Node::Expression(val) => write!(f, "{}", val),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Expressions
|
||||
#[derive(Debug)]
|
||||
pub enum Expression {
|
||||
Identifier(String),
|
||||
|
||||
// Literals
|
||||
Integer(i64),
|
||||
Bool(bool),
|
||||
|
||||
// Prefix Operators
|
||||
Not(Box<Expression>),
|
||||
Negative(Box<Expression>),
|
||||
|
||||
// Infix Operators
|
||||
Add(Box<Expression>, Box<Expression>),
|
||||
Subtract(Box<Expression>, Box<Expression>),
|
||||
Multiply(Box<Expression>, Box<Expression>),
|
||||
Divide(Box<Expression>, Box<Expression>),
|
||||
|
||||
Equal(Box<Expression>, Box<Expression>),
|
||||
NotEqual(Box<Expression>, Box<Expression>),
|
||||
LessThan(Box<Expression>, Box<Expression>),
|
||||
GreaterThan(Box<Expression>, Box<Expression>),
|
||||
LessThanEqual(Box<Expression>, Box<Expression>),
|
||||
GreaterThanEqual(Box<Expression>, Box<Expression>),
|
||||
|
||||
If {
|
||||
condition: Box<Expression>,
|
||||
consequence: Box<Node>,
|
||||
alternative: Option<Box<Node>>,
|
||||
},
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
pub fn is_bool(&self) -> bool {
|
||||
match *self {
|
||||
Expression::Identifier(_)
|
||||
| Expression::Not(_)
|
||||
| Expression::Bool(_)
|
||||
| Expression::Equal(_, _)
|
||||
| Expression::NotEqual(_, _)
|
||||
| Expression::LessThan(_, _)
|
||||
| Expression::GreaterThan(_, _)
|
||||
| Expression::LessThanEqual(_, _)
|
||||
| Expression::GreaterThanEqual(_, _) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_numeric(&self) -> bool {
|
||||
match *self {
|
||||
Expression::Identifier(_)
|
||||
| Expression::Negative(_)
|
||||
| Expression::Integer(_)
|
||||
| Expression::Add(_, _)
|
||||
| Expression::Subtract(_, _)
|
||||
| Expression::Multiply(_, _)
|
||||
| Expression::Divide(_, _) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Expression {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Expression::Identifier(val) => write!(f, "{}", val),
|
||||
Expression::Integer(val) => write!(f, "{}", val),
|
||||
Expression::Bool(val) => write!(f, "{}", val),
|
||||
|
||||
Expression::Not(expr) => write!(f, "(!{})", expr),
|
||||
Expression::Negative(expr) => write!(f, "(-{})", expr),
|
||||
|
||||
Expression::Add(rhs, lhs) => write!(f, "({} + {})", rhs, lhs),
|
||||
Expression::Subtract(rhs, lhs) => write!(f, "({} - {})", rhs, lhs),
|
||||
Expression::Multiply(rhs, lhs) => write!(f, "({} * {})", rhs, lhs),
|
||||
Expression::Divide(rhs, lhs) => write!(f, "({} / {})", rhs, lhs),
|
||||
|
||||
Expression::Equal(rhs, lhs) => write!(f, "({} == {})", rhs, lhs),
|
||||
Expression::NotEqual(rhs, lhs) => write!(f, "({} != {})", rhs, lhs),
|
||||
Expression::LessThan(rhs, lhs) => write!(f, "({} < {})", rhs, lhs),
|
||||
Expression::GreaterThan(rhs, lhs) => write!(f, "({} > {})", rhs, lhs),
|
||||
Expression::LessThanEqual(rhs, lhs) => write!(f, "({} <= {})", rhs, lhs),
|
||||
Expression::GreaterThanEqual(rhs, lhs) => write!(f, "({} >= {})", rhs, lhs),
|
||||
|
||||
Expression::If {
|
||||
condition,
|
||||
consequence,
|
||||
alternative,
|
||||
} => write!(
|
||||
f,
|
||||
"if {} then {} else {}",
|
||||
condition,
|
||||
consequence,
|
||||
match alternative {
|
||||
Some(expr) => expr.to_string(),
|
||||
None => "N/A".to_string(),
|
||||
}
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
32
src/parser/error.rs
Normal file
32
src/parser/error.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use crate::lexer::Token;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ParserError {
|
||||
EOF,
|
||||
UnexpectedToken(&'static str, Token),
|
||||
ExpectedExpression,
|
||||
ExpectedBoolean,
|
||||
ExpectedNumeric,
|
||||
ExpectedBlock,
|
||||
ExpectedRightParenthesis,
|
||||
}
|
||||
|
||||
impl Display for ParserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
ParserError::EOF => write!(f, "unexpected EOF"),
|
||||
ParserError::UnexpectedToken(expected, got) => {
|
||||
write!(f, "invalid token; expected {} got {:?}", expected, got)
|
||||
}
|
||||
ParserError::ExpectedExpression => write!(f, "expected expression"),
|
||||
ParserError::ExpectedBoolean => write!(f, "expected boolean expression"),
|
||||
ParserError::ExpectedNumeric => write!(f, "expected numeric expression"),
|
||||
ParserError::ExpectedBlock => write!(f, "expected block"),
|
||||
ParserError::ExpectedRightParenthesis => write!(f, "expected right parenthesis"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for ParserError {}
|
354
src/parser/mod.rs
Normal file
354
src/parser/mod.rs
Normal file
|
@ -0,0 +1,354 @@
|
|||
use crate::lexer::{InfixOperator, Keyword, PrefixOperator, Term, Token, Tokens};
|
||||
|
||||
mod ast;
|
||||
mod error;
|
||||
mod precedence;
|
||||
|
||||
pub use ast::{Expression, Node, Program};
|
||||
pub use error::ParserError;
|
||||
|
||||
use self::precedence::{get_prescedence, Precedence};
|
||||
|
||||
pub fn parse(tokens: Tokens) -> Program {
|
||||
let mut tokens = tokens;
|
||||
let mut ast = Vec::new();
|
||||
while let Some(node) = next_node(&mut tokens) {
|
||||
match node {
|
||||
Ok(node) => ast.push(node),
|
||||
// TODO: Handle this more gracefully than a panic
|
||||
Err(err) => panic!("{}", err),
|
||||
}
|
||||
}
|
||||
|
||||
Program::new(ast)
|
||||
}
|
||||
|
||||
fn next_node<'a>(tokens: &mut Tokens) -> Option<Result<Node, ParserError>> {
|
||||
let node = match tokens.peek()? {
|
||||
tok if Keyword::is(tok) => {
|
||||
let keyword = Keyword::try_from(tok).unwrap();
|
||||
|
||||
match keyword {
|
||||
Keyword::Let => parse_let_statement(tokens),
|
||||
Keyword::Return => parse_return_statement(tokens),
|
||||
_ => panic!("not implemented"),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse Valid expression
|
||||
tok if PrefixOperator::is(tok) || Term::is(tok) || tok == &Token::LeftParenthesis => {
|
||||
parse_expression_statement(tokens)
|
||||
}
|
||||
|
||||
Token::LeftBrace => parse_block_statement(tokens),
|
||||
|
||||
Token::Semicolon => {
|
||||
// Eat ;
|
||||
tokens.next();
|
||||
next_node(tokens)?
|
||||
}
|
||||
|
||||
tok => panic!("not implemented: {:?}", tok),
|
||||
};
|
||||
|
||||
Some(node)
|
||||
}
|
||||
|
||||
// Statement parsing
|
||||
fn parse_let_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
|
||||
// Get rid of `let`
|
||||
tokens.next();
|
||||
|
||||
let ident = match tokens.next() {
|
||||
Some(Token::Ident(ident)) => ident,
|
||||
Some(tok) => return Err(ParserError::UnexpectedToken("identifier", tok)),
|
||||
None => return Err(ParserError::EOF),
|
||||
};
|
||||
|
||||
// Ensure `=` follows the identifier
|
||||
match tokens.next() {
|
||||
Some(tok) if tok != Token::Assign => return Err(ParserError::UnexpectedToken("=", tok)),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
let val = parse_expression(tokens, None, Precedence::Lowest)?;
|
||||
|
||||
Ok(Node::Let(ident, val))
|
||||
}
|
||||
|
||||
fn parse_return_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
|
||||
// Get rid of `return`
|
||||
tokens.next();
|
||||
|
||||
let val = parse_expression(tokens, None, Precedence::Lowest)?;
|
||||
Ok(Node::Return(val))
|
||||
}
|
||||
|
||||
fn parse_expression_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
|
||||
let val = parse_expression(tokens, None, Precedence::Lowest)?;
|
||||
Ok(Node::Expression(val))
|
||||
}
|
||||
|
||||
fn parse_block_statement(tokens: &mut Tokens) -> Result<Node, ParserError> {
|
||||
let mut statements: Vec<Node> = Vec::new();
|
||||
|
||||
// Ensure block starts with {
|
||||
if tokens.next() != Some(Token::LeftBrace) {
|
||||
return Err(ParserError::ExpectedBlock);
|
||||
};
|
||||
|
||||
while tokens.peek() != Some(&Token::RightBrace) {
|
||||
match next_node(tokens) {
|
||||
Some(Ok(stmt)) => statements.push(stmt),
|
||||
Some(Err(err)) => return Err(err),
|
||||
None => return Err(ParserError::EOF),
|
||||
}
|
||||
}
|
||||
|
||||
// Eat }
|
||||
tokens.next();
|
||||
|
||||
Ok(Node::Block(statements))
|
||||
}
|
||||
|
||||
// Expression parsing
|
||||
fn parse_expression(
|
||||
tokens: &mut Tokens,
|
||||
lhs: Option<Expression>,
|
||||
precedence: Precedence,
|
||||
) -> Result<Expression, ParserError> {
|
||||
// If LHS exists, then unwrap it. Otherwise, parse the next token to determine what LHS is
|
||||
let lhs = match lhs {
|
||||
Some(lhs) => lhs,
|
||||
None => match tokens.next() {
|
||||
// Prefix operators
|
||||
Some(operator) if PrefixOperator::is(&operator) => {
|
||||
parse_prefix_operator(tokens, PrefixOperator::try_from(&operator).unwrap())?
|
||||
}
|
||||
|
||||
// Grouped expressions
|
||||
Some(Token::LeftParenthesis) => {
|
||||
let res = parse_expression(tokens, None, Precedence::Lowest)?;
|
||||
|
||||
if tokens.next() != Some(Token::RightParenthesis) {
|
||||
return Err(ParserError::ExpectedRightParenthesis);
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
// Parse terms
|
||||
Some(term) if Term::is(&term) => parse_term(term.try_into().unwrap())?,
|
||||
|
||||
Some(_) => return Err(ParserError::ExpectedExpression),
|
||||
None => return Err(ParserError::EOF),
|
||||
},
|
||||
};
|
||||
|
||||
let expr = match tokens.peek() {
|
||||
None
|
||||
| Some(Token::RightParenthesis)
|
||||
| Some(Token::LeftBrace)
|
||||
| Some(Token::RightBrace)
|
||||
| Some(Token::Semicolon) => return Ok(lhs),
|
||||
|
||||
Some(tok) => match tok {
|
||||
// Infix Operator
|
||||
tok if InfixOperator::is(tok) => {
|
||||
let operator = InfixOperator::try_from(tok).unwrap();
|
||||
if precedence >= get_prescedence(&operator) {
|
||||
return Ok(lhs);
|
||||
}
|
||||
|
||||
parse_infix_operator(tokens, lhs)?
|
||||
}
|
||||
|
||||
// Prefix Operator
|
||||
// Since `-` is a prefix and infix operator, we give way to InfixOperator::Minus first
|
||||
tok if PrefixOperator::is(tok) => {
|
||||
let operator = tok.try_into().unwrap();
|
||||
parse_prefix_operator(tokens, operator)?
|
||||
}
|
||||
|
||||
// Term
|
||||
tok if Term::is(tok) => {
|
||||
let term = tok.clone().try_into().unwrap();
|
||||
parse_term(term)?
|
||||
}
|
||||
|
||||
// Invalid tokens
|
||||
_ => return Err(ParserError::ExpectedExpression),
|
||||
},
|
||||
};
|
||||
|
||||
parse_expression(tokens, Some(expr), precedence)
|
||||
}
|
||||
|
||||
fn parse_term(token: Term) -> Result<Expression, ParserError> {
|
||||
let res = match token {
|
||||
// Variables, functions, etc.
|
||||
Term::Ident(val) => Expression::Identifier(val),
|
||||
|
||||
// Literals
|
||||
Term::Int(val) => Expression::Integer(val),
|
||||
Term::True => Expression::Bool(true),
|
||||
Term::False => Expression::Bool(false),
|
||||
};
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn parse_prefix_operator(
|
||||
tokens: &mut Tokens,
|
||||
operator: PrefixOperator,
|
||||
) -> Result<Expression, ParserError> {
|
||||
let expr = match operator {
|
||||
// Not
|
||||
PrefixOperator::Bang => match parse_expression(tokens, None, Precedence::Prefix)? {
|
||||
expr if expr.is_bool() => Expression::Not(Box::new(expr)),
|
||||
_ => return Err(ParserError::ExpectedBoolean),
|
||||
},
|
||||
|
||||
// Negative
|
||||
PrefixOperator::Minus => match parse_expression(tokens, None, Precedence::Prefix)? {
|
||||
expr if expr.is_numeric() => {
|
||||
let val = Box::new(expr);
|
||||
Expression::Negative(val)
|
||||
}
|
||||
_ => return Err(ParserError::ExpectedNumeric),
|
||||
},
|
||||
|
||||
PrefixOperator::If => {
|
||||
let condition = parse_expression(tokens, None, Precedence::Lowest)?;
|
||||
let consequence = parse_block_statement(tokens)?;
|
||||
|
||||
let alternative = if tokens.peek() == Some(&Token::Else) {
|
||||
// Eat else
|
||||
tokens.next();
|
||||
|
||||
Some(Box::new(parse_block_statement(tokens)?))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Expression::If {
|
||||
condition: Box::new(condition),
|
||||
consequence: Box::new(consequence),
|
||||
alternative,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_infix_operator(tokens: &mut Tokens, lhs: Expression) -> Result<Expression, ParserError> {
|
||||
let operator = match tokens.next() {
|
||||
Some(operator) if InfixOperator::is(&operator) => {
|
||||
InfixOperator::try_from(&operator).unwrap()
|
||||
}
|
||||
Some(tok) => return Err(ParserError::UnexpectedToken("infix operator", tok)),
|
||||
None => return Err(ParserError::EOF),
|
||||
};
|
||||
|
||||
let lhs = Box::new(lhs);
|
||||
let rhs = parse_expression(tokens, None, get_prescedence(&operator))?;
|
||||
let rhs = Box::new(rhs);
|
||||
|
||||
let res = match operator {
|
||||
InfixOperator::Plus => Expression::Add(lhs, rhs),
|
||||
InfixOperator::Minus => Expression::Subtract(lhs, rhs),
|
||||
|
||||
InfixOperator::Asterisk => Expression::Multiply(lhs, rhs),
|
||||
InfixOperator::ForwardSlash => Expression::Divide(lhs, rhs),
|
||||
|
||||
InfixOperator::Equal => Expression::Equal(lhs, rhs),
|
||||
InfixOperator::NotEqual => Expression::NotEqual(lhs, rhs),
|
||||
|
||||
InfixOperator::GreaterThan => Expression::GreaterThan(lhs, rhs),
|
||||
InfixOperator::LessThan => Expression::LessThan(lhs, rhs),
|
||||
InfixOperator::GreaterThanEqual => Expression::GreaterThanEqual(lhs, rhs),
|
||||
InfixOperator::LessThanEqual => Expression::LessThanEqual(lhs, rhs),
|
||||
};
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer;
    use rstest::rstest;

    /// Whole programs: statements are parsed and printed one per line.
    #[rstest]
    #[case("let int = 5", "let int be 5")]
    #[case("return 7", "returning 7")]
    #[case("let x = 5 + 6", "let x be (5 + 6)")]
    #[case("return 5 + 6", "returning (5 + 6)")]
    #[case("5 + 6; 7+3", "(5 + 6)\n(7 + 3)")]
    #[case("(5 + 5) * 3; 2 + 2", "((5 + 5) * 3)\n(2 + 2)")]
    fn test_parser(#[case] input: &str, #[case] expected: &str) {
        let tokens = lexer::tokenize(input).unwrap();
        let res = parse(tokens);

        assert_eq!(&res.to_string(), expected);
    }

    /// Malformed statements: `parse` panics on the first parser error.
    #[rstest]
    #[case("let x 7")]
    #[case("return")]
    #[case("let = 8")]
    #[should_panic]
    fn test_parser_failure(#[case] input: &str) {
        let tokens = lexer::tokenize(input).unwrap();
        parse(tokens);
    }

    /// Single expressions, printed fully parenthesised.
    #[rstest]
    // Terms
    #[case("5", "5")]
    #[case("uwu", "uwu")]
    #[case("true", "true")]
    #[case("false", "false")]
    // Prefix operators
    #[case("!true", "(!true)")]
    #[case("!false", "(!false)")]
    #[case("-5", "(-5)")]
    // Infix operators
    #[case("5 + 6", "(5 + 6)")]
    #[case("5 - 6", "(5 - 6)")]
    #[case("5 * 6", "(5 * 6)")]
    #[case("5 / 6", "(5 / 6)")]
    #[case("5 == 6", "(5 == 6)")]
    #[case("5 != 6", "(5 != 6)")]
    #[case("5 < 6", "(5 < 6)")]
    #[case("5 > 6", "(5 > 6)")]
    #[case("5 <= 6", "(5 <= 6)")]
    #[case("5 >= 6", "(5 >= 6)")]
    // Boolean and numeric operators
    #[case("3 < 5 == true", "((3 < 5) == true)")]
    // Operator associativity
    #[case("5 + 6 + 7", "((5 + 6) + 7)")]
    #[case("a + b - c", "((a + b) - c)")]
    // Operator precedence
    #[case("5 + 6 * 8", "(5 + (6 * 8))")]
    #[case("5 < 7 == 4 > 3", "((5 < 7) == (4 > 3))")]
    #[case("5 - 6 * 7 + 2", "((5 - (6 * 7)) + 2)")]
    #[case("1 + (2 + 3) + 4", "((1 + (2 + 3)) + 4)")]
    #[case("(5 + 5) * 2", "((5 + 5) * 2)")]
    fn test_parse_expression(#[case] input: &str, #[case] expected: &str) {
        let mut tokens = lexer::tokenize(input).unwrap();
        let res = parse_expression(&mut tokens, None, Precedence::Lowest).unwrap();
        assert_eq!(&res.to_string(), expected);
    }

    /// `if` expressions, with and without an `else` branch.
    #[rstest]
    #[case("if true { 5 + 5 };", "if true then { (5 + 5) } else N/A")]
    #[case("if x > y { x }", "if (x > y) then { x } else N/A")]
    #[case("if x > y { x } else { y }", "if (x > y) then { x } else { y }")]
    fn test_if_expression(#[case] input: &str, #[case] expected: &str) {
        let mut tokens = lexer::tokenize(input).unwrap();
        let res = parse_expression(&mut tokens, None, Precedence::Lowest).unwrap();
        assert_eq!(&res.to_string(), expected);
    }
}
|
30
src/parser/precedence.rs
Normal file
30
src/parser/precedence.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use crate::lexer::InfixOperator;
|
||||
|
||||
/// Binding strength of operators, declared from weakest to strongest.
///
/// The derived ordering follows declaration order, so a later variant compares
/// greater than an earlier one.
#[derive(Debug, PartialEq, PartialOrd)]
pub enum Precedence {
    /// Starting level; weaker than every operator.
    Lowest,
    /// Equality comparisons.
    Equals,
    /// Ordering comparisons.
    Ordering,
    /// Addition and subtraction.
    Sum,
    /// Multiplication and division.
    Product,
    /// Prefix operators.
    Prefix,
    // Call,
}
|
||||
|
||||
pub(super) fn get_prescedence(tok: &InfixOperator) -> Precedence {
|
||||
match *tok {
|
||||
InfixOperator::Equal => Precedence::Equals,
|
||||
InfixOperator::NotEqual => Precedence::Equals,
|
||||
|
||||
InfixOperator::LessThan => Precedence::Ordering,
|
||||
InfixOperator::GreaterThan => Precedence::Ordering,
|
||||
InfixOperator::LessThanEqual => Precedence::Ordering,
|
||||
InfixOperator::GreaterThanEqual => Precedence::Ordering,
|
||||
|
||||
InfixOperator::Plus => Precedence::Sum,
|
||||
InfixOperator::Minus => Precedence::Sum,
|
||||
|
||||
InfixOperator::Asterisk => Precedence::Product,
|
||||
InfixOperator::ForwardSlash => Precedence::Product,
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue