Mal Step 1

This commit is contained in:
Roman Godmaire 2023-09-13 08:47:24 -04:00
parent ac87083777
commit 96f822ace8
5 changed files with 764 additions and 8 deletions

273
Cargo.lock generated
View file

@ -2,6 +2,279 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "aho-corasick"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
dependencies = [
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "futures"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
[[package]]
name = "futures-executor"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
[[package]]
name = "futures-macro"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "futures-sink"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
[[package]]
name = "futures-task"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
[[package]]
name = "futures-timer"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
[[package]]
name = "futures-util"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-macro",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]] [[package]]
name = "mal" name = "mal"
version = "0.1.0" version = "0.1.0"
dependencies = [
"anyhow",
"rstest",
]
[[package]]
name = "memchr"
version = "2.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
[[package]]
name = "pin-project-lite"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "relative-path"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca"
[[package]]
name = "rstest"
version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199"
dependencies = [
"futures",
"futures-timer",
"rstest_macros",
"rustc_version",
]
[[package]]
name = "rstest_macros"
version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
dependencies = [
"cfg-if",
"glob",
"proc-macro2",
"quote",
"regex",
"relative-path",
"rustc_version",
"syn",
"unicode-ident",
]
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "semver"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "syn"
version = "2.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"

View file

@ -6,3 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
anyhow = "1.0.75"
[dev-dependencies]
rstest = "0.18.2"

253
src/lexer.rs Normal file
View file

@ -0,0 +1,253 @@
use std::{iter::Peekable, str::Chars};
use anyhow::{bail, Result};
/// A lexical token produced by the lexer's `read` function.
///
/// Punctuation variants carry no data; value variants carry the text or number that was lexed.
#[derive(Debug, PartialEq, PartialOrd)]
pub enum Token {
/// `(`
LeftParen,
/// `)`
RightParen,
/// `[`
LeftBracket,
/// `]`
RightBracket,
/// `{`
LeftBrace,
/// `}`
RightBrace,
/// The two-character sequence `~@`.
WeirdSign,
/// `'`
Apostrophe,
/// A backtick.
Grave,
/// A `~` that is not immediately followed by `@`.
Tilde,
/// `^`
Carot,
/// `@`
AtSign,
// Math Operators
/// `+`
Plus,
/// A `-` that is not immediately followed by a digit.
Minus,
/// `*`
Asterisk,
/// `/`
Slash,
// Values
/// The ASCII alphanumeric text following a `:`; the colon itself is not stored.
Keyword(String),
/// A decimal integer literal, optionally negative.
Int(i64),
/// The characters between a pair of double quotes; the quotes are not stored.
String(String),
/// A name starting with an ASCII letter, other than `true`, `false` and `nil`.
Ident(String),
/// The literal `true`.
True,
/// The literal `false`.
False,
/// The literal `nil`.
Nil,
}
/// Lexes `input` into its complete sequence of tokens.
///
/// Tokens are pulled one at a time until the input is exhausted.
///
/// # Errors
///
/// Returns the first error reported while lexing a token; no tokens are returned in that case.
pub fn read(input: &str) -> Result<Vec<Token>> {
    let mut chars = input.chars().peekable();
    // `transpose` turns "no more tokens" (`Ok(None)`) into the end of the iterator, and
    // collecting into a `Result` stops at the first error.
    std::iter::from_fn(|| next_token(&mut chars).transpose()).collect()
}
/// Lexes the next token from `input`.
///
/// Whitespace and `;` line comments are skipped first. Returns `Ok(None)` once the input is
/// exhausted.
///
/// # Errors
///
/// Fails on a character that cannot start any token, on a string literal with no closing
/// quote, and on an integer literal that does not fit in an `i64`.
fn next_token(input: &mut Peekable<Chars>) -> Result<Option<Token>> {
    // Skip insignificant input with a loop rather than recursing once per character, so a long
    // run of whitespace or comments cannot exhaust the stack.
    let first = loop {
        match input.next() {
            None => return Ok(None),
            Some(';') => {
                // A comment runs through the end of the line (or the end of the input).
                for c in input.by_ref() {
                    if c == '\n' {
                        break;
                    }
                }
            }
            Some(c) if c.is_whitespace() => {}
            Some(c) => break c,
        }
    };

    let tok = match first {
        // `~@` is a single token; consume the `@` as well.
        '~' if input.peek() == Some(&'@') => {
            input.next();
            Token::WeirdSign
        }
        // A `-` directly followed by a digit starts a negative integer literal.
        '-' if input.peek().is_some_and(|c| c.is_ascii_digit()) => read_int(input, '-')?,
        '(' => Token::LeftParen,
        ')' => Token::RightParen,
        '[' => Token::LeftBracket,
        ']' => Token::RightBracket,
        '{' => Token::LeftBrace,
        '}' => Token::RightBrace,
        '\'' => Token::Apostrophe,
        '`' => Token::Grave,
        '~' => Token::Tilde,
        '^' => Token::Carot,
        '@' => Token::AtSign,
        '+' => Token::Plus,
        '-' => Token::Minus,
        '*' => Token::Asterisk,
        '/' => Token::Slash,
        '"' => read_string(input)?,
        ':' => read_keyword(input),
        c if c.is_ascii_digit() => read_int(input, c)?,
        c if c.is_ascii_alphabetic() => read_ident(input, c),
        c => bail!("illegal token: {:?}", c),
    };
    Ok(Some(tok))
}

/// Reads the remainder of a string literal whose opening `"` has already been consumed.
///
/// Every character up to the next `"` becomes part of the string; the closing quote is
/// consumed but not stored. No escape sequences are interpreted.
///
/// # Errors
///
/// Fails with "unbalanced string" if the input ends before a closing quote is found.
fn read_string(input: &mut Peekable<Chars>) -> Result<Token> {
    let mut contents = String::new();
    loop {
        match input.next() {
            Some('"') => return Ok(Token::String(contents)),
            Some(c) => contents.push(c),
            None => bail!("unbalanced string"),
        }
    }
}

/// Reads a keyword whose leading `:` has already been consumed.
///
/// Consumes the following run of ASCII alphanumeric characters; the run may be empty, in which
/// case the keyword text is the empty string.
fn read_keyword(input: &mut Peekable<Chars>) -> Token {
    let mut name = String::new();
    while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric()) {
        name.push(c);
    }
    Token::Keyword(name)
}

/// Reads an integer literal whose first character (`first`, a digit or `-`) has already been
/// consumed.
///
/// Consumes the following run of ASCII digits and parses the whole literal as an `i64`.
///
/// # Errors
///
/// Fails if the literal cannot be parsed as an `i64`, e.g. because it is out of range.
fn read_int(input: &mut Peekable<Chars>, first: char) -> Result<Token> {
    let mut digits = String::from(first);
    while let Some(c) = input.next_if(|c| c.is_ascii_digit()) {
        digits.push(c);
    }
    // The literal comes straight from user input, so report a failure instead of panicking.
    match digits.parse::<i64>() {
        Ok(value) => Ok(Token::Int(value)),
        Err(err) => bail!("invalid integer literal {:?}: {}", digits, err),
    }
}
/// Reads an identifier whose first character, `first`, has already been consumed.
///
/// Consumes the following run of ASCII alphanumeric characters. The words `true`, `false` and
/// `nil` become their dedicated tokens; anything else becomes a `Token::Ident`.
fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
    let mut name = String::from(first);
    while let Some(c) = input.next_if(|c| c.is_ascii_alphanumeric()) {
        name.push(c);
    }
    match name.as_str() {
        "true" => Token::True,
        "false" => Token::False,
        "nil" => Token::Nil,
        _ => Token::Ident(name),
    }
}
// Table-driven tests for the lexer, built with `rstest` cases.
#[cfg(test)]
mod test {
use super::*;
use rstest::rstest;
// Each case pairs an input string with the exact token sequence `read` must produce.
#[rstest]
#[case("()[]{}", vec![Token::LeftParen, Token::RightParen, Token::LeftBracket, Token::RightBracket, Token::LeftBrace, Token::RightBrace])]
// `~@` must lex as one `WeirdSign`, while `~` and `@` separated by a space stay separate tokens.
#[case(" ' ` ^ ~@ ~ @", vec![Token::Apostrophe, Token::Grave, Token::Carot, Token::WeirdSign, Token::Tilde, Token::AtSign])]
#[case("(+ 1 2)", vec![Token::LeftParen, Token::Plus, Token::Int(1), Token::Int(2), Token::RightParen])]
#[case("(- 1 2)", vec![Token::LeftParen, Token::Minus, Token::Int(1), Token::Int(2), Token::RightParen])]
#[case("(* 1 2)", vec![Token::LeftParen, Token::Asterisk, Token::Int(1), Token::Int(2), Token::RightParen])]
#[case("(/ 1 2)", vec![Token::LeftParen, Token::Slash, Token::Int(1), Token::Int(2), Token::RightParen])]
// A `-` directly before a digit belongs to the number; a `-` before a space is the operator.
#[case("(- -2 1)", vec![Token::LeftParen, Token::Minus, Token::Int(-2), Token::Int(1), Token::RightParen])]
#[case("(\"string and stuff\")", vec![Token::LeftParen, Token::String("string and stuff".into()), Token::RightParen])]
#[case(
"(func a b)",
vec![
Token::LeftParen,
Token::Ident("func".into()),
Token::Ident("a".into()),
Token::Ident("b".into()),
Token::RightParen
]
)]
#[case(
"(+ 1 (- 2 1))",
vec![
Token::LeftParen,
Token::Plus,
Token::Int(1),
Token::LeftParen,
Token::Minus,
Token::Int(2),
Token::Int(1),
Token::RightParen,
Token::RightParen
]
)]
// A `;` comment is discarded through the end of its line; lexing resumes on the next line.
#[case(
"(fn a ;; This comment is useless
(+ 1 2))",
vec![
Token::LeftParen,
Token::Ident("fn".into()),
Token::Ident("a".into()),
Token::LeftParen,
Token::Plus,
Token::Int(1),
Token::Int(2),
Token::RightParen,
Token::RightParen
]
)]
fn test_lexer(#[case] input: &str, #[case] expected: Vec<Token>) {
let res = read(input).unwrap();
assert_eq!(res, expected);
}
// Inputs that `read` must reject with an error.
#[rstest]
// Unbalanced string
#[case("(\"asdf)")]
fn test_lexer_errors(#[case] input: &str) {
let res = read(input);
assert!(res.is_err());
}
}

View file

@ -1,5 +1,8 @@
use std::io::{self, Write}; use std::io::{self, Write};
mod lexer;
mod parser;
fn main() { fn main() {
let mut input = String::new(); let mut input = String::new();
@ -17,8 +20,9 @@ fn main() {
break; break;
} }
let ast = read(&input); let tokens = lexer::read(&input).unwrap();
let res = eval(&ast); let ast = parser::parse(tokens).unwrap();
let res = eval(ast);
println!("{res}"); println!("{res}");
@ -26,10 +30,6 @@ fn main() {
} }
} }
fn read(input: &str) -> String { fn eval(input: Vec<parser::Node>) -> String {
input.to_owned() format!("{input:?}")
}
fn eval(input: &str) -> String {
input.to_owned()
} }

226
src/parser.rs Normal file
View file

@ -0,0 +1,226 @@
use std::{iter::Peekable, vec::IntoIter};
use anyhow::{bail, Result};
use crate::lexer::Token;
/// A node of the syntax tree produced by `parse`.
#[derive(Debug, PartialEq, PartialOrd)]
pub enum Node {
/// The forms read between `(` and `)`, in source order. Quote-style reader macros also
/// produce a two-element list: a symbol naming the quote kind followed by the quoted form.
List(Vec<Node>),
/// The forms read between `[` and `]`, in source order.
Vector(Vec<Node>),
/// The forms read between `{` and `}`, stored as a flat sequence in source order.
HashMap(Vec<Node>),
/// An identifier, or one of the operators `+`, `-`, `*`, `/`.
Symbol(String),
/// The text of a keyword token.
Keyword(String),
/// An integer literal.
Int(i64),
/// The contents of a string literal.
String(String),
/// The literal `true`.
True,
/// The literal `false`.
False,
/// The literal `nil`.
Nil,
}
/// Parses a token sequence into the list of top-level forms it contains.
///
/// # Errors
///
/// Returns the first error reported while parsing a form; no forms are returned in that case.
pub fn parse(tokens: Vec<Token>) -> Result<Vec<Node>> {
    let mut stream = tokens.into_iter().peekable();
    let mut forms = Vec::new();
    loop {
        match next_statement(&mut stream)? {
            Some(node) => forms.push(node),
            // No tokens left: everything parsed so far is the result.
            None => return Ok(forms),
        }
    }
}
/// Parses the next complete form from `tokens`.
///
/// Returns `Ok(None)` when no tokens remain. Opening delimiters read a whole collection, quote
/// tokens wrap the form that follows them, and every other token maps directly to a leaf node.
///
/// # Errors
///
/// Fails on a closing delimiter with no matching opener, on the not-yet-supported `^` and `@`
/// reader macros, and on any error from reading a nested collection or quoted form.
fn next_statement(tokens: &mut Peekable<IntoIter<Token>>) -> Result<Option<Node>> {
    let Some(tok) = tokens.next() else {
        return Ok(None);
    };
    let node = match tok {
        Token::LeftParen => read_list(tokens, Token::RightParen)?,
        Token::RightParen => bail!("closing parenthesis does not have matching open parenthesis"),
        Token::LeftBracket => read_list(tokens, Token::RightBracket)?,
        Token::RightBracket => bail!("closing bracket does not have matching open bracket"),
        Token::LeftBrace => read_list(tokens, Token::RightBrace)?,
        Token::RightBrace => bail!("closing brace does not have matching open brace"),
        Token::WeirdSign => read_quote(tokens, "splice-unquote")?,
        Token::Apostrophe => read_quote(tokens, "quote")?,
        Token::Grave => read_quote(tokens, "quasiquote")?,
        Token::Tilde => read_quote(tokens, "unquote")?,
        // These tokens come straight from user input, so report an error rather than panic.
        // TODO: meta
        Token::Carot => bail!("the `^` (meta) reader macro is not supported yet"),
        // TODO: deref
        Token::AtSign => bail!("the `@` (deref) reader macro is not supported yet"),
        Token::Plus => Node::Symbol("+".into()),
        Token::Minus => Node::Symbol("-".into()),
        Token::Asterisk => Node::Symbol("*".into()),
        Token::Slash => Node::Symbol("/".into()),
        Token::Keyword(val) => Node::Keyword(val),
        Token::Ident(val) => Node::Symbol(val),
        Token::String(val) => Node::String(val),
        Token::Int(int) => Node::Int(int),
        Token::True => Node::True,
        Token::False => Node::False,
        Token::Nil => Node::Nil,
    };
    Ok(Some(node))
}
/// Reads the elements of a collection whose opening delimiter has already been consumed.
///
/// Forms are read until `closer` is the next token; the closer is then consumed. The kind of
/// collection returned is chosen by `closer`: `RightParen` gives a list, `RightBracket` a
/// vector and `RightBrace` a hash map.
///
/// # Errors
///
/// Fails if the tokens run out before `closer` is seen, if an element fails to parse, or if
/// `closer` is not one of the three closing delimiters.
fn read_list(tokens: &mut Peekable<IntoIter<Token>>, closer: Token) -> Result<Node> {
    let mut items = Vec::new();
    // `next_if_eq` consumes the next token only when it is the closer, ending the loop.
    while tokens.next_if_eq(&closer).is_none() {
        match next_statement(tokens)? {
            Some(node) => items.push(node),
            // Out of tokens before the closer was found.
            None => match closer {
                Token::RightParen => bail!("unclosed list"),
                Token::RightBracket => bail!("unclosed vector"),
                Token::RightBrace => bail!("unclosed hashmap"),
                _ => bail!("unreachable"),
            },
        }
    }
    match closer {
        Token::RightParen => Ok(Node::List(items)),
        Token::RightBracket => Ok(Node::Vector(items)),
        Token::RightBrace => Ok(Node::HashMap(items)),
        // This should theoretically be unreachable
        _ => bail!(
            "invalid collection type using closer {:?}. This is a bug; please file a bug report",
            closer
        ),
    }
}
fn read_quote(tokens: &mut Peekable<IntoIter<Token>>, quote_type: &str) -> Result<Node> {
let follower_node = match next_statement(tokens)? {
Some(node) => node,
None => bail!("quote does not have a valid follower node"),
};
Ok(Node::List(vec![
Node::Symbol(quote_type.into()),
follower_node,
]))
}
// Table-driven tests for the parser; every input is lexed with `lexer::read` before parsing.
#[cfg(test)]
mod test {
use crate::lexer;
use super::*;
use rstest::rstest;
// Each case pairs a source string with the exact list of top-level nodes `parse` must return.
#[rstest]
#[case("10", vec![
Node::Int(10)])]
#[case(":owo", vec![
Node::Keyword("owo".into())])]
#[case("\"uwu\"", vec![
Node::String("uwu".into())])]
// The three collection kinds differ only in their delimiters.
#[case("(10 2)", vec![
Node::List(vec![
Node::Int(10),
Node::Int(2)])])]
#[case("[10 2]", vec![
Node::Vector(vec![
Node::Int(10),
Node::Int(2)])])]
#[case("{10 2}", vec![
Node::HashMap(vec![
Node::Int(10),
Node::Int(2)])])]
#[case("(+ - * /)", vec![
Node::List(vec![
Node::Symbol("+".into()),
Node::Symbol("-".into()),
Node::Symbol("*".into()),
Node::Symbol("/".into())])])]
// Quote-style reader macros expand to a list headed by the matching symbol.
#[case("'(1 2 3)", vec![
Node::List(vec![
Node::Symbol("quote".into()),
Node::List(vec![
Node::Int(1),
Node::Int(2),
Node::Int(3)])])])]
#[case("`(1 2 3)", vec![
Node::List(vec![
Node::Symbol("quasiquote".into()),
Node::List(vec![
Node::Int(1),
Node::Int(2),
Node::Int(3)])])])]
#[case("~(1 2 3)", vec![
Node::List(vec![
Node::Symbol("unquote".into()),
Node::List(vec![
Node::Int(1),
Node::Int(2),
Node::Int(3)])])])]
#[case("~@(1 2 3)", vec![
Node::List(vec![
Node::Symbol("splice-unquote".into()),
Node::List(vec![
Node::Int(1),
Node::Int(2),
Node::Int(3)])])])]
#[case("(+ 1 2)", vec![
Node::List(vec![
Node::Symbol("+".into()),
Node::Int(1),
Node::Int(2)])])]
#[case("(+ 1 2 (- 1 2))", vec![
Node::List(vec![
Node::Symbol("+".into()),
Node::Int(1),
Node::Int(2),
Node::List(vec![
Node::Symbol("-".into()),
Node::Int(1),
Node::Int(2)])])])]
fn test_parsing(#[case] input: &str, #[case] expected: Vec<Node>) {
let tokens = lexer::read(input).unwrap();
let res = parse(tokens).unwrap();
assert_eq!(res, expected);
}
// Inputs that lex successfully but must be rejected by `parse`.
#[rstest]
// Closing delimiters with no matching opener.
#[case(")")]
#[case("]")]
#[case("}")]
// Collections that are never closed.
#[case("(1 2")]
#[case("[1 2")]
#[case("{1 2")]
// Quote tokens followed by nothing, or by a closing delimiter instead of a form.
#[case("(1 2 '")]
#[case("(1 2 ')")]
#[case("(1 2 ~")]
#[case("(1 2 ~)")]
#[case("(1 2 `")]
#[case("(1 2 `)")]
#[case("(1 2 ~@")]
#[case("(1 2 ~@)")]
fn test_parsing_fail(#[case] input: &str) {
let tokens = lexer::read(input).unwrap();
let res = parse(tokens);
assert!(res.is_err());
}
}