Mal Step 1
This commit is contained in:
parent
ac87083777
commit
96f822ace8
5 changed files with 764 additions and 8 deletions
273
Cargo.lock
generated
273
Cargo.lock
generated
|
@ -2,6 +2,279 @@
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "1.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.75"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40"
|
||||||
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-executor",
|
||||||
|
"futures-io",
|
||||||
|
"futures-sink",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-channel"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-sink",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-core"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-executor"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-task",
|
||||||
|
"futures-util",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-io"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-macro"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-sink"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-task"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-timer"
|
||||||
|
version = "3.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-util"
|
||||||
|
version = "0.3.28"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533"
|
||||||
|
dependencies = [
|
||||||
|
"futures-channel",
|
||||||
|
"futures-core",
|
||||||
|
"futures-io",
|
||||||
|
"futures-macro",
|
||||||
|
"futures-sink",
|
||||||
|
"futures-task",
|
||||||
|
"memchr",
|
||||||
|
"pin-project-lite",
|
||||||
|
"pin-utils",
|
||||||
|
"slab",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mal"
|
name = "mal"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"rstest",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.6.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-project-lite"
|
||||||
|
version = "0.2.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-utils"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.66"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.33"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.9.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-automata",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-automata"
|
||||||
|
version = "0.3.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.7.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "relative-path"
|
||||||
|
version = "1.9.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rstest"
|
||||||
|
version = "0.18.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199"
|
||||||
|
dependencies = [
|
||||||
|
"futures",
|
||||||
|
"futures-timer",
|
||||||
|
"rstest_macros",
|
||||||
|
"rustc_version",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rstest_macros"
|
||||||
|
version = "0.18.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"glob",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"relative-path",
|
||||||
|
"rustc_version",
|
||||||
|
"syn",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc_version"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
|
||||||
|
dependencies = [
|
||||||
|
"semver",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "semver"
|
||||||
|
version = "1.0.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "slab"
|
||||||
|
version = "0.4.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||||
|
|
|
@ -6,3 +6,7 @@ edition = "2021"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
anyhow = "1.0.75"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
rstest = "0.18.2"
|
||||||
|
|
253
src/lexer.rs
Normal file
253
src/lexer.rs
Normal file
|
@ -0,0 +1,253 @@
|
||||||
|
use std::{iter::Peekable, str::Chars};
|
||||||
|
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
|
||||||
|
/// A single lexical token produced by the lexer.
///
/// `Clone` and `Eq` are derived in addition to the comparison traits so that
/// tokens can be duplicated and used wherever full equality is required; every
/// payload type (`i64`, `String`) supports both.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)]
pub enum Token {
    /// `(`
    LeftParen,
    /// `)`
    RightParen,

    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,

    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,

    /// The two-character sequence `~@`.
    WeirdSign,
    /// `'`
    Apostrophe,
    /// `` ` ``
    Grave,
    /// `~` when it is not followed by `@`.
    Tilde,
    /// `^`
    Carot,
    /// `@`
    AtSign,

    // Math Operators
    /// `+`
    Plus,
    /// `-` when it is not immediately followed by a digit.
    Minus,
    /// `*`
    Asterisk,
    /// `/`
    Slash,

    // Values
    /// A keyword; the payload is the text after the leading `:`.
    Keyword(String),
    /// An integer literal, optionally negative.
    Int(i64),
    /// A string literal; the payload is the text between the quotes.
    String(String),
    /// An identifier other than `true`, `false` and `nil`.
    Ident(String),
    /// The word `true`.
    True,
    /// The word `false`.
    False,
    /// The word `nil`.
    Nil,
}
|
||||||
|
|
||||||
|
pub fn read(input: &str) -> Result<Vec<Token>> {
|
||||||
|
let mut input = input.chars().peekable();
|
||||||
|
let mut tokens = Vec::new();
|
||||||
|
|
||||||
|
while let Some(tok) = next_token(&mut input)? {
|
||||||
|
tokens.push(tok)
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(tokens)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_token(input: &mut Peekable<Chars>) -> Result<Option<Token>> {
|
||||||
|
let tok = match input.next() {
|
||||||
|
Some(tok) => tok,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
let tok = match tok {
|
||||||
|
// Weird sign
|
||||||
|
'~' if input.peek().is_some_and(|c| c == &'@') => {
|
||||||
|
// Munch the @
|
||||||
|
input.next();
|
||||||
|
Token::WeirdSign
|
||||||
|
}
|
||||||
|
|
||||||
|
// Negative numbers
|
||||||
|
'-' if input.peek().is_some_and(|c| c.is_ascii_digit()) => read_int(input, '-'),
|
||||||
|
|
||||||
|
// Munch comments
|
||||||
|
';' => {
|
||||||
|
for c in input.by_ref() {
|
||||||
|
if c == '\n' {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match next_token(input)? {
|
||||||
|
Some(tok) => tok,
|
||||||
|
None => return Ok(None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
'(' => Token::LeftParen,
|
||||||
|
')' => Token::RightParen,
|
||||||
|
|
||||||
|
'[' => Token::LeftBracket,
|
||||||
|
']' => Token::RightBracket,
|
||||||
|
'{' => Token::LeftBrace,
|
||||||
|
'}' => Token::RightBrace,
|
||||||
|
|
||||||
|
'\'' => Token::Apostrophe,
|
||||||
|
'`' => Token::Grave,
|
||||||
|
'~' => Token::Tilde,
|
||||||
|
'^' => Token::Carot,
|
||||||
|
'@' => Token::AtSign,
|
||||||
|
|
||||||
|
'+' => Token::Plus,
|
||||||
|
'-' => Token::Minus,
|
||||||
|
'*' => Token::Asterisk,
|
||||||
|
'/' => Token::Slash,
|
||||||
|
|
||||||
|
'"' => read_string(input)?,
|
||||||
|
':' => read_keyword(input),
|
||||||
|
|
||||||
|
c if c.is_ascii_digit() => read_int(input, c),
|
||||||
|
c if c.is_ascii_alphabetic() => read_ident(input, c),
|
||||||
|
|
||||||
|
// Munch whitespace
|
||||||
|
c if c.is_whitespace() => match next_token(input)? {
|
||||||
|
Some(tok) => tok,
|
||||||
|
None => return Ok(None),
|
||||||
|
},
|
||||||
|
_ => bail!("ilegal token"),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Some(tok))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_string(input: &mut Peekable<Chars>) -> Result<Token> {
|
||||||
|
let mut raw_str = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match input.peek() {
|
||||||
|
Some(&'"') => {
|
||||||
|
// We want to eat the tailing "
|
||||||
|
input.next();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(_) => (),
|
||||||
|
None => bail!("unbalanced string"),
|
||||||
|
}
|
||||||
|
|
||||||
|
raw_str.push(input.next().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Token::String(raw_str.into_iter().collect()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_keyword(input: &mut Peekable<Chars>) -> Token {
|
||||||
|
let mut raw_keyword = Vec::new();
|
||||||
|
|
||||||
|
while let Some(c) = input.peek() {
|
||||||
|
if !c.is_ascii_alphanumeric() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
raw_keyword.push(input.next().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::Keyword(raw_keyword.into_iter().collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_int(input: &mut Peekable<Chars>, first: char) -> Token {
|
||||||
|
let mut raw_int = vec![first];
|
||||||
|
|
||||||
|
while let Some(c) = input.peek() {
|
||||||
|
if !c.is_ascii_digit() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
raw_int.push(input.next().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::Int(raw_int.iter().collect::<String>().parse::<i64>().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_ident(input: &mut Peekable<Chars>, first: char) -> Token {
|
||||||
|
let mut raw_ident = vec![first];
|
||||||
|
|
||||||
|
while let Some(c) = input.peek() {
|
||||||
|
if !c.is_ascii_alphanumeric() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
raw_ident.push(input.next().unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
let ident = raw_ident.into_iter().collect::<String>();
|
||||||
|
|
||||||
|
match ident.as_str() {
|
||||||
|
"true" => Token::True,
|
||||||
|
"false" => Token::False,
|
||||||
|
"nil" => Token::Nil,
|
||||||
|
ident => Token::Ident(ident.to_owned()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;
    use rstest::rstest;

    /// Each case pairs a source snippet with the exact token stream expected.
    #[rstest]
    #[case(
        "()[]{}",
        vec![
            Token::LeftParen,
            Token::RightParen,
            Token::LeftBracket,
            Token::RightBracket,
            Token::LeftBrace,
            Token::RightBrace,
        ]
    )]
    #[case(
        " ' ` ^ ~@ ~ @",
        vec![
            Token::Apostrophe,
            Token::Grave,
            Token::Carot,
            Token::WeirdSign,
            Token::Tilde,
            Token::AtSign,
        ]
    )]
    #[case(
        "(+ 1 2)",
        vec![Token::LeftParen, Token::Plus, Token::Int(1), Token::Int(2), Token::RightParen]
    )]
    #[case(
        "(- 1 2)",
        vec![Token::LeftParen, Token::Minus, Token::Int(1), Token::Int(2), Token::RightParen]
    )]
    #[case(
        "(* 1 2)",
        vec![Token::LeftParen, Token::Asterisk, Token::Int(1), Token::Int(2), Token::RightParen]
    )]
    #[case(
        "(/ 1 2)",
        vec![Token::LeftParen, Token::Slash, Token::Int(1), Token::Int(2), Token::RightParen]
    )]
    #[case(
        "(- -2 1)",
        vec![Token::LeftParen, Token::Minus, Token::Int(-2), Token::Int(1), Token::RightParen]
    )]
    #[case(
        "(\"string and stuff\")",
        vec![Token::LeftParen, Token::String("string and stuff".into()), Token::RightParen]
    )]
    #[case(
        "(func a b)",
        vec![
            Token::LeftParen,
            Token::Ident("func".into()),
            Token::Ident("a".into()),
            Token::Ident("b".into()),
            Token::RightParen,
        ]
    )]
    #[case(
        "(+ 1 (- 2 1))",
        vec![
            Token::LeftParen,
            Token::Plus,
            Token::Int(1),
            Token::LeftParen,
            Token::Minus,
            Token::Int(2),
            Token::Int(1),
            Token::RightParen,
            Token::RightParen,
        ]
    )]
    #[case(
        "(fn a ;; This comment is useless
(+ 1 2))",
        vec![
            Token::LeftParen,
            Token::Ident("fn".into()),
            Token::Ident("a".into()),
            Token::LeftParen,
            Token::Plus,
            Token::Int(1),
            Token::Int(2),
            Token::RightParen,
            Token::RightParen,
        ]
    )]
    fn test_lexer(#[case] input: &str, #[case] expected: Vec<Token>) {
        let tokens = read(input).unwrap();
        assert_eq!(tokens, expected);
    }

    /// Inputs the lexer must reject.
    #[rstest]
    // Unbalanced string
    #[case("(\"asdf)")]
    fn test_lexer_errors(#[case] input: &str) {
        assert!(read(input).is_err());
    }
}
|
16
src/main.rs
16
src/main.rs
|
@ -1,5 +1,8 @@
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
|
||||||
|
mod lexer;
|
||||||
|
mod parser;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut input = String::new();
|
let mut input = String::new();
|
||||||
|
|
||||||
|
@ -17,8 +20,9 @@ fn main() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
let ast = read(&input);
|
let tokens = lexer::read(&input).unwrap();
|
||||||
let res = eval(&ast);
|
let ast = parser::parse(tokens).unwrap();
|
||||||
|
let res = eval(ast);
|
||||||
|
|
||||||
println!("{res}");
|
println!("{res}");
|
||||||
|
|
||||||
|
@ -26,10 +30,6 @@ fn main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read(input: &str) -> String {
|
fn eval(input: Vec<parser::Node>) -> String {
|
||||||
input.to_owned()
|
format!("{input:?}")
|
||||||
}
|
|
||||||
|
|
||||||
fn eval(input: &str) -> String {
|
|
||||||
input.to_owned()
|
|
||||||
}
|
}
|
||||||
|
|
226
src/parser.rs
Normal file
226
src/parser.rs
Normal file
|
@ -0,0 +1,226 @@
|
||||||
|
use std::{iter::Peekable, vec::IntoIter};
|
||||||
|
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
|
||||||
|
use crate::lexer::Token;
|
||||||
|
|
||||||
|
/// A node of the syntax tree produced by the parser.
///
/// `Clone` and `Eq` are derived in addition to the comparison traits so that
/// (sub)trees can be duplicated and used wherever full equality is required;
/// every payload type supports both.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd)]
pub enum Node {
    /// The forms found between `(` and `)`, in source order.
    List(Vec<Node>),
    /// The forms found between `[` and `]`, in source order.
    Vector(Vec<Node>),
    /// The forms found between `{` and `}`, in source order.
    // NOTE(review): presumably alternating keys and values; the parser does
    // not check this.
    HashMap(Vec<Node>),

    /// A symbol: an identifier, an operator, or a reader-macro name.
    Symbol(String),
    /// A keyword, stored without its leading `:`.
    Keyword(String),
    /// An integer literal.
    Int(i64),
    /// A string literal.
    String(String),
    /// The literal `true`.
    True,
    /// The literal `false`.
    False,
    /// The literal `nil`.
    Nil,
}
|
||||||
|
|
||||||
|
pub fn parse(tokens: Vec<Token>) -> Result<Vec<Node>> {
|
||||||
|
let mut tokens = tokens.into_iter().peekable();
|
||||||
|
let mut ast = Vec::new();
|
||||||
|
|
||||||
|
while let Some(node) = next_statement(&mut tokens)? {
|
||||||
|
ast.push(node)
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(ast)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_statement(tokens: &mut Peekable<IntoIter<Token>>) -> Result<Option<Node>> {
|
||||||
|
let tok = match tokens.next() {
|
||||||
|
Some(tok) => tok,
|
||||||
|
None => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
let node = match tok {
|
||||||
|
Token::LeftParen => read_list(tokens, Token::RightParen)?,
|
||||||
|
Token::RightParen => bail!("closing parenthsis does not have matching open parenthesis"),
|
||||||
|
|
||||||
|
Token::LeftBracket => read_list(tokens, Token::RightBracket)?,
|
||||||
|
Token::RightBracket => bail!("closing bracket does not have matching open bracket"),
|
||||||
|
|
||||||
|
Token::LeftBrace => read_list(tokens, Token::RightBrace)?,
|
||||||
|
Token::RightBrace => bail!("closing brace does not have matching open brace"),
|
||||||
|
|
||||||
|
Token::WeirdSign => read_quote(tokens, "splice-unquote")?,
|
||||||
|
Token::Apostrophe => read_quote(tokens, "quote")?,
|
||||||
|
Token::Grave => read_quote(tokens, "quasiquote")?,
|
||||||
|
Token::Tilde => read_quote(tokens, "unquote")?,
|
||||||
|
|
||||||
|
// TODO: meta
|
||||||
|
Token::Carot => todo!(),
|
||||||
|
// TODO: deref
|
||||||
|
Token::AtSign => todo!(),
|
||||||
|
|
||||||
|
Token::Plus => Node::Symbol("+".into()),
|
||||||
|
Token::Minus => Node::Symbol("-".into()),
|
||||||
|
Token::Asterisk => Node::Symbol("*".into()),
|
||||||
|
Token::Slash => Node::Symbol("/".into()),
|
||||||
|
|
||||||
|
Token::Keyword(val) => Node::Keyword(val),
|
||||||
|
Token::Ident(val) => Node::Symbol(val),
|
||||||
|
Token::String(val) => Node::String(val),
|
||||||
|
Token::Int(int) => Node::Int(int),
|
||||||
|
Token::True => Node::True,
|
||||||
|
Token::False => Node::False,
|
||||||
|
Token::Nil => Node::Nil,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Some(node))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_list(tokens: &mut Peekable<IntoIter<Token>>, closer: Token) -> Result<Node> {
|
||||||
|
let mut list = Vec::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if tokens.peek() == Some(&closer) {
|
||||||
|
tokens.next();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(node) = next_statement(tokens)? {
|
||||||
|
list.push(node);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match next_statement(tokens)? {
|
||||||
|
Some(node) => list.push(node),
|
||||||
|
None => match closer {
|
||||||
|
Token::RightParen => bail!("unclosed list"),
|
||||||
|
Token::RightBracket => bail!("unclosed vector"),
|
||||||
|
Token::RightBrace => bail!("unclosed hashmap"),
|
||||||
|
_ => bail!("unreachable"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match closer {
|
||||||
|
Token::RightParen => Ok(Node::List(list)),
|
||||||
|
Token::RightBracket => Ok(Node::Vector(list)),
|
||||||
|
Token::RightBrace => Ok(Node::HashMap(list)),
|
||||||
|
|
||||||
|
// This should theoretically be unreachable
|
||||||
|
_ => bail!(
|
||||||
|
"invalid collection type using closer {:?}. This is a bug; please file a bug report",
|
||||||
|
closer
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_quote(tokens: &mut Peekable<IntoIter<Token>>, quote_type: &str) -> Result<Node> {
|
||||||
|
let follower_node = match next_statement(tokens)? {
|
||||||
|
Some(node) => node,
|
||||||
|
None => bail!("quote does not have a valid follower node"),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Node::List(vec![
|
||||||
|
Node::Symbol(quote_type.into()),
|
||||||
|
follower_node,
|
||||||
|
]))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use crate::lexer;

    use super::*;
    use rstest::rstest;

    /// Each case pairs a source snippet with the exact tree expected from
    /// lexing and then parsing it.
    #[rstest]
    #[case("10", vec![Node::Int(10)])]
    #[case(":owo", vec![Node::Keyword("owo".into())])]
    #[case("\"uwu\"", vec![Node::String("uwu".into())])]
    #[case(
        "(10 2)",
        vec![Node::List(vec![Node::Int(10), Node::Int(2)])]
    )]
    #[case(
        "[10 2]",
        vec![Node::Vector(vec![Node::Int(10), Node::Int(2)])]
    )]
    #[case(
        "{10 2}",
        vec![Node::HashMap(vec![Node::Int(10), Node::Int(2)])]
    )]
    #[case(
        "(+ - * /)",
        vec![Node::List(vec![
            Node::Symbol("+".into()),
            Node::Symbol("-".into()),
            Node::Symbol("*".into()),
            Node::Symbol("/".into()),
        ])]
    )]
    #[case(
        "'(1 2 3)",
        vec![Node::List(vec![
            Node::Symbol("quote".into()),
            Node::List(vec![Node::Int(1), Node::Int(2), Node::Int(3)]),
        ])]
    )]
    #[case(
        "`(1 2 3)",
        vec![Node::List(vec![
            Node::Symbol("quasiquote".into()),
            Node::List(vec![Node::Int(1), Node::Int(2), Node::Int(3)]),
        ])]
    )]
    #[case(
        "~(1 2 3)",
        vec![Node::List(vec![
            Node::Symbol("unquote".into()),
            Node::List(vec![Node::Int(1), Node::Int(2), Node::Int(3)]),
        ])]
    )]
    #[case(
        "~@(1 2 3)",
        vec![Node::List(vec![
            Node::Symbol("splice-unquote".into()),
            Node::List(vec![Node::Int(1), Node::Int(2), Node::Int(3)]),
        ])]
    )]
    #[case(
        "(+ 1 2)",
        vec![Node::List(vec![
            Node::Symbol("+".into()),
            Node::Int(1),
            Node::Int(2),
        ])]
    )]
    #[case(
        "(+ 1 2 (- 1 2))",
        vec![Node::List(vec![
            Node::Symbol("+".into()),
            Node::Int(1),
            Node::Int(2),
            Node::List(vec![
                Node::Symbol("-".into()),
                Node::Int(1),
                Node::Int(2),
            ]),
        ])]
    )]
    fn test_parsing(#[case] input: &str, #[case] expected: Vec<Node>) {
        let tokens = lexer::read(input).unwrap();
        let ast = parse(tokens).unwrap();
        assert_eq!(ast, expected);
    }

    /// Inputs that lex successfully but must be rejected by the parser.
    #[rstest]
    #[case(")")]
    #[case("]")]
    #[case("}")]
    #[case("(1 2")]
    #[case("[1 2")]
    #[case("{1 2")]
    #[case("(1 2 '")]
    #[case("(1 2 ')")]
    #[case("(1 2 ~")]
    #[case("(1 2 ~)")]
    #[case("(1 2 `")]
    #[case("(1 2 `)")]
    #[case("(1 2 ~@")]
    #[case("(1 2 ~@)")]
    fn test_parsing_fail(#[case] input: &str) {
        let tokens = lexer::read(input).unwrap();
        assert!(parse(tokens).is_err());
    }
}
|
Loading…
Reference in a new issue