From 9ffab45bd6c6052df8696392f86cebce5a65d3ec Mon Sep 17 00:00:00 2001 From: Devon Tingley Date: Wed, 1 Mar 2023 23:10:54 -0500 Subject: [PATCH] Ch 1.3 --- .gitignore | 1 + Cargo.lock | 220 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 9 ++ src/lexer/mod.rs | 156 +++++++++++++++++++++++++++++++ src/lexer/tokens.rs | 25 +++++ src/main.rs | 5 + 6 files changed, 416 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/lexer/mod.rs create mode 100644 src/lexer/tokens.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..aeb50eb --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,220 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "futures" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" + +[[package]] +name = "futures-executor" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" + +[[package]] +name = "futures-macro" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" + +[[package]] +name = "futures-task" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" + +[[package]] +name = "futures-timer" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" + +[[package]] +name = "futures-util" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "moose" +version = "0.1.0" +dependencies = [ + "rstest", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rstest" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07f2d176c472198ec1e6551dc7da28f1c089652f66a7b722676c2238ebc0edf" +dependencies = [ + "futures", + "futures-timer", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "rustc_version", + "syn", + "unicode-ident", +] + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "semver" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" + +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" +dependencies = [ + "autocfg", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..92799fc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "moose" +version = "0.1.0" +edition = "2021" + +[dependencies] + +[dev-dependencies] +rstest = "0.16.0" diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs new file mode 100644 index 0000000..2bbea07 --- /dev/null +++ b/src/lexer/mod.rs @@ -0,0 +1,156 @@ +mod tokens; + +use std::{iter::Peekable, str::Chars}; + +use tokens::Token; + +pub fn tokenize(input: &str) -> Vec { + let mut input = input.chars().into_iter().peekable(); + + let mut toks = Vec::new(); + while let Some(tok) = next_token(&mut input) { + toks.push(tok) + } + + toks +} + +fn next_token(input: &mut Peekable) -> Option { + let tok = match input.next()? { + '=' => Token::Assign, + '+' => Token::Plus, + + ',' => Token::Comma, + ';' => Token::Semicolon, + + '(' => Token::LeftParenthesis, + ')' => Token::RightParenthesis, + '{' => Token::LeftBrace, + '}' => Token::RightBrace, + + // Parse multicharacter tokens + tok if tok.is_ascii_alphabetic() => read_ident(input, tok), + tok if tok.is_ascii_digit() => read_int(input, tok), + + // Skip whitespace + tok if tok.is_ascii_whitespace() => next_token(input)?, + + _ => Token::Illegal, + }; + + Some(tok) +} + +fn read_ident(input: &mut Peekable, first: char) -> Token { + // Read the entire ident + let mut toks = vec![first]; + while let Some(tok) = input.peek() { + if !tok.is_ascii_alphabetic() { + break; + } + + let tok = input.next().unwrap(); + toks.push(tok); + } + + // Check if our ident is a keyword + let ident = toks.iter().cloned().collect::(); + match ident.as_str() { + "fn" => Token::Function, + "let" => Token::Let, + + ident => Token::Ident(ident.to_owned()), + } +} + +fn read_int(input: &mut Peekable, first: char) -> Token { + let mut toks = vec![first]; + while let Some(tok) = input.peek() { + if !tok.is_ascii_digit() { + break; + } + + let tok = input.next().unwrap(); + toks.push(tok); + } + + let int = toks + .iter() + .cloned() + .collect::() + .parse::() + .unwrap(); + Token::Int(int) +} + +#[cfg(test)] +mod tests { + use super::*; + use rstest::rstest; + + #[rstest] + #[case( + "=+(){},;", + vec![ + Token::Assign, + Token::Plus, + Token::LeftParenthesis, + Token::RightParenthesis, + Token::LeftBrace, + Token::RightBrace, + Token::Comma, + Token::Semicolon, + ])] + #[case( + " + let five = 5; + let ten = 10; + + let add = fn(x, y) { + x + y; + } + + let result = add(five, ten); + ", + vec![ + Token::Let, + Token::Ident("five".into()), + Token::Assign, + Token::Int(5), + Token::Semicolon, + Token::Let, + Token::Ident("ten".into()), + Token::Assign, + Token::Int(10), + Token::Semicolon, + Token::Let, + Token::Ident("add".into()), + Token::Assign, + Token::Function, + Token::LeftParenthesis, + Token::Ident("x".into()), + Token::Comma, + Token::Ident("y".into()), + Token::RightParenthesis, + Token::LeftBrace, + Token::Ident("x".into()), + Token::Plus, + Token::Ident("y".into()), + Token::Semicolon, + Token::RightBrace, + Token::Let, + Token::Ident("result".into()), + Token::Assign, + Token::Ident("add".into()), + Token::LeftParenthesis, + Token::Ident("five".into()), + Token::Comma, + Token::Ident("ten".into()), + Token::RightParenthesis, + Token::Semicolon, + ])] + fn test_next_token(#[case] input: &str, #[case] expected: Vec) { + let res = tokenize(input); + assert_eq!(res, expected); + } +} diff --git a/src/lexer/tokens.rs b/src/lexer/tokens.rs new file mode 100644 index 0000000..0532bfd --- /dev/null +++ b/src/lexer/tokens.rs @@ -0,0 +1,25 @@ +#[derive(Debug, PartialEq, PartialOrd)] +pub enum Token { + Illegal, + + // Ident + Literals + Ident(String), + Int(i64), + + // Operators + Assign, + Plus, + + // Delimiters + Comma, + Semicolon, + + LeftParenthesis, + RightParenthesis, + LeftBrace, + RightBrace, + + // Keywords + Function, + Let, +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..077d2fa --- /dev/null +++ b/src/main.rs @@ -0,0 +1,5 @@ +mod lexer; + +fn main() { + println!("{:?}", lexer::tokenize("asdf")); +}