commit 5b0263c89f72c0692cf1f6a3cf9ef6c853cf00eb Author: bad Date: Mon Mar 21 15:47:35 2022 +0100 Lexer diff --git a/.env b/.env new file mode 100644 index 0000000..98502fe --- /dev/null +++ b/.env @@ -0,0 +1 @@ +LOG=info diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..2024e8d --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,242 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crftng-intrprtrs" +version = "0.1.0" +dependencies = [ + "dotenv", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "dotenv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + 
"regex-automata", +] + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "pin-project-lite" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4af2ec4714533fcdf07e886f17025ace8b997b9ce51204ee69b6da831c3da57" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "syn" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea297be220d52398dcc07ce15a209fce436d361735ac1db700cab3b6cdfb9f54" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thread_local" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +dependencies = [ + "once_cell", +] + +[[package]] +name = "tracing" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1bdf54a7c28a2bbf701e1d2233f6c77f473486b94bee4f9678da5a148dca7f" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa31669fa42c09c34d94d8165dd2012e8ff3c66aca50f3bb226b68f216f2706c" +dependencies = [ + "lazy_static", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e0ab7bdc962035a87fba73f3acca9b8a8d0034c2e6f60b84aeaaddddc155dce" +dependencies = [ + "ansi_term", + "lazy_static", + "matchers", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + 
"tracing-core",
+ "tracing-log",
+]
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "valuable"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8421fd9
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "crftng-intrprtrs"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+dotenv = "0.15.0"
+tracing = "0.1.32"
+tracing-subscriber = { version = "0.3.9", features = ["env-filter"] }
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..7c224aa
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1 @@
+hard_tabs=true
diff --git a/src/lexer/error.rs b/src/lexer/error.rs
new file mode 100644
index 0000000..a55e214
--- /dev/null
+++ b/src/lexer/error.rs
@@ -0,0 +1,23 @@
+use core::fmt;
+use std::error::Error;
+
+/// An error produced while turning source text into tokens.
+#[derive(Debug)]
+pub struct LexingError {
+	/// Line of the source the error refers to.
+	pub line: usize,
+	/// Human-readable description of what went wrong.
+	pub msg: String,
+}
+
+impl fmt::Display for LexingError {
+	/// Writes the line number together with the message, so that a printed
+	/// error says what went wrong and not only where.
+	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+		write!(f, "Error on line {}: {}", self.line, self.msg)
+	}
+}
+
+// `Error::description` is deprecated in favour of `Display`, so the default
+// trait methods are sufficient here.
+impl Error for LexingError {}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 0000000..ac21f96
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,203 @@
+mod error;
+pub mod token;
+use std::iter;
+use std::str::Chars;
+
+pub use error::LexingError;
+
+use self::token::{Token, TokenType};
+
+/// Hand-written scanner that turns source text into a list of tokens.
+#[derive(Debug)]
+pub struct Lexer<'a> {
+	/// The complete input; the lexer itself does not read it yet.
+	source: &'a str,
+	/// Cursor over the characters that still have to be scanned.
+	source_iter: iter::Peekable<Chars<'a>>,
+	/// 1-based number of the line the cursor is currently on.
+	line: usize,
+}
+
+impl<'a> Lexer<'a> {
+	/// Creates a lexer positioned at the start of `code`.
+	pub fn new(code: &'a str) -> Lexer<'a> {
+		Lexer {
+			source: code,
+			source_iter: code.chars().peekable(),
+			// Line numbers are shown to people, so counting starts at 1.
+			line: 1,
+		}
+	}
+
+	/// Scans the whole input.
+	///
+	/// Returns all tokens followed by a final `Eof`, or every error that was
+	/// found. Scanning carries on after an error so that several problems can
+	/// be reported at once.
+	pub fn scan_tokens(&mut self) -> Result<Vec<Token>, Vec<LexingError>> {
+		let mut tokens = Vec::new();
+		let mut errors = Vec::new();
+
+		while self.source_iter.peek().is_some() {
+			match self.scan_token() {
+				// After the first error the tokens are thrown away anyway.
+				Some(Ok(token)) if errors.is_empty() => tokens.push(token),
+				Some(Ok(_)) | None => (),
+				Some(Err(e)) => errors.push(e),
+			}
+		}
+
+		if errors.is_empty() {
+			tokens.push(self.get_token(TokenType::Eof));
+			Ok(tokens)
+		} else {
+			Err(errors)
+		}
+	}
+
+	/// Builds a token of the given type at the current line.
+	fn get_token(&self, token_type: TokenType) -> Token {
+		Token {
+			token_type,
+			line: self.line,
+		}
+	}
+
+	/// Consumes the next character if it equals `to_eq` and returns a token
+	/// of `if_eq_type`; otherwise leaves the input alone and returns a token
+	/// of `else_type`. Used for two-character operators such as `!=`.
+	fn get_token_if_next_eq_or(
+		&mut self,
+		to_eq: char,
+		if_eq_type: TokenType,
+		else_type: TokenType,
+	) -> Token {
+		let token_type = if self.source_iter.next_if_eq(&to_eq).is_some() {
+			if_eq_type
+		} else {
+			else_type
+		};
+		self.get_token(token_type)
+	}
+
+	/// Scans one token.
+	///
+	/// Returns `None` at the end of the input and for whitespace, which
+	/// produces no token.
+	fn scan_token(&mut self) -> Option<Result<Token, LexingError>> {
+		let token = match self.source_iter.next()? {
+			'(' => self.get_token(TokenType::LeftParen),
+			')' => self.get_token(TokenType::RightParen),
+			'{' => self.get_token(TokenType::LeftBrace),
+			'}' => self.get_token(TokenType::RightBrace),
+			',' => self.get_token(TokenType::Comma),
+			'.' => self.get_token(TokenType::Dot),
+			'-' => self.get_token(TokenType::Minus),
+			'+' => self.get_token(TokenType::Plus),
+			';' => self.get_token(TokenType::Semicolon),
+			'/' => self.get_token(TokenType::Slash),
+			'*' => self.get_token(TokenType::Star),
+			'!' => self.get_token_if_next_eq_or('=', TokenType::BangEqual, TokenType::Bang),
+			'=' => self.get_token_if_next_eq_or('=', TokenType::EqualEqual, TokenType::Equal),
+			'<' => self.get_token_if_next_eq_or('=', TokenType::LessEqual, TokenType::Less),
+			'>' => self.get_token_if_next_eq_or('=', TokenType::GreaterEqual, TokenType::Greater),
+			'"' => return Some(self.scan_string()),
+			first @ '0'..='9' => return Some(self.scan_int(first)),
+			first @ ('a'..='z' | 'A'..='Z') => self.scan_identifier(first),
+			// Whitespace produces no token.
+			' ' | '\r' | '\t' => return None,
+			'\n' => {
+				self.line += 1;
+				return None;
+			}
+			c => {
+				return Some(Err(LexingError {
+					line: self.line,
+					msg: format!("Unexpected character: {c}"),
+				}))
+			}
+		};
+		Some(Ok(token))
+	}
+
+	/// Scans the rest of a string literal; the opening `"` is already consumed.
+	///
+	/// A backslash makes the character after it part of the string as-is, which
+	/// is how a `"` or a `\` can be put inside a literal.
+	fn scan_string(&mut self) -> Result<Token, LexingError> {
+		// An unterminated string is reported where it was opened.
+		let start_line = self.line;
+		let mut string = String::new();
+
+		loop {
+			let c = match self.source_iter.next() {
+				Some('"') => break,
+				Some('\\') => self.source_iter.next(),
+				other => other,
+			};
+			match c {
+				Some(c) => {
+					// Strings may span lines; keep the line counter in step.
+					if c == '\n' {
+						self.line += 1;
+					}
+					string.push(c);
+				}
+				None => {
+					return Err(LexingError {
+						line: start_line,
+						msg: "Unmatched \" character".to_string(),
+					})
+				}
+			}
+		}
+		Ok(self.get_token(TokenType::String(string)))
+	}
+
+	/// Scans an integer literal whose first digit `first` is already consumed.
+	///
+	/// The whole run of digits is consumed even if the value does not fit into
+	/// an `i64`, so that scanning can continue behind the literal.
+	fn scan_int(&mut self, first: char) -> Result<Token, LexingError> {
+		let mut digits = String::from(first);
+		// `next_if` advances the cursor; only peeking here would never finish.
+		while let Some(digit) = self.source_iter.next_if(char::is_ascii_digit) {
+			digits.push(digit);
+		}
+
+		match digits.parse::<i64>() {
+			Ok(num) => Ok(self.get_token(TokenType::Int(num))),
+			Err(_) => Err(LexingError {
+				line: self.line,
+				msg: format!("Integer literal out of range: {digits}"),
+			}),
+		}
+	}
+
+	/// Scans an identifier or keyword whose first letter `first` is already
+	/// consumed.
+	fn scan_identifier(&mut self, first: char) -> Token {
+		let mut identifier = String::from(first);
+		while let Some(c) = self.source_iter.next_if(|c| c.is_alphanumeric()) {
+			identifier.push(c);
+		}
+
+		let token_type = match identifier.as_str() {
+			"and" => TokenType::And,
+			"else" => TokenType::Else,
+			"false" => TokenType::False,
+			"fun" => TokenType::Fun,
+			"for" => TokenType::For,
+			"if" => TokenType::If,
+			"nil" => TokenType::Nil,
+			"print" => TokenType::Print,
+			"return" => TokenType::Return,
+			"true" => TokenType::True,
+			"let" => TokenType::Let,
+			"while" => TokenType::While,
+			"or" => TokenType::Or,
+			_ => TokenType::Identifier(identifier),
+		};
+		self.get_token(token_type)
+	}
+}
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
new file mode 100644
index 0000000..e86519d
--- /dev/null
+++ b/src/lexer/token.rs
@@ -0,0 +1,57 @@
+/// A lexical token together with the line it was found on.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Token {
+	pub token_type: TokenType,
+	pub line: usize,
+}
+
+/// The different kinds of tokens of the language.
+#[derive(Debug, Clone, PartialEq)]
+pub enum TokenType {
+	// Single-character tokens.
+	LeftParen,
+	RightParen,
+	LeftBrace,
+	RightBrace,
+	Comma,
+	Dot,
+	Minus,
+	Plus,
+	Semicolon,
+	Slash,
+	Star,
+
+	// One- or two-character operators.
+	Bang,
+	BangEqual,
+	Equal,
+	EqualEqual,
+	Greater,
+	GreaterEqual,
+	Less,
+	LessEqual,
+
+	// Literals and names; the payload is the scanned value.
+	Identifier(String),
+	String(String),
+	Int(i64),
+	Float(f64),
+
+	// Keywords.
+	And,
+	Else,
+	False,
+	Fun,
+	For,
+	If,
+	Nil,
+	Or,
+	Print,
+	Return,
+	True,
+	Let,
+	While,
+
+	/// Marks the end of the input.
+	Eof,
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..f2c45b0
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,77 @@
+mod lexer;
+
+use lexer::{Lexer, LexingError};
+use std::{fs, io, path, process};
+use tracing::Level;
+use tracing_subscriber::{EnvFilter, FmtSubscriber};
+
+/// Prints each lexing error on its own line to standard error.
+fn report(errors: &[LexingError]) {
+	for error in errors {
+		eprintln!("{error}");
+	}
+}
+
+/// Lexes the file at `file` and prints its tokens.
+///
+/// # Errors
+///
+/// Fails if the file cannot be read, or with `InvalidData` if it does not lex;
+/// in the latter case the individual errors have already been printed.
+fn run_file(file: &path::Path) -> Result<(), io::Error> {
+	let src = fs::read_to_string(file)?;
+	run(&src).map_err(|errors| {
+		report(&errors);
+		io::Error::new(io::ErrorKind::InvalidData, "lexing failed")
+	})
+}
+
+/// Reads lines from standard input and lexes each one until input ends.
+fn run_repl() {
+	let mut line_buf = String::with_capacity(1024);
+	loop {
+		line_buf.clear();
+		match io::stdin().read_line(&mut line_buf) {
+			// Zero bytes read means end of input, not an empty line.
+			Ok(0) => break,
+			Ok(_) => {
+				// A bad line must not take the whole session down.
+				if let Err(errors) = run(line_buf.trim()) {
+					report(&errors);
+				}
+			}
+			Err(e) => {
+				eprintln!("Failed to read from stdin: {e}");
+				break;
+			}
+		}
+	}
+}
+
+/// Lexes `code` and prints the resulting tokens.
+fn run(code: &str) -> Result<(), Vec<LexingError>> {
+	let mut lexer = Lexer::new(code);
+	println!("{:?}", lexer.scan_tokens()?);
+	Ok(())
+}
+
+/// Sets up logging, then lexes the file given as first argument or, without
+/// one, starts the REPL.
+fn main() {
+	dotenv::dotenv().ok();
+	let subscriber = FmtSubscriber::builder()
+		.with_max_level(Level::TRACE)
+		.with_env_filter(EnvFilter::from_env("LOG"))
+		.finish();
+	tracing::subscriber::set_global_default(subscriber)
+		.expect("no global tracing subscriber has been installed yet");
+
+	if let Some(file) = std::env::args_os().nth(1) {
+		if let Err(e) = run_file(file.as_ref()) {
+			eprintln!("{e}");
+			process::exit(1);
+		}
+	} else {
+		run_repl()
+	}
+}