From a8c164819a73f01fc910211b2c2d79a50132de34 Mon Sep 17 00:00:00 2001 From: bad Date: Sat, 23 Apr 2022 20:42:30 +0200 Subject: [PATCH] Lex float types --- src/lexer/error.rs | 2 + src/lexer/lexer_iter.rs | 16 ++++ src/lexer/mod.rs | 170 ++++++++++++++++++++++------------------ test.lox | 5 +- 4 files changed, 116 insertions(+), 77 deletions(-) diff --git a/src/lexer/error.rs b/src/lexer/error.rs index d48554a..a489c13 100644 --- a/src/lexer/error.rs +++ b/src/lexer/error.rs @@ -7,6 +7,7 @@ pub type LexingError = crate::error::ErrorLocationWrapper; pub enum LexingErrorKind { UnmatchedQuote, IntPrimitiveTooBig, + InvalidFloat, UnexpectedCharacter(char), } @@ -18,6 +19,7 @@ impl fmt::Display for LexingErrorKind { LexingErrorKind::IntPrimitiveTooBig => { write!(f, "Integer too large. Max value is i32_max({})", i32::MAX) } + LexingErrorKind::InvalidFloat => write!(f, "Invalid float literal"), } } } diff --git a/src/lexer/lexer_iter.rs b/src/lexer/lexer_iter.rs index aa39570..23a9ca0 100644 --- a/src/lexer/lexer_iter.rs +++ b/src/lexer/lexer_iter.rs @@ -40,6 +40,22 @@ impl<'a> LexerIter<'a> { self.inner.as_str() } + pub fn as_str_while bool>(&mut self, mut predicate: F) -> &'a str { + let str = self.inner.as_str(); + let mut end_indice = 0; + for (i, c) in str.char_indices() { + end_indice = i; + if !predicate(c) { + break; + } + } + unsafe { + self.inner = str.get_unchecked(end_indice..).chars(); + let res = str.get_unchecked(0..end_indice); + res + } + } + pub fn next_if(&mut self, func: impl FnOnce(char) -> bool) -> Option { if func(self.peek()?) { self.next() diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index c8a9e96..757b499 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,6 +1,7 @@ mod error; mod lexer_iter; pub mod token; + use lexer_iter::LexerIter; pub use error::LexingError; @@ -29,14 +30,16 @@ impl<'a, 'b> Lexer<'a, 'b> { while self.source_iter.peek().is_some() { match self.scan_token() { - Ok(Some(token)) => if let Ok(ref mut v) = res { - v.push(token) + Ok(Some(token)) => { + if let Ok(ref mut v) = res { + v.push(token) + } } Ok(None) => (), Err(e) => match res { Ok(_) => res = Err(vec![e]), - Err(ref mut v) => v.push(e) - } + Err(ref mut v) => v.push(e), + }, } } @@ -73,75 +76,42 @@ impl<'a, 'b> Lexer<'a, 'b> { } fn scan_token(&mut self) -> Result>, LexingError> { - Ok(Some(match self.source_iter.next().unwrap() { - '(' => self.get_token(token::TokenType::LeftParen), - ')' => self.get_token(token::TokenType::RightParen), - '{' => self.get_token(token::TokenType::LeftBrace), - '}' => self.get_token(token::TokenType::RightBrace), - ',' => self.get_token(token::TokenType::Comma), - '.' => self.get_token(token::TokenType::Dot), - '-' => self.get_token(token::TokenType::Minus), - '+' => self.get_token(token::TokenType::Plus), - ';' => self.get_token(token::TokenType::Semicolon), - '*' => self.get_token(token::TokenType::Star), - '/' => self.get_token(token::TokenType::Slash), - '!' => self.get_token_if_next_eq_or( - '=', - token::TokenType::BangEqual, - token::TokenType::Bang, - ), - '=' => self.get_token_if_next_eq_or( - '=', - token::TokenType::EqualEqual, - token::TokenType::Equal, - ), - '<' => self.get_token_if_next_eq_or( - '=', - token::TokenType::LessEqual, - token::TokenType::Less, - ), - '>' => self.get_token_if_next_eq_or( - '=', - token::TokenType::GreaterEqual, - token::TokenType::Greater, - ), - '"' => { - let mut string = String::new(); - let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote)); - loop { - let next_char = self.source_iter.next(); - match next_char { - Some('"') => break, - Some('\\') => match self.source_iter.next() { - Some(c) => string.push(c), - None => return unmatched_char_error, - }, - Some(c) => string.push(c), - None => return unmatched_char_error, + Ok(Some(match self.source_iter.peek().unwrap() { + '0'..='9' => { + let mut found_period = false; + let num_str = self.source_iter.as_str_while(|c| { + if c == '.' { + // Already found a period finish parsing the float + if found_period { + return false; + } + found_period = true; } - } - self.get_token(token::TokenType::String(string)) - } - c @ '0'..='9' => { - let mut num: i32 = c.to_digit(10).unwrap() as i32; - while let Some(c) = self.source_iter.next_if(|c| c.is_digit(10)) { - let int_primitive_too_big_err = - || self.get_error(LexingErrorKind::IntPrimitiveTooBig); - num = num.checked_mul(10).ok_or_else(int_primitive_too_big_err)?; - num = num - .checked_add(c.to_digit(10).unwrap() as i32) - .ok_or_else(int_primitive_too_big_err)?; - } - self.get_token(token::TokenType::Int(num)) - } - c @ 'a'..='z' | c @ 'A'..='Z' => { - let mut identifier = String::new(); - identifier.push(c); - while let Some(c) = self.source_iter.next_if(|c| c.is_alphanumeric()) { - identifier.push(c); - } + c.is_digit(10) || c == '.' + }); + let res = if found_period { + num_str + .parse::() + .map(|v| self.get_token(token::TokenType::Float(v))) + .map_err(|_| LexingErrorKind::InvalidFloat) + } else { + num_str + .parse::() + .map(|v| self.get_token(token::TokenType::Int(v))) + .map_err(|_| LexingErrorKind::IntPrimitiveTooBig) + }; + return res.map(|v| Some(v)).map_err(|e| self.get_error(e)) + /* - self.get_token(match identifier.as_str() { + Err(IntErrorKind::PosOverflow) | Err(IntErrorKind::NegOverflow) => return Err(self.get_error(LexingErrorKind::IntPrimitiveTooBig)), + _ => unreachable!(), + } + */ + } + 'a'..='z' | 'A'..='Z' => { + let identifier = self.source_iter.as_str_while(|c| c.is_alphanumeric()); + + self.get_token(match identifier { "and" => token::TokenType::And, "else" => token::TokenType::Else, "false" => token::TokenType::False, @@ -155,12 +125,62 @@ impl<'a, 'b> Lexer<'a, 'b> { "let" => token::TokenType::Let, "While" => token::TokenType::While, "or" => token::TokenType::Or, - _ => token::TokenType::Identifier(identifier), + _ => token::TokenType::Identifier(identifier.to_string()), }) } - // Ignore whitespace - ' ' | '\r' | '\t' | '\n' => return Ok(None), - c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))), + _ => match self.source_iter.next().unwrap() { + '(' => self.get_token(token::TokenType::LeftParen), + ')' => self.get_token(token::TokenType::RightParen), + '{' => self.get_token(token::TokenType::LeftBrace), + '}' => self.get_token(token::TokenType::RightBrace), + ',' => self.get_token(token::TokenType::Comma), + '.' => self.get_token(token::TokenType::Dot), + '-' => self.get_token(token::TokenType::Minus), + '+' => self.get_token(token::TokenType::Plus), + ';' => self.get_token(token::TokenType::Semicolon), + '*' => self.get_token(token::TokenType::Star), + '/' => self.get_token(token::TokenType::Slash), + '!' => self.get_token_if_next_eq_or( + '=', + token::TokenType::BangEqual, + token::TokenType::Bang, + ), + '=' => self.get_token_if_next_eq_or( + '=', + token::TokenType::EqualEqual, + token::TokenType::Equal, + ), + '<' => self.get_token_if_next_eq_or( + '=', + token::TokenType::LessEqual, + token::TokenType::Less, + ), + '>' => self.get_token_if_next_eq_or( + '=', + token::TokenType::GreaterEqual, + token::TokenType::Greater, + ), + '"' => { + let mut string = String::new(); + let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote)); + loop { + let next_char = self.source_iter.next(); + match next_char { + Some('"') => break, + Some('\\') => match self.source_iter.next() { + Some(c) => string.push(c), + None => return unmatched_char_error, + }, + Some(c) => string.push(c), + None => return unmatched_char_error, + } + } + self.get_token(token::TokenType::String(string)) + } + // Ignore whitespace + ' ' | '\r' | '\t' | '\n' => return Ok(None), + c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))), + }, })) } } diff --git a/test.lox b/test.lox index e62683e..99adf23 100644 --- a/test.lox +++ b/test.lox @@ -1,5 +1,6 @@ -1 == 1; -1 == 2; +12 == 1; print 1; +print 1.90 == 1.90; +print 1.90; print 2; nil;