Lex float types

This commit is contained in:
bad 2022-04-23 20:42:30 +02:00
parent 9cda9643a9
commit a8c164819a
4 changed files with 116 additions and 77 deletions

View file

@ -7,6 +7,7 @@ pub type LexingError = crate::error::ErrorLocationWrapper<LexingErrorKind>;
pub enum LexingErrorKind { pub enum LexingErrorKind {
UnmatchedQuote, UnmatchedQuote,
IntPrimitiveTooBig, IntPrimitiveTooBig,
InvalidFloat,
UnexpectedCharacter(char), UnexpectedCharacter(char),
} }
@ -18,6 +19,7 @@ impl fmt::Display for LexingErrorKind {
LexingErrorKind::IntPrimitiveTooBig => { LexingErrorKind::IntPrimitiveTooBig => {
write!(f, "Integer too large. Max value is i32_max({})", i32::MAX) write!(f, "Integer too large. Max value is i32_max({})", i32::MAX)
} }
LexingErrorKind::InvalidFloat => write!(f, "Invalid float literal"),
} }
} }
} }

View file

@ -40,6 +40,22 @@ impl<'a> LexerIter<'a> {
self.inner.as_str() self.inner.as_str()
} }
/// Consumes the longest prefix of the remaining input whose characters all
/// satisfy `predicate` and returns that prefix as a slice of the source.
///
/// `predicate` is called once per character, in order, and scanning stops at
/// the first character it rejects; that character is left in the iterator.
/// If every remaining character is accepted, the whole remainder is consumed
/// and returned. When the input is empty, or its first character is
/// rejected, an empty string is returned and nothing is consumed.
pub fn as_str_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> &'a str {
    let remaining = self.inner.as_str();
    // Byte offset of the first rejected character. Falling back to
    // `remaining.len()` matters: without it, input that ends on an accepted
    // character would lose its final character from the returned slice and
    // leave it unconsumed in the iterator.
    let split_at = remaining
        .char_indices()
        .find(|&(_, c)| !predicate(c))
        .map_or(remaining.len(), |(idx, _)| idx);
    // `split_at` is either an offset yielded by `char_indices` or the string
    // length, so it always lies on a char boundary and the safe split
    // cannot panic.
    let (matched, rest) = remaining.split_at(split_at);
    self.inner = rest.chars();
    matched
}
pub fn next_if(&mut self, func: impl FnOnce(char) -> bool) -> Option<char> { pub fn next_if(&mut self, func: impl FnOnce(char) -> bool) -> Option<char> {
if func(self.peek()?) { if func(self.peek()?) {
self.next() self.next()

View file

@ -1,6 +1,7 @@
mod error; mod error;
mod lexer_iter; mod lexer_iter;
pub mod token; pub mod token;
use lexer_iter::LexerIter; use lexer_iter::LexerIter;
pub use error::LexingError; pub use error::LexingError;
@ -29,14 +30,16 @@ impl<'a, 'b> Lexer<'a, 'b> {
while self.source_iter.peek().is_some() { while self.source_iter.peek().is_some() {
match self.scan_token() { match self.scan_token() {
Ok(Some(token)) => if let Ok(ref mut v) = res { Ok(Some(token)) => {
v.push(token) if let Ok(ref mut v) = res {
v.push(token)
}
} }
Ok(None) => (), Ok(None) => (),
Err(e) => match res { Err(e) => match res {
Ok(_) => res = Err(vec![e]), Ok(_) => res = Err(vec![e]),
Err(ref mut v) => v.push(e) Err(ref mut v) => v.push(e),
} },
} }
} }
@ -73,75 +76,42 @@ impl<'a, 'b> Lexer<'a, 'b> {
} }
fn scan_token(&mut self) -> Result<Option<Token<'b>>, LexingError> { fn scan_token(&mut self) -> Result<Option<Token<'b>>, LexingError> {
Ok(Some(match self.source_iter.next().unwrap() { Ok(Some(match self.source_iter.peek().unwrap() {
'(' => self.get_token(token::TokenType::LeftParen), '0'..='9' => {
')' => self.get_token(token::TokenType::RightParen), let mut found_period = false;
'{' => self.get_token(token::TokenType::LeftBrace), let num_str = self.source_iter.as_str_while(|c| {
'}' => self.get_token(token::TokenType::RightBrace), if c == '.' {
',' => self.get_token(token::TokenType::Comma), // Already found a period finish parsing the float
'.' => self.get_token(token::TokenType::Dot), if found_period {
'-' => self.get_token(token::TokenType::Minus), return false;
'+' => self.get_token(token::TokenType::Plus), }
';' => self.get_token(token::TokenType::Semicolon), found_period = true;
'*' => self.get_token(token::TokenType::Star),
'/' => self.get_token(token::TokenType::Slash),
'!' => self.get_token_if_next_eq_or(
'=',
token::TokenType::BangEqual,
token::TokenType::Bang,
),
'=' => self.get_token_if_next_eq_or(
'=',
token::TokenType::EqualEqual,
token::TokenType::Equal,
),
'<' => self.get_token_if_next_eq_or(
'=',
token::TokenType::LessEqual,
token::TokenType::Less,
),
'>' => self.get_token_if_next_eq_or(
'=',
token::TokenType::GreaterEqual,
token::TokenType::Greater,
),
'"' => {
let mut string = String::new();
let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote));
loop {
let next_char = self.source_iter.next();
match next_char {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
} }
} c.is_digit(10) || c == '.'
self.get_token(token::TokenType::String(string)) });
} let res = if found_period {
c @ '0'..='9' => { num_str
let mut num: i32 = c.to_digit(10).unwrap() as i32; .parse::<f32>()
while let Some(c) = self.source_iter.next_if(|c| c.is_digit(10)) { .map(|v| self.get_token(token::TokenType::Float(v)))
let int_primitive_too_big_err = .map_err(|_| LexingErrorKind::InvalidFloat)
|| self.get_error(LexingErrorKind::IntPrimitiveTooBig); } else {
num = num.checked_mul(10).ok_or_else(int_primitive_too_big_err)?; num_str
num = num .parse::<i32>()
.checked_add(c.to_digit(10).unwrap() as i32) .map(|v| self.get_token(token::TokenType::Int(v)))
.ok_or_else(int_primitive_too_big_err)?; .map_err(|_| LexingErrorKind::IntPrimitiveTooBig)
} };
self.get_token(token::TokenType::Int(num)) return res.map(|v| Some(v)).map_err(|e| self.get_error(e))
} /*
c @ 'a'..='z' | c @ 'A'..='Z' => {
let mut identifier = String::new();
identifier.push(c);
while let Some(c) = self.source_iter.next_if(|c| c.is_alphanumeric()) {
identifier.push(c);
}
self.get_token(match identifier.as_str() { Err(IntErrorKind::PosOverflow) | Err(IntErrorKind::NegOverflow) => return Err(self.get_error(LexingErrorKind::IntPrimitiveTooBig)),
_ => unreachable!(),
}
*/
}
'a'..='z' | 'A'..='Z' => {
let identifier = self.source_iter.as_str_while(|c| c.is_alphanumeric());
self.get_token(match identifier {
"and" => token::TokenType::And, "and" => token::TokenType::And,
"else" => token::TokenType::Else, "else" => token::TokenType::Else,
"false" => token::TokenType::False, "false" => token::TokenType::False,
@ -155,12 +125,62 @@ impl<'a, 'b> Lexer<'a, 'b> {
"let" => token::TokenType::Let, "let" => token::TokenType::Let,
"While" => token::TokenType::While, "While" => token::TokenType::While,
"or" => token::TokenType::Or, "or" => token::TokenType::Or,
_ => token::TokenType::Identifier(identifier), _ => token::TokenType::Identifier(identifier.to_string()),
}) })
} }
// Ignore whitespace _ => match self.source_iter.next().unwrap() {
' ' | '\r' | '\t' | '\n' => return Ok(None), '(' => self.get_token(token::TokenType::LeftParen),
c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))), ')' => self.get_token(token::TokenType::RightParen),
'{' => self.get_token(token::TokenType::LeftBrace),
'}' => self.get_token(token::TokenType::RightBrace),
',' => self.get_token(token::TokenType::Comma),
'.' => self.get_token(token::TokenType::Dot),
'-' => self.get_token(token::TokenType::Minus),
'+' => self.get_token(token::TokenType::Plus),
';' => self.get_token(token::TokenType::Semicolon),
'*' => self.get_token(token::TokenType::Star),
'/' => self.get_token(token::TokenType::Slash),
'!' => self.get_token_if_next_eq_or(
'=',
token::TokenType::BangEqual,
token::TokenType::Bang,
),
'=' => self.get_token_if_next_eq_or(
'=',
token::TokenType::EqualEqual,
token::TokenType::Equal,
),
'<' => self.get_token_if_next_eq_or(
'=',
token::TokenType::LessEqual,
token::TokenType::Less,
),
'>' => self.get_token_if_next_eq_or(
'=',
token::TokenType::GreaterEqual,
token::TokenType::Greater,
),
'"' => {
let mut string = String::new();
let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote));
loop {
let next_char = self.source_iter.next();
match next_char {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
}
}
self.get_token(token::TokenType::String(string))
}
// Ignore whitespace
' ' | '\r' | '\t' | '\n' => return Ok(None),
c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))),
},
})) }))
} }
} }

View file

@ -1,5 +1,6 @@
1 == 1; 12 == 1;
1 == 2;
print 1; print 1;
print 1.90 == 1.90;
print 1.90;
print 2; print 2;
nil; nil;