Lex float types

This commit is contained in:
bad 2022-04-23 20:42:30 +02:00
parent 9cda9643a9
commit a8c164819a
4 changed files with 116 additions and 77 deletions

View File

@ -7,6 +7,7 @@ pub type LexingError = crate::error::ErrorLocationWrapper<LexingErrorKind>;
/// The distinct failures the lexer can report while scanning source text.
pub enum LexingErrorKind {
/// A string literal was opened with `"` but the input ended before the
/// closing quote was found.
UnmatchedQuote,
/// A digits-only literal failed to parse as an `i32`; displayed as
/// "Integer too large" together with `i32::MAX`.
IntPrimitiveTooBig,
/// A numeric literal containing a `.` failed to parse as an `f32`.
InvalidFloat,
/// A character that does not begin any token the lexer recognizes; the
/// offending character is carried as the payload.
UnexpectedCharacter(char),
}
@ -18,6 +19,7 @@ impl fmt::Display for LexingErrorKind {
LexingErrorKind::IntPrimitiveTooBig => {
write!(f, "Integer too large. Max value is i32_max({})", i32::MAX)
}
LexingErrorKind::InvalidFloat => write!(f, "Invalid float literal"),
}
}
}

View File

@ -40,6 +40,22 @@ impl<'a> LexerIter<'a> {
self.inner.as_str()
}
/// Consumes the longest prefix of the remaining input whose characters all
/// satisfy `predicate`, and returns that prefix as a slice of the source.
///
/// `predicate` is called on each character in order, stopping after the
/// first one it rejects. The rejected character is not consumed and stays
/// at the front of the iterator. If every remaining character is accepted,
/// the whole remainder is returned and the iterator is left empty. An empty
/// slice is returned when the input is exhausted or the first character is
/// rejected.
pub fn as_str_while<F: FnMut(char) -> bool>(&mut self, mut predicate: F) -> &'a str {
    let rest = self.inner.as_str();
    // Byte offset of the first rejected character. When nothing is rejected
    // the prefix must extend to the end of the input, not stop at the last
    // character's index (which would leave that character unconsumed).
    let end = rest
        .find(|c: char| !predicate(c))
        .unwrap_or(rest.len());
    // `end` comes from `find` or `len`, so it always lies on a char boundary
    // and `split_at` cannot panic here.
    let (taken, remaining) = rest.split_at(end);
    self.inner = remaining.chars();
    taken
}
pub fn next_if(&mut self, func: impl FnOnce(char) -> bool) -> Option<char> {
if func(self.peek()?) {
self.next()

View File

@ -1,6 +1,7 @@
mod error;
mod lexer_iter;
pub mod token;
use lexer_iter::LexerIter;
pub use error::LexingError;
@ -29,14 +30,16 @@ impl<'a, 'b> Lexer<'a, 'b> {
while self.source_iter.peek().is_some() {
match self.scan_token() {
Ok(Some(token)) => if let Ok(ref mut v) = res {
v.push(token)
Ok(Some(token)) => {
if let Ok(ref mut v) = res {
v.push(token)
}
}
Ok(None) => (),
Err(e) => match res {
Ok(_) => res = Err(vec![e]),
Err(ref mut v) => v.push(e)
}
Err(ref mut v) => v.push(e),
},
}
}
@ -73,75 +76,42 @@ impl<'a, 'b> Lexer<'a, 'b> {
}
fn scan_token(&mut self) -> Result<Option<Token<'b>>, LexingError> {
Ok(Some(match self.source_iter.next().unwrap() {
'(' => self.get_token(token::TokenType::LeftParen),
')' => self.get_token(token::TokenType::RightParen),
'{' => self.get_token(token::TokenType::LeftBrace),
'}' => self.get_token(token::TokenType::RightBrace),
',' => self.get_token(token::TokenType::Comma),
'.' => self.get_token(token::TokenType::Dot),
'-' => self.get_token(token::TokenType::Minus),
'+' => self.get_token(token::TokenType::Plus),
';' => self.get_token(token::TokenType::Semicolon),
'*' => self.get_token(token::TokenType::Star),
'/' => self.get_token(token::TokenType::Slash),
'!' => self.get_token_if_next_eq_or(
'=',
token::TokenType::BangEqual,
token::TokenType::Bang,
),
'=' => self.get_token_if_next_eq_or(
'=',
token::TokenType::EqualEqual,
token::TokenType::Equal,
),
'<' => self.get_token_if_next_eq_or(
'=',
token::TokenType::LessEqual,
token::TokenType::Less,
),
'>' => self.get_token_if_next_eq_or(
'=',
token::TokenType::GreaterEqual,
token::TokenType::Greater,
),
'"' => {
let mut string = String::new();
let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote));
loop {
let next_char = self.source_iter.next();
match next_char {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
Ok(Some(match self.source_iter.peek().unwrap() {
'0'..='9' => {
let mut found_period = false;
let num_str = self.source_iter.as_str_while(|c| {
if c == '.' {
// Already found a period finish parsing the float
if found_period {
return false;
}
found_period = true;
}
}
self.get_token(token::TokenType::String(string))
}
c @ '0'..='9' => {
let mut num: i32 = c.to_digit(10).unwrap() as i32;
while let Some(c) = self.source_iter.next_if(|c| c.is_digit(10)) {
let int_primitive_too_big_err =
|| self.get_error(LexingErrorKind::IntPrimitiveTooBig);
num = num.checked_mul(10).ok_or_else(int_primitive_too_big_err)?;
num = num
.checked_add(c.to_digit(10).unwrap() as i32)
.ok_or_else(int_primitive_too_big_err)?;
}
self.get_token(token::TokenType::Int(num))
}
c @ 'a'..='z' | c @ 'A'..='Z' => {
let mut identifier = String::new();
identifier.push(c);
while let Some(c) = self.source_iter.next_if(|c| c.is_alphanumeric()) {
identifier.push(c);
}
c.is_digit(10) || c == '.'
});
let res = if found_period {
num_str
.parse::<f32>()
.map(|v| self.get_token(token::TokenType::Float(v)))
.map_err(|_| LexingErrorKind::InvalidFloat)
} else {
num_str
.parse::<i32>()
.map(|v| self.get_token(token::TokenType::Int(v)))
.map_err(|_| LexingErrorKind::IntPrimitiveTooBig)
};
return res.map(|v| Some(v)).map_err(|e| self.get_error(e))
/*
self.get_token(match identifier.as_str() {
Err(IntErrorKind::PosOverflow) | Err(IntErrorKind::NegOverflow) => return Err(self.get_error(LexingErrorKind::IntPrimitiveTooBig)),
_ => unreachable!(),
}
*/
}
'a'..='z' | 'A'..='Z' => {
let identifier = self.source_iter.as_str_while(|c| c.is_alphanumeric());
self.get_token(match identifier {
"and" => token::TokenType::And,
"else" => token::TokenType::Else,
"false" => token::TokenType::False,
@ -155,12 +125,62 @@ impl<'a, 'b> Lexer<'a, 'b> {
"let" => token::TokenType::Let,
"While" => token::TokenType::While,
"or" => token::TokenType::Or,
_ => token::TokenType::Identifier(identifier),
_ => token::TokenType::Identifier(identifier.to_string()),
})
}
// Ignore whitespace
' ' | '\r' | '\t' | '\n' => return Ok(None),
c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))),
_ => match self.source_iter.next().unwrap() {
'(' => self.get_token(token::TokenType::LeftParen),
')' => self.get_token(token::TokenType::RightParen),
'{' => self.get_token(token::TokenType::LeftBrace),
'}' => self.get_token(token::TokenType::RightBrace),
',' => self.get_token(token::TokenType::Comma),
'.' => self.get_token(token::TokenType::Dot),
'-' => self.get_token(token::TokenType::Minus),
'+' => self.get_token(token::TokenType::Plus),
';' => self.get_token(token::TokenType::Semicolon),
'*' => self.get_token(token::TokenType::Star),
'/' => self.get_token(token::TokenType::Slash),
'!' => self.get_token_if_next_eq_or(
'=',
token::TokenType::BangEqual,
token::TokenType::Bang,
),
'=' => self.get_token_if_next_eq_or(
'=',
token::TokenType::EqualEqual,
token::TokenType::Equal,
),
'<' => self.get_token_if_next_eq_or(
'=',
token::TokenType::LessEqual,
token::TokenType::Less,
),
'>' => self.get_token_if_next_eq_or(
'=',
token::TokenType::GreaterEqual,
token::TokenType::Greater,
),
'"' => {
let mut string = String::new();
let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote));
loop {
let next_char = self.source_iter.next();
match next_char {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
}
}
self.get_token(token::TokenType::String(string))
}
// Ignore whitespace
' ' | '\r' | '\t' | '\n' => return Ok(None),
c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))),
},
}))
}
}

View File

@ -1,5 +1,6 @@
1 == 1;
1 == 2;
12 == 1;
print 1;
print 1.90 == 1.90;
print 1.90;
print 2;
nil;