crftng-intrprtrs/src/lexer/mod.rs

mod error;
mod lexer_iter;
pub mod token;
use self::lexer_iter::LexerIter;
pub use self::error::LexingError;
use self::error::LexingErrorKind;
use self::token::Token;
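
/// Scanner over the raw source text: wraps `code` in a `LexerIter` and turns it into
/// `token::Token`s, stamping each one with a source location (and the optional file
/// name) for error reporting.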
#[derive(Debug)]
pub struct Lexer<'a, 'b> {
    file: Option<&'b str>,
    _source: &'a str,
    source_iter: LexerIter<'a>,
}
impl<'a, 'b> Lexer<'a, 'b> {
    pub fn new(code: &'a str, file: Option<&'b str>) -> Lexer<'a, 'b> {
        Lexer {
            _source: code,
            source_iter: LexerIter::new(code),
            file,
        }
    }

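    /// Scans the whole input, returning either every token (terminated by an `Eof`
    /// token) or every lexing error encountered along the way.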
    pub fn scan_tokens(&mut self) -> Result<Vec<token::Token<'b>>, Vec<LexingError>> {
        let mut res = Ok(Vec::new());
        while self.source_iter.peek().is_some() {
            match self.scan_token() {
                Ok(Some(token)) => {
                    if let Ok(ref mut v) = res {
                        v.push(token);
                    }
                }
                Ok(None) => (),
                Err(e) => match res {
                    Ok(_) => res = Err(vec![e]),
                    Err(ref mut v) => v.push(e),
                },
            }
        }
        if let Ok(ref mut v) = res {
            v.push(self.get_token(token::TokenType::Eof));
        }
        res
    }

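    /// Wraps `token_type` in a `Token` stamped with the current source location.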
    fn get_token(&self, token_type: token::TokenType) -> token::Token<'b> {
        token::Token {
            token_type,
            location: self.source_iter.get_location(self.file),
        }
    }

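    /// Consumes the next character if it equals `to_eq` and produces `if_eq_type`;
    /// otherwise leaves it in place and produces `else_type` (used for `!=`, `==`, `<=`, `>=`).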
    fn get_token_if_next_eq_or(
        &mut self,
        to_eq: char,
        if_eq_type: token::TokenType,
        else_type: token::TokenType,
    ) -> token::Token<'b> {
        let token_type = self
            .source_iter
            .next_if_eq(to_eq)
            .map(|_| if_eq_type)
            .unwrap_or(else_type);
        self.get_token(token_type)
    }

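    /// Wraps a `LexingErrorKind` in a `LexingError` carrying the current source location.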
    fn get_error(&self, error: LexingErrorKind) -> LexingError {
        LexingError::new(error, self.source_iter.get_location(self.file).into())
    }

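    /// Scans a single token starting at the current position; returns `Ok(None)` for
    /// whitespace and `Err` for malformed literals or unexpected characters.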
    fn scan_token(&mut self) -> Result<Option<Token<'b>>, LexingError> {
        Ok(Some(match self.source_iter.peek().unwrap() {
            '0'..='9' => {
                let mut found_period = false;
                let num_str = self.source_iter.as_str_while(|c| {
                    if c == '.' {
                        // A second '.' ends the numeric literal
                        if found_period {
                            return false;
                        }
                        found_period = true;
                    }
                    c.is_ascii_digit() || c == '.'
                });
                let res = if found_period {
                    num_str
                        .parse::<f32>()
                        .map(|v| self.get_token(token::TokenType::Float(v)))
                        .map_err(|_| LexingErrorKind::InvalidFloat)
                } else {
                    num_str
                        .parse::<i32>()
                        .map(|v| self.get_token(token::TokenType::Int(v)))
                        .map_err(|_| LexingErrorKind::IntPrimitiveTooBig)
                };
                return res.map(Some).map_err(|e| self.get_error(e));
                /*
                Err(IntErrorKind::PosOverflow) | Err(IntErrorKind::NegOverflow) => return Err(self.get_error(LexingErrorKind::IntPrimitiveTooBig)),
                _ => unreachable!(),
                }
                */
            }
            'a'..='z' | 'A'..='Z' => {
                let identifier = self.source_iter.as_str_while(|c| c.is_alphanumeric());
                self.get_token(match identifier {
                    "and" => token::TokenType::And,
                    "else" => token::TokenType::Else,
                    "false" => token::TokenType::False,
                    "fun" => token::TokenType::Fun,
                    "for" => token::TokenType::For,
                    "if" => token::TokenType::If,
                    "nil" => token::TokenType::Nil,
                    "print" => token::TokenType::Print,
                    "return" => token::TokenType::Return,
                    "true" => token::TokenType::True,
                    "let" => token::TokenType::Let,
                    "while" => token::TokenType::While,
                    "or" => token::TokenType::Or,
                    _ => token::TokenType::Identifier(identifier.to_string()),
                })
            }
            _ => match self.source_iter.next().unwrap() {
                '(' => self.get_token(token::TokenType::LeftParen),
                ')' => self.get_token(token::TokenType::RightParen),
                '{' => self.get_token(token::TokenType::LeftBrace),
                '}' => self.get_token(token::TokenType::RightBrace),
                ',' => self.get_token(token::TokenType::Comma),
                '.' => self.get_token(token::TokenType::Dot),
                '-' => self.get_token(token::TokenType::Minus),
                '+' => self.get_token(token::TokenType::Plus),
                ';' => self.get_token(token::TokenType::Semicolon),
                '*' => self.get_token(token::TokenType::Star),
                '/' => self.get_token(token::TokenType::Slash),
                '!' => self.get_token_if_next_eq_or(
                    '=',
                    token::TokenType::BangEqual,
                    token::TokenType::Bang,
                ),
                '=' => self.get_token_if_next_eq_or(
                    '=',
                    token::TokenType::EqualEqual,
                    token::TokenType::Equal,
                ),
                '<' => self.get_token_if_next_eq_or(
                    '=',
                    token::TokenType::LessEqual,
                    token::TokenType::Less,
                ),
                '>' => self.get_token_if_next_eq_or(
                    '=',
                    token::TokenType::GreaterEqual,
                    token::TokenType::Greater,
                ),
'"' => {
let mut string = String::new();
let unmatched_char_error = Err(self.get_error(LexingErrorKind::UnmatchedQuote));
loop {
let next_char = self.source_iter.next();
match next_char {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
}
}
self.get_token(token::TokenType::String(string))
}
// Ignore whitespace
' ' | '\r' | '\t' | '\n' => return Ok(None),
c => return Err(self.get_error(LexingErrorKind::UnexpectedCharacter(c))),
},
}))
}
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_int_literal_too_large() {
        let mut lexer = Lexer::new("2222222222222222222223", None);
        let errors = lexer.scan_tokens().unwrap_err();
        assert_eq!(errors.len(), 1);
        assert!(matches!(
            errors[0].inner,
            LexingErrorKind::IntPrimitiveTooBig
        ));
    }
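
    // The sketches below are additional tests illustrating the expected scan_tokens
    // output on small inputs (keyword vs. identifier, two-character operators, string
    // escapes). They rely only on what the code above shows: the listed TokenType
    // variants, the token_type field on Token, and the assumption that LexerIter
    // yields characters one at a time without skipping anything itself. The `scan`
    // helper is a local convenience for these tests, not part of the lexer API.
    fn scan(source: &str) -> Vec<token::Token<'static>> {
        match Lexer::new(source, None).scan_tokens() {
            Ok(tokens) => tokens,
            Err(_) => panic!("expected the input to lex without errors"),
        }
    }

    #[test]
    fn test_keyword_vs_identifier() {
        let tokens = scan("let foo");
        // Expected: Let, Identifier("foo"), Eof
        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0].token_type, token::TokenType::Let));
        assert!(matches!(
            tokens[1].token_type,
            token::TokenType::Identifier(ref s) if s.as_str() == "foo"
        ));
        assert!(matches!(tokens[2].token_type, token::TokenType::Eof));
    }

    #[test]
    fn test_two_character_operators() {
        let tokens = scan("!= =");
        // "!=" should lex as BangEqual, while the lone "=" stays Equal.
        assert!(matches!(tokens[0].token_type, token::TokenType::BangEqual));
        assert!(matches!(tokens[1].token_type, token::TokenType::Equal));
    }

    #[test]
    fn test_string_with_escaped_quote() {
        let tokens = scan(r#""a\"b""#);
        // The backslash keeps the inner quote from terminating the string literal.
        assert!(matches!(
            tokens[0].token_type,
            token::TokenType::String(ref s) if s.as_str() == "a\"b"
        ));
    }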
}