crftng-intrprtrs/src/lexer/mod.rs

mod error;
pub mod token;
use std::iter;
use std::str::Chars;
pub use error::LexingError;
use self::token::Token;

/// A streaming lexer over a borrowed source string.
#[derive(Debug)]
pub struct Lexer<'a> {
    source: &'a str,
    source_iter: iter::Peekable<Chars<'a>>,
    /// Current line number, used for error reporting.
    line: usize,
}

impl<'a> Lexer<'a> {
    pub fn new(code: &'a str) -> Lexer<'a> {
        Lexer {
            source: code,
            source_iter: code.chars().peekable(),
            line: 0,
        }
    }

    pub fn scan_tokens(&mut self) -> Result<Vec<token::Token>, Vec<LexingError>> {
        let mut tokens = Vec::new();
        let mut errors = Vec::new();
        while self.source_iter.peek().is_some() {
            match self.scan_token() {
                // Once an error has been seen, keep scanning to collect further
                // errors but stop accumulating tokens.
                Some(Ok(token)) => {
                    if errors.is_empty() {
                        tokens.push(token)
                    }
                }
                Some(Err(e)) => errors.push(e),
                None => (),
            }
        }
        tokens.push(self.get_token(token::TokenType::Eof));
        if errors.is_empty() {
            Ok(tokens)
        } else {
            Err(errors)
        }
    }

    fn get_token(&self, token_type: token::TokenType) -> token::Token {
        token::Token {
            token_type,
            line: self.line,
        }
    }

    /// Consumes the next character if it equals `to_eq` and returns a token of
    /// `if_eq_type`; otherwise leaves the iterator untouched and returns a token
    /// of `else_type`. Used for two-character operators such as `!=` and `<=`.
    fn get_token_if_next_eq_or(
        &mut self,
        to_eq: char,
        if_eq_type: token::TokenType,
        else_type: token::TokenType,
    ) -> token::Token {
        let token_type = self
            .source_iter
            .next_if_eq(&to_eq)
            .map(|_| if_eq_type)
            .unwrap_or(else_type);
        self.get_token(token_type)
    }

    fn scan_token(&mut self) -> Option<Result<Token, LexingError>> {
        Some(Ok(match self.source_iter.next()? {
            '(' => self.get_token(token::TokenType::LeftParen),
            ')' => self.get_token(token::TokenType::RightParen),
            '{' => self.get_token(token::TokenType::LeftBrace),
            '}' => self.get_token(token::TokenType::RightBrace),
            ',' => self.get_token(token::TokenType::Comma),
            '.' => self.get_token(token::TokenType::Dot),
            '-' => self.get_token(token::TokenType::Minus),
            '+' => self.get_token(token::TokenType::Plus),
            ';' => self.get_token(token::TokenType::Semicolon),
            '*' => self.get_token(token::TokenType::Star),
            '!' => self.get_token_if_next_eq_or(
                '=',
                token::TokenType::BangEqual,
                token::TokenType::Bang,
            ),
            '=' => self.get_token_if_next_eq_or(
                '=',
                token::TokenType::EqualEqual,
                token::TokenType::Equal,
            ),
            '<' => self.get_token_if_next_eq_or(
                '=',
                token::TokenType::LessEqual,
                token::TokenType::Less,
            ),
            '>' => self.get_token_if_next_eq_or(
                '=',
                token::TokenType::GreaterEqual,
                token::TokenType::Greater,
            ),
'"' => {
let mut string = String::with_capacity(128);
let unmatched_char_error = Some(Err(LexingError {
line: self.line,
msg: "Unmatched \" character".to_string(),
}));
loop {
match self.source_iter.next() {
Some('"') => break,
Some('\\') => match self.source_iter.next() {
Some(c) => string.push(c),
None => return unmatched_char_error,
},
Some(c) => string.push(c),
None => return unmatched_char_error,
}
}
self.get_token(token::TokenType::String(string))
}
            c @ '0'..='9' => {
                let mut num: i64 = c.to_digit(10).unwrap() as i64;
                // Consume consecutive digits; `next_if` advances the iterator,
                // so the loop stops at the first non-digit character.
                while let Some(c) = self.source_iter.next_if(|c| c.is_ascii_digit()) {
                    num *= 10;
                    num += c.to_digit(10).unwrap() as i64;
                }
                self.get_token(token::TokenType::Int(num))
            }
            c @ 'a'..='z' | c @ 'A'..='Z' => {
                let mut identifier = String::with_capacity(128);
                identifier.push(c);
                while let Some(c) = self.source_iter.next_if(|c| c.is_alphanumeric()) {
                    identifier.push(c);
                }
                self.get_token(match identifier.as_str() {
                    "and" => token::TokenType::And,
                    "else" => token::TokenType::Else,
                    "false" => token::TokenType::False,
                    "fun" => token::TokenType::Fun,
                    "for" => token::TokenType::For,
                    "if" => token::TokenType::If,
                    "nil" => token::TokenType::Nil,
                    "print" => token::TokenType::Print,
                    "return" => token::TokenType::Return,
                    "true" => token::TokenType::True,
                    "let" => token::TokenType::Let,
                    "while" => token::TokenType::While,
                    "or" => token::TokenType::Or,
                    _ => token::TokenType::Identifier(identifier),
                })
            }
            // Ignore whitespace.
            ' ' | '\r' | '\t' => return None,
            '\n' => {
                self.line += 1;
                return None;
            }
            c => {
                return Some(Err(LexingError {
                    line: self.line,
                    msg: format!("Unexpected character: {c}"),
                }))
            }
        }))
    }
}
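
// A minimal usage sketch, not part of the original module: two unit tests
// exercising `scan_tokens` on a tiny expression and on an unterminated string.
// It assumes the `Token`/`LexingError` fields and the `TokenType` variants used
// above are visible from this submodule, as the constructors in this file
// imply; the test names and sample inputs are illustrative only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn scans_a_simple_expression() {
        let mut lexer = Lexer::new("1 + 23;");
        let tokens = match lexer.scan_tokens() {
            Ok(tokens) => tokens,
            Err(_) => panic!("expected no lexing errors"),
        };
        // Expect Int(1), Plus, Int(23), Semicolon, and the trailing Eof.
        assert_eq!(tokens.len(), 5);
        assert!(matches!(tokens[0].token_type, token::TokenType::Int(1)));
        assert!(matches!(tokens[1].token_type, token::TokenType::Plus));
        assert!(matches!(tokens[2].token_type, token::TokenType::Int(23)));
        assert!(matches!(tokens[3].token_type, token::TokenType::Semicolon));
        assert!(matches!(tokens[4].token_type, token::TokenType::Eof));
    }

    #[test]
    fn reports_an_unterminated_string() {
        let mut lexer = Lexer::new("\"abc");
        let errors = match lexer.scan_tokens() {
            Err(errors) => errors,
            Ok(_) => panic!("expected a lexing error"),
        };
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].line, 0);
    }
}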