// 642 lines · 16 KiB · Rust
/// Returns the byte offset at which `substring` starts within `string`.
///
/// The offset is computed from the start addresses of the two slices only, so the result is
/// meaningful solely if `substring` is actually a slice of `string`.
fn substring_offset(substring: &str, string: &str) -> usize
{
	let string_start = string.as_ptr() as usize;
	let substring_start = substring.as_ptr() as usize;

	substring_start - string_start
}
|
|
|
|
/// Strips leading whitespace and `#` comments from `input`.
///
/// Whitespace and comments are skipped alternately until the input starts with neither. A
/// comment starts with `#` and extends up to (but not including) the next `\n` or `\r`. A
/// comment that is not terminated by a line break extends to the end of the input.
///
/// The returned string slice is always a suffix of `input`, which allows callers to recover its
/// position within the original input.
pub fn trim_start(mut input: &str) -> &str
{
	loop
	{
		input = input.trim_start();

		if !input.starts_with('#')
		{
			return input;
		}

		// Skip the comment itself; the line break that terminates it is whitespace and gets
		// removed by the next iteration
		input = match input.find(|character: char| character == '\n' || character == '\r')
		{
			Some(line_break_index) => &input[line_break_index..],
			// An unterminated comment spans the entire rest of the input
			None => &input[input.len()..],
		};
	}
}
|
|
|
|
/// The keywords known to the tokenizer.
///
/// The textual form of each keyword is the one produced by its `Debug` implementation.
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) enum Keyword
{
	/// `and`
	And,
	/// `exists`
	Exists,
	/// `false`
	False,
	/// `forall`
	ForAll,
	/// `inf`
	Infimum,
	/// `not`
	Not,
	/// `or`
	Or,
	/// `sup`
	Supremum,
	/// `true`
	True,
}

impl std::fmt::Debug for Keyword
{
	/// Writes the keyword exactly as it is spelled in the input language.
	fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result
	{
		let keyword_text = match *self
		{
			Self::And => "and",
			Self::Exists => "exists",
			Self::False => "false",
			Self::ForAll => "forall",
			Self::Infimum => "inf",
			Self::Not => "not",
			Self::Or => "or",
			Self::Supremum => "sup",
			Self::True => "true",
		};

		formatter.write_str(keyword_text)
	}
}
|
|
|
|
/// The operator and punctuation symbols known to the tokenizer.
///
/// The textual form of each symbol is the one produced by its `Debug` implementation.
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) enum Symbol
{
	/// `<-`
	ArrowLeft,
	/// `<->`
	ArrowLeftAndRight,
	/// `->`
	ArrowRight,
	/// `,`
	Comma,
	/// `/`
	Division,
	/// `=`
	Equal,
	/// `**`
	Exponentiation,
	/// `>`
	Greater,
	/// `>=`
	GreaterOrEqual,
	/// `<`
	Less,
	/// `<=`
	LessOrEqual,
	/// `-`
	Minus,
	/// `*`
	Multiplication,
	/// `!=`
	NotEqual,
	/// `%`
	Percent,
	/// `+`
	Plus,
	/// `|`
	VerticalBar,
}

impl std::fmt::Debug for Symbol
{
	/// Writes the symbol exactly as it is spelled in the input language.
	fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result
	{
		let symbol_text = match *self
		{
			Self::ArrowLeft => "<-",
			Self::ArrowLeftAndRight => "<->",
			Self::ArrowRight => "->",
			Self::Comma => ",",
			Self::Division => "/",
			Self::Equal => "=",
			Self::Exponentiation => "**",
			Self::Greater => ">",
			Self::GreaterOrEqual => ">=",
			Self::Less => "<",
			Self::LessOrEqual => "<=",
			Self::Minus => "-",
			Self::Multiplication => "*",
			Self::NotEqual => "!=",
			Self::Percent => "%",
			Self::Plus => "+",
			Self::VerticalBar => "|",
		};

		formatter.write_str(symbol_text)
	}
}
|
|
|
|
#[derive(Clone, Copy, Eq, PartialEq)]
|
|
pub(crate) enum Token<'i>
|
|
{
|
|
Identifier(&'i str),
|
|
Number(usize),
|
|
ParenthesizedExpression(&'i str),
|
|
Symbol(Symbol),
|
|
}
|
|
|
|
/// Tells whether `character` may be the first character of an identifier after any leading
/// underscores, which is the case for ASCII letters only.
fn is_identifier_start_character(character: char) -> bool
{
	match character
	{
		'a'..='z' | 'A'..='Z' => true,
		_ => false,
	}
}
|
|
|
|
/// Tells whether `character` may occur within an identifier after its first character, which is
/// the case for underscores, ASCII letters, and ASCII digits.
fn is_identifier_body_character(character: char) -> bool
{
	character == '_' || character.is_ascii_alphanumeric()
}
|
|
|
|
pub fn identifier(input: &str) -> Option<(&str, &str)>
|
|
{
|
|
let mut characters = input.char_indices();
|
|
|
|
let first_character = loop
|
|
{
|
|
match characters.next()
|
|
{
|
|
Some((_, '_')) => continue,
|
|
Some((_, character)) => break Some(character),
|
|
None => break None,
|
|
}
|
|
}?;
|
|
|
|
if !is_identifier_start_character(first_character)
|
|
{
|
|
return None;
|
|
}
|
|
|
|
loop
|
|
{
|
|
match characters.next()
|
|
{
|
|
None => return Some((input, characters.as_str())),
|
|
Some((character_index, character)) =>
|
|
{
|
|
if !is_identifier_body_character(character)
|
|
{
|
|
return Some(input.split_at(character_index));
|
|
}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Tells whether `identifier` is spelled exactly like one of the keywords (case-sensitively).
pub(crate) fn is_keyword(identifier: &str) -> bool
{
	const KEYWORDS: [&str; 9] =
		["and", "exists", "false", "forall", "inf", "not", "or", "sup", "true"];

	KEYWORDS.contains(&identifier)
}
|
|
|
|
/// Splits the longest nonempty sequence of ASCII digits off the start of `input`.
///
/// Returns the digits and the input following them, or `None` if `input` doesn't start with an
/// ASCII digit.
fn number_string(input: &str) -> Option<(&str, &str)>
{
	// The digits end right before the first character that isn't an ASCII digit
	let number_length = input
		.find(|character: char| !character.is_ascii_digit())
		.unwrap_or_else(|| input.len());

	if number_length == 0
	{
		return None;
	}

	Some(input.split_at(number_length))
}
|
|
|
|
pub fn number(input: &str) -> Result<Option<(usize, &str)>, crate::parse::Error>
|
|
{
|
|
let (number_string, remaining_input) = match number_string(input)
|
|
{
|
|
Some(number_string) => number_string,
|
|
None => return Ok(None),
|
|
};
|
|
|
|
let number = number_string.parse()
|
|
.map_err(|error| crate::parse::Error::new_parse_number(input,
|
|
crate::parse::error::Location::new(0, Some(0)), error))?;
|
|
|
|
Ok(Some((number, remaining_input)))
|
|
}
|
|
|
|
pub(crate) fn symbol(input: &str) -> Option<(Symbol, &str)>
|
|
{
|
|
let mut characters = input.char_indices();
|
|
|
|
let (_, character) = match characters.next()
|
|
{
|
|
Some(characters_next) => characters_next,
|
|
None => return None,
|
|
};
|
|
|
|
let remaining_input = characters.as_str();
|
|
|
|
match character
|
|
{
|
|
',' => Some((Symbol::Comma, remaining_input)),
|
|
// <->, <-, <=, <
|
|
'=' => Some((Symbol::Equal, remaining_input)),
|
|
// !=
|
|
'!' => match characters.next()
|
|
{
|
|
Some((_, '=')) => Some((Symbol::NotEqual, characters.as_str())),
|
|
_ => None,
|
|
},
|
|
'<' => match characters.next()
|
|
{
|
|
Some((_, '-')) =>
|
|
{
|
|
let remaining_input = characters.as_str();
|
|
|
|
match characters.next()
|
|
{
|
|
Some((_, '>')) => Some((Symbol::ArrowLeftAndRight, characters.as_str())),
|
|
_ => Some((Symbol::ArrowLeft, remaining_input)),
|
|
}
|
|
},
|
|
Some((_, '=')) => Some((Symbol::LessOrEqual, characters.as_str())),
|
|
_ => Some((Symbol::Less, remaining_input)),
|
|
},
|
|
// >=, >
|
|
'>' => match characters.next()
|
|
{
|
|
Some((_, '=')) => Some((Symbol::GreaterOrEqual, characters.as_str())),
|
|
_ => Some((Symbol::Greater, remaining_input)),
|
|
},
|
|
'+' => Some((Symbol::Plus, remaining_input)),
|
|
// ->, -
|
|
'-' => match characters.next()
|
|
{
|
|
Some((_, '>')) => Some((Symbol::ArrowRight, characters.as_str())),
|
|
_ => Some((Symbol::Minus, remaining_input)),
|
|
},
|
|
// **, *
|
|
'*' => match characters.next()
|
|
{
|
|
Some((_, '*')) => Some((Symbol::Exponentiation, characters.as_str())),
|
|
_ => Some((Symbol::Multiplication, remaining_input)),
|
|
},
|
|
'/' => Some((Symbol::Division, remaining_input)),
|
|
'%' => Some((Symbol::Percent, remaining_input)),
|
|
'|' => Some((Symbol::VerticalBar, remaining_input)),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
pub(crate) fn parenthesized_expression(input: &str)
|
|
-> Result<Option<(&str, &str)>, crate::parse::Error>
|
|
{
|
|
let mut characters = input.chars();
|
|
|
|
let (first_character, remaining_input) = match characters.next()
|
|
{
|
|
Some(first_character) => (first_character, characters.as_str()),
|
|
None => return Ok(None),
|
|
};
|
|
|
|
if first_character != '('
|
|
{
|
|
return Ok(None);
|
|
}
|
|
|
|
let mut characters = remaining_input.char_indices();
|
|
let mut number_of_open_parentheses = 1;
|
|
|
|
while let Some((character_index, character)) = characters.next()
|
|
{
|
|
match character
|
|
{
|
|
'(' => number_of_open_parentheses += 1,
|
|
')' => number_of_open_parentheses -= 1,
|
|
_ => (),
|
|
}
|
|
|
|
if number_of_open_parentheses == 0
|
|
{
|
|
let position_of_closing_parenthesis = character_index;
|
|
let (parenthesized_expression, _) =
|
|
remaining_input.split_at(position_of_closing_parenthesis);
|
|
let remaining_input = characters.as_str();
|
|
|
|
return Ok(Some((parenthesized_expression, remaining_input)));
|
|
}
|
|
}
|
|
|
|
Err(crate::parse::Error::new_unmatched_parenthesis(
|
|
crate::parse::error::Location::new(0, Some(1))))
|
|
}
|
|
|
|
/// An iterator over the tokens of an input string.
///
/// Every token is passed through the functor `F`, which decides whether the token is yielded
/// and which value it is mapped to.
pub(crate) struct Tokens<'i, F>
{
	/// The complete input that this iterator was created with
	original_input: &'i str,
	/// The part of the input that has not been tokenized yet
	input: &'i str,
	/// Byte offset within `original_input` right after the token that was yielded last
	previous_index: usize,
	/// Whether the input remaining after the last token has been handed out already
	reached_end_of_stream: bool,
	/// The function that filters and maps the tokens
	functor: F,
}
|
|
|
|
impl<'i> Tokens<'i, ()>
|
|
{
|
|
pub fn new_iter(input: &'i str) -> Tokens<'i, impl FnMut(Token<'i>) -> Option<Token<'i>>>
|
|
{
|
|
Tokens::new_filter_map(input, |x| Some(x))
|
|
}
|
|
|
|
pub fn new_filter<P>(input: &'i str, mut predicate: P)
|
|
-> Tokens<'i, impl FnMut(Token<'i>) -> Option<Token<'i>>>
|
|
where
|
|
P: FnMut(&Token<'i>) -> bool,
|
|
{
|
|
Tokens::new_filter_map(input,
|
|
move |x|
|
|
{
|
|
if predicate(&x)
|
|
{
|
|
Some(x)
|
|
}
|
|
else
|
|
{
|
|
None
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
impl<'i, F> Tokens<'i, F>
|
|
{
|
|
pub fn new_filter_map(input: &'i str, functor: F) -> Self
|
|
{
|
|
Self
|
|
{
|
|
original_input: input,
|
|
input,
|
|
previous_index: 0,
|
|
reached_end_of_stream: false,
|
|
functor,
|
|
}
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<Result<(usize, usize, Token<'i>), crate::parse::Error>>
|
|
{
|
|
self.input = trim_start(self.input);
|
|
let index_left = substring_offset(self.input, self.original_input);
|
|
|
|
let first_character = match self.input.chars().next()
|
|
{
|
|
None => return None,
|
|
Some(first_character) => first_character,
|
|
};
|
|
|
|
if self.input.starts_with(")")
|
|
{
|
|
return Some(Err(crate::parse::Error::new_unmatched_parenthesis(
|
|
crate::parse::error::Location::new(0, Some(1)))));
|
|
}
|
|
|
|
match parenthesized_expression(self.input)
|
|
{
|
|
Ok(Some((parenthesized_expression, remaining_input))) =>
|
|
{
|
|
self.input = remaining_input;
|
|
let index_right = substring_offset(self.input, self.original_input);
|
|
|
|
return Some(Ok((index_left, index_right,
|
|
Token::ParenthesizedExpression(parenthesized_expression))));
|
|
},
|
|
Ok(None) => (),
|
|
Err(error) => return Some(Err(error)),
|
|
}
|
|
|
|
match number(self.input)
|
|
{
|
|
Ok(Some((number, remaining_input))) =>
|
|
{
|
|
self.input = remaining_input;
|
|
let index_right = substring_offset(self.input, self.original_input);
|
|
|
|
return Some(Ok((index_left, index_right, Token::Number(number))));
|
|
},
|
|
Ok(None) => (),
|
|
Err(error) => return Some(Err(error)),
|
|
}
|
|
|
|
if let Some((identifier, remaining_input)) = identifier(self.input)
|
|
{
|
|
self.input = remaining_input;
|
|
let index_right = substring_offset(self.input, self.original_input);
|
|
|
|
return Some(Ok((index_left, index_right, Token::Identifier(identifier))));
|
|
}
|
|
|
|
if let Some((symbol, remaining_input)) = symbol(self.input)
|
|
{
|
|
self.input = remaining_input;
|
|
let index_right = substring_offset(self.input, self.original_input);
|
|
|
|
return Some(Ok((index_left, index_right, Token::Symbol(symbol))));
|
|
}
|
|
|
|
return Some(Err(crate::parse::Error::new_character_not_allowed(first_character,
|
|
crate::parse::error::Location::new(0, Some(0)))));
|
|
}
|
|
|
|
pub fn remaining_input(&mut self) -> Option<&'i str>
|
|
{
|
|
if self.reached_end_of_stream
|
|
{
|
|
return None;
|
|
}
|
|
|
|
let remaining_input = self.original_input[self.previous_index..].trim();
|
|
self.reached_end_of_stream = true;
|
|
|
|
Some(remaining_input)
|
|
}
|
|
|
|
pub fn split(self) -> TokenSplit<Self>
|
|
{
|
|
TokenSplit::new(self)
|
|
}
|
|
}
|
|
|
|
impl<'i, F, G> std::iter::Iterator for Tokens<'i, F>
|
|
where
|
|
F: FnMut(Token<'i>) -> Option<G>,
|
|
{
|
|
type Item = Result<(&'i str, G), crate::parse::Error>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item>
|
|
{
|
|
if self.previous_index == self.original_input.len()
|
|
{
|
|
return None;
|
|
}
|
|
|
|
loop
|
|
{
|
|
match self.next_token()
|
|
{
|
|
Some(Ok((index_left, index_right, token))) =>
|
|
{
|
|
let token = match (self.functor)(token)
|
|
{
|
|
None => continue,
|
|
Some(token) => token,
|
|
};
|
|
|
|
let input_left = self.original_input[self.previous_index..index_left].trim();
|
|
|
|
self.previous_index = index_right;
|
|
|
|
return Some(Ok((input_left, token)));
|
|
},
|
|
Some(Err(error)) => return Some(Err(error)),
|
|
None => return None,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// An iterator adapter that yields the pieces of input separated by tokens rather than the
/// tokens themselves.
pub(crate) struct TokenSplit<T>
{
	/// The underlying token iterator
	tokens: T,
}

impl TokenSplit<()>
{
	/// Wraps the token iterator `tokens`.
	pub fn new<T>(tokens: T) -> TokenSplit<T>
	{
		TokenSplit { tokens }
	}
}
|
|
|
|
impl<'i, F, G> std::iter::Iterator for TokenSplit<Tokens<'i, F>>
|
|
where
|
|
F: FnMut(Token<'i>) -> Option<G>,
|
|
{
|
|
type Item = Result<&'i str, crate::parse::Error>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item>
|
|
{
|
|
match self.tokens.next()
|
|
{
|
|
Some(Ok((input_before, _))) => Some(Ok(input_before)),
|
|
Some(Err(error)) => Some(Err(error)),
|
|
None => match self.tokens.remaining_input()
|
|
{
|
|
Some(remaining_input) => Some(Ok(remaining_input)),
|
|
None => None,
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests
{
	use super::*;

	/// Checks which inputs start with an identifier and where the identifier ends.
	#[test]
	fn tokenize_identifier()
	{
		// Identifiers that span the entire input
		for &input in &["test", "test2", "Test", "Test2", "_test", "_test2", "__test", "__test2"]
		{
			assert_eq!(identifier(input).unwrap(), (input, ""), "input: {:?}", input);
		}

		// Identifiers that are followed by further input
		let identifiers_with_remaining_input =
		[
			("test, test", "test", ", test"),
			("test2, test", "test2", ", test"),
			("Test, Test", "Test", ", Test"),
			("Test2, Test", "Test2", ", Test"),
			("_test, _test", "_test", ", _test"),
			("_test2, _test", "_test2", ", _test"),
			("__test, __test", "__test", ", __test"),
			("__test2, __test", "__test2", ", __test"),
		];

		for &(input, expected_identifier, expected_remaining_input)
			in &identifiers_with_remaining_input
		{
			assert_eq!(identifier(input).unwrap(), (expected_identifier, expected_remaining_input),
				"input: {:?}", input);
		}

		// Inputs that don't start with an identifier
		for &input in &["2test, test", "#test, test", "$test, test", ",test, test"]
		{
			assert!(identifier(input).is_none(), "input: {:?}", input);
		}
	}

	/// Checks the tokenizers for parenthesized expressions, numbers, and symbols.
	#[test]
	fn tokenize_primitives()
	{
		assert_eq!(parenthesized_expression("(foo bar baz) test").unwrap(),
			Some(("foo bar baz", " test")));
		assert!(parenthesized_expression("( | asd#0231(asd|asd) test").is_err());
		assert_eq!(parenthesized_expression("( | asd#0231(asd|asd) ) test").unwrap(),
			Some((" | asd#0231(asd|asd) ", " test")));
		assert_eq!(parenthesized_expression("( | a)sd#0231(asd|asd) test").unwrap(),
			Some((" | a", "sd#0231(asd|asd) test")));

		assert_eq!(number("1234, ").unwrap(), Some((1234, ", ")));
		assert_eq!(number("1234.5, ").unwrap(), Some((1234, ".5, ")));
		assert_eq!(number("-1234, ").unwrap(), None);
		assert_eq!(number("a1234, ").unwrap(), None);

		// All symbols, including the percent sign
		let symbols =
		[
			("<-", Symbol::ArrowLeft),
			("<->", Symbol::ArrowLeftAndRight),
			("->", Symbol::ArrowRight),
			(",", Symbol::Comma),
			("/", Symbol::Division),
			("=", Symbol::Equal),
			("**", Symbol::Exponentiation),
			(">", Symbol::Greater),
			(">=", Symbol::GreaterOrEqual),
			("<", Symbol::Less),
			("<=", Symbol::LessOrEqual),
			("-", Symbol::Minus),
			("*", Symbol::Multiplication),
			("!=", Symbol::NotEqual),
			("%", Symbol::Percent),
			("+", Symbol::Plus),
			("|", Symbol::VerticalBar),
		];

		for &(symbol_text, expected_symbol) in &symbols
		{
			// The symbol on its own
			assert_eq!(symbol(symbol_text), Some((expected_symbol, "")),
				"input: {:?}", symbol_text);

			// The symbol followed by further input
			let symbol_with_remaining_input = format!("{}a", symbol_text);
			assert_eq!(symbol(&symbol_with_remaining_input), Some((expected_symbol, "a")),
				"input: {:?}", symbol_with_remaining_input);
		}
	}
}
|