Finish dirty first pass over parser

This commit is contained in:
Patrick Lühne 2020-04-28 05:21:58 +02:00
parent 66ac57c5b8
commit 8d474fa489
Signed by: patrick
GPG Key ID: 05F3611E97A70ABF
4 changed files with 417 additions and 219 deletions

View File

@ -28,6 +28,7 @@ pub enum Kind
UnexpectedToken,
EmptyExpression,
ExpectedLogicalConnectiveArgument(String),
ExpectedTerm,
MultipleComparisonOperators(crate::ComparisonOperator, crate::ComparisonOperator),
}
@ -101,6 +102,11 @@ impl Error
Self::new(Kind::ExpectedLogicalConnectiveArgument(logical_connective_name), location)
}
pub(crate) fn new_expected_term(location: Location) -> Self
{
Self::new(Kind::ExpectedTerm, location)
}
pub(crate) fn new_multiple_comparison_operators(
comparison_operator_1: crate::ComparisonOperator,
comparison_operator_2: crate::ComparisonOperator, location: Location)
@ -134,12 +140,13 @@ impl std::fmt::Debug for Error
Kind::MixedImplicationDirections(_location_2) =>
write!(formatter, "-> and <- implications may not be mixed within the same scope")?,
Kind::ExpectedVariableDeclaration =>
write!(formatter, "expected variable declaration")?,
write!(formatter, "expected a variable declaration")?,
Kind::UnexpectedToken => write!(formatter, "unexpected token")?,
Kind::EmptyExpression => write!(formatter, "empty expression")?,
Kind::ExpectedLogicalConnectiveArgument(ref logical_connective_name) =>
write!(formatter, "this “{}” logical connective is missing an argument",
logical_connective_name)?,
Kind::ExpectedTerm => write!(formatter, "expected a term")?,
Kind::MultipleComparisonOperators(comparison_operator_1, comparison_operator_2) =>
write!(formatter, "chained comparisons arent supported (found “{:?}” and “{:?}” in the same formula), consider separating them with “and”",
comparison_operator_1, comparison_operator_2)?,

View File

@ -68,28 +68,15 @@ impl<'i> FormulaStr<'i>
}
}
fn tokens(&self) -> Tokens<'i, impl FnMut(Token<'i>) -> Option<Token<'i>>>
{
Tokens::new_iter(self.input)
}
fn logical_connectives(&self) -> Tokens<'i, impl FnMut(Token<'i>) -> Option<LogicalConnective>>
{
let functor = |token| match token
{
Token::Identifier(ref identifier) => match *identifier
{
"and" => Some(LogicalConnective::And),
"or" => Some(LogicalConnective::Or),
_ => None,
},
Token::Symbol(ref symbol) => match symbol
{
Symbol::ArrowLeft => Some(LogicalConnective::ImpliesRightToLeft),
Symbol::ArrowLeftAndRight => Some(LogicalConnective::IfAndOnlyIf),
Symbol::ArrowRight => Some(LogicalConnective::ImpliesLeftToRight),
_ => None,
},
Token::Identifier("and") => Some(LogicalConnective::And),
Token::Identifier("or") => Some(LogicalConnective::Or),
Token::Symbol(Symbol::ArrowLeft) => Some(LogicalConnective::ImpliesRightToLeft),
Token::Symbol(Symbol::ArrowLeftAndRight) => Some(LogicalConnective::IfAndOnlyIf),
Token::Symbol(Symbol::ArrowRight) => Some(LogicalConnective::ImpliesLeftToRight),
_ => None,
};
@ -101,19 +88,14 @@ impl<'i> FormulaStr<'i>
{
let predicate = move |token: &_| match token
{
Token::Identifier(ref identifier) => match *identifier
{
"and" => logical_connective == LogicalConnective::And,
"or" => logical_connective == LogicalConnective::Or,
_ => false,
},
Token::Symbol(ref symbol) => match symbol
{
Symbol::ArrowLeft => logical_connective == LogicalConnective::ImpliesRightToLeft,
Symbol::ArrowLeftAndRight => logical_connective == LogicalConnective::IfAndOnlyIf,
Symbol::ArrowRight => logical_connective == LogicalConnective::ImpliesLeftToRight,
_ => false,
},
Token::Identifier("and") => logical_connective == LogicalConnective::And,
Token::Identifier("or") => logical_connective == LogicalConnective::Or,
Token::Symbol(Symbol::ArrowLeft) =>
logical_connective == LogicalConnective::ImpliesRightToLeft,
Token::Symbol(Symbol::ArrowLeftAndRight) =>
logical_connective == LogicalConnective::IfAndOnlyIf,
Token::Symbol(Symbol::ArrowRight) =>
logical_connective == LogicalConnective::ImpliesLeftToRight,
_ => false,
};
@ -247,14 +229,38 @@ impl<'i> FormulaStr<'i>
// Parse quantified formulas
if let Some((identifier, input)) = identifier(input)
{
if identifier == "not"
match identifier
{
let input = input.trim_start();
println!("{} parsing “not” formula body: {}", indentation, input);
"not" =>
{
let input = input.trim_start();
println!("{} parsing “not” formula body: {}", indentation, input);
let argument = FormulaStr::new(input).parse(level + 1)?;
let argument = FormulaStr::new(input).parse(level + 1)?;
return Ok(crate::Formula::not(Box::new(argument)));
return Ok(crate::Formula::not(Box::new(argument)));
},
"true" =>
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
return Ok(crate::Formula::true_());
},
"false" =>
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
return Ok(crate::Formula::false_());
},
_ => (),
}
let quantifier = match identifier
@ -340,25 +346,19 @@ impl<'i> FormulaStr<'i>
crate::PredicateDeclaration::new(predicate_name.to_string(), arguments.len());
let declaration = std::rc::Rc::new(declaration);
// TODO: handle unexpected input after end of parenthesized expression
return Ok(crate::Formula::predicate(declaration, arguments));
}
// Parse parenthesized formulas
match parenthesized_expression(input)?
if let Some((parenthesized_expression, input)) = parenthesized_expression(input)?
{
Some((parenthesized_expression, input)) =>
if !input.trim().is_empty()
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))));
}
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))));
}
return FormulaStr::new(parenthesized_expression).parse(level);
},
None => (),
return FormulaStr::new(parenthesized_expression).parse(level + 1);
}
Err(crate::parse::Error::new_unexpected_token(

View File

@ -49,6 +49,45 @@ fn variable_name(input: &str) -> Option<(&str, &str)>
None
}
pub fn is_function_name(identifier: &str) -> bool
{
if is_keyword(identifier)
{
return false;
}
let mut characters = identifier.chars();
while let Some(character) = characters.next()
{
match character
{
'_' => continue,
_ if character.is_ascii_lowercase() => return true,
_ => return false,
}
}
false
}
fn is_variable_name(identifier: &str) -> bool
{
let mut characters = identifier.chars();
while let Some(character) = characters.next()
{
match character
{
'_' => continue,
_ if character.is_ascii_uppercase() => return true,
_ => return false,
}
}
false
}
pub(crate) fn variable_declaration(input: &str) -> Option<(crate::VariableDeclaration, &str)>
{
variable_name(input)
@ -96,45 +135,35 @@ pub(crate) fn variable_declarations(input: &str)
}
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) enum TermInfixOperator
pub(crate) enum ArithmeticOperatorClass
{
Add,
Divide,
Exponentiate,
Modulo,
Multiply,
Subtract,
Exponential,
Multiplicative,
Additive,
}
impl TermInfixOperator
impl ArithmeticOperatorClass
{
fn level(&self) -> usize
{
match self
{
Self::Exponentiate => 1,
Self::Multiply
| Self::Divide
| Self::Modulo => 2,
Self::Add
| Self::Subtract => 3,
Self::Exponential => 1,
Self::Multiplicative => 2,
Self::Additive => 3,
}
}
}
impl std::fmt::Debug for TermInfixOperator
impl std::fmt::Debug for ArithmeticOperatorClass
{
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result
{
match &self
{
Self::Add => write!(formatter, "+"),
Self::Divide => write!(formatter, "/"),
Self::Exponentiate => write!(formatter, "**"),
// TODO: conflicts with single-line comments
Self::Modulo => write!(formatter, "%"),
Self::Multiply => write!(formatter, "*"),
Self::Subtract => write!(formatter, "-"),
Self::Exponential => write!(formatter, "exponential"),
Self::Multiplicative => write!(formatter, "multiplicative"),
Self::Additive => write!(formatter, "additive"),
}
}
}
@ -154,15 +183,118 @@ impl<'i> TermStr<'i>
}
}
pub fn iter_infix_operators(&self) -> TermInfixOperatorIterator<'i>
fn arithmetic_operator_classes(&self) -> Tokens<'i, impl FnMut(Token<'i>)
-> Option<ArithmeticOperatorClass>>
{
TermInfixOperatorIterator::new(self.input)
let functor = |token| match token
{
Token::Symbol(Symbol::Exponentiation) => Some(ArithmeticOperatorClass::Exponential),
Token::Symbol(Symbol::Multiplication) => Some(ArithmeticOperatorClass::Multiplicative),
Token::Symbol(Symbol::Division) => Some(ArithmeticOperatorClass::Multiplicative),
Token::Symbol(Symbol::Percent) => Some(ArithmeticOperatorClass::Multiplicative),
Token::Symbol(Symbol::Plus) => Some(ArithmeticOperatorClass::Additive),
Token::Symbol(Symbol::Minus) => Some(ArithmeticOperatorClass::Additive),
_ => None,
};
// TODO: refactor so that self.input is always set correctly
Tokens::new_filter_map(self.input, functor)
}
pub fn split_at_infix_operator(&self, infix_operator: TermInfixOperator)
-> SplitTermAtInfixOperator<'i>
fn filter_by_arithmetic_operator_class(&self,
arithmetic_operator_class: ArithmeticOperatorClass)
-> Tokens<'i, impl FnMut(Token<'i>) -> Option<crate::BinaryOperator>>
{
SplitTermAtInfixOperator::new(self, infix_operator)
let functor = move |token| match token
{
Token::Symbol(Symbol::Exponentiation) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Exponential
{
Some(crate::BinaryOperator::Exponentiate)
}
else
{
None
},
Token::Symbol(Symbol::Multiplication) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Multiplicative
{
Some(crate::BinaryOperator::Multiply)
}
else
{
None
},
Token::Symbol(Symbol::Division) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Multiplicative
{
Some(crate::BinaryOperator::Divide)
}
else
{
None
},
Token::Symbol(Symbol::Percent) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Multiplicative
{
Some(crate::BinaryOperator::Modulo)
}
else
{
None
},
Token::Symbol(Symbol::Plus) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Additive
{
Some(crate::BinaryOperator::Add)
}
else
{
None
},
Token::Symbol(Symbol::Minus) =>
if arithmetic_operator_class == ArithmeticOperatorClass::Additive
{
Some(crate::BinaryOperator::Subtract)
}
else
{
None
},
_ => None,
};
Tokens::new_filter_map(self.input, functor)
}
pub fn top_level_arithmetic_operator_class(&self)
-> Result<Option<ArithmeticOperatorClass>, crate::parse::Error>
{
let mut top_level_arithmetic_operator_class = None;
for arithmetic_operator_class in self.arithmetic_operator_classes()
{
let (_, arithmetic_operator_class) = arithmetic_operator_class?;
top_level_arithmetic_operator_class = match top_level_arithmetic_operator_class
{
None => Some(arithmetic_operator_class),
Some(top_level_arithmetic_operator_class) =>
{
if arithmetic_operator_class.level()
> top_level_arithmetic_operator_class.level()
{
Some(arithmetic_operator_class)
}
else
{
Some(top_level_arithmetic_operator_class)
}
},
}
}
Ok(top_level_arithmetic_operator_class)
}
pub fn parse(&self, level: usize) -> Result<crate::Term, crate::parse::Error>
@ -172,181 +304,235 @@ impl<'i> TermStr<'i>
let input = self.input.trim_start();
// TODO: implement
Ok(crate::Term::true_())
}
}
pub(crate) struct TermInfixOperatorIterator<'i>
{
original_input: &'i str,
input: &'i str,
}
impl<'i> TermInfixOperatorIterator<'i>
{
pub fn new(input: &'i str) -> Self
{
Self
match input.chars().next()
{
original_input: input,
input,
Some(')') => return Err(crate::parse::Error::new_unmatched_parenthesis(
crate::parse::error::Location::new(0, Some(0)))),
// TODO: implement absolute value function
Some('|') => unimplemented!(),
None => return Err(crate::parse::Error::new_empty_expression(
crate::parse::error::Location::new(0, Some(0)))),
_ => (),
}
}
}
impl<'i> std::iter::Iterator for TermInfixOperatorIterator<'i>
{
type Item = Result<(&'i str, &'i str, TermInfixOperator), crate::parse::Error>;
fn next(&mut self) -> Option<Self::Item>
{
loop
// Parse arithmetic infix operations
if let Some(top_level_arithmetic_operator_class) =
self.top_level_arithmetic_operator_class()?
{
self.input = self.input.trim_start();
println!("{} parsing {:?} arithmetic term", indentation,
top_level_arithmetic_operator_class);
let first_character = match self.input.chars().next()
if top_level_arithmetic_operator_class == ArithmeticOperatorClass::Exponential
{
None => return None,
Some(first_character) => first_character,
};
// TODO: implement
if self.input.starts_with("|")
{
unimplemented!();
return exponentiate(
self.filter_by_arithmetic_operator_class(top_level_arithmetic_operator_class)
.split(), level + 1);
}
if self.input.starts_with(")")
// Parse arguments of arithmetic infix operations
let mut argument_iterator =
self.filter_by_arithmetic_operator_class(top_level_arithmetic_operator_class);
let (first_argument, first_binary_operator) = argument_iterator.next().ok_or_else(||
crate::parse::Error::new_expected_term(
crate::parse::error::Location::new(0, Some(0))))??;
let first_argument = TermStr::new(first_argument).parse(level + 1)?;
// TODO: improve error handling if the terms between the operators are invalid
let (accumulator, last_binary_operator) =
argument_iterator.try_fold((first_argument, first_binary_operator),
|(accumulator, binary_operator), argument|
{
let (argument, next_binary_operator) = argument?;
let argument = TermStr::new(argument).parse(level + 1)?;
let binary_operation =
crate::BinaryOperation::new(binary_operator, Box::new(accumulator),
Box::new(argument));
let formula = crate::Term::BinaryOperation(binary_operation);
Ok((formula, next_binary_operator))
})?;
// The last item hasnt been consumed yet, so its safe to unwrap it
let last_argument = argument_iterator.remaining_input().unwrap();
let last_argument = TermStr::new(last_argument).parse(level + 1)?;
let last_binary_operation =
crate::BinaryOperation::new(last_binary_operator, Box::new(accumulator),
Box::new(last_argument));
return Ok(crate::Term::BinaryOperation(last_binary_operation));
}
if let Some((number, input)) = number(input)?
{
if !input.trim().is_empty()
{
return Some(Err(crate::parse::Error::new_unmatched_parenthesis(
crate::parse::error::Location::new(0, Some(1)))));
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))));
}
match parenthesized_expression(self.input)
return Ok(crate::Term::integer(number as i32));
}
if let Some((identifier, input)) = identifier(input)
{
match identifier
{
Ok(Some((_, remaining_input))) =>
"inf" =>
{
self.input = remaining_input;
continue;
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
return Ok(crate::Term::infimum());
},
Ok(None) => (),
Err(error) => return Some(Err(error)),
}
match number(self.input)
{
Ok(Some((_, remaining_input))) =>
"sup" =>
{
self.input = remaining_input;
continue;
}
Ok(None) => (),
Err(error) => return Some(Err(error)),
}
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
let index_left = self.input.as_ptr() as usize - self.original_input.as_ptr() as usize;
let input_left = self.original_input.split_at(index_left).0.trim_end();
if let Some((_, remaining_input)) = identifier(self.input)
{
self.input = remaining_input;
continue;
}
if let Some((symbol, remaining_input)) = symbol(self.input)
{
self.input = remaining_input;
match symbol
return Ok(crate::Term::supremum());
},
"true" =>
{
Symbol::Division => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Divide))),
Symbol::Exponentiation => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Exponentiate))),
Symbol::Minus => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Subtract))),
Symbol::Multiplication => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Multiply))),
Symbol::Percent => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Modulo))),
Symbol::Plus => return Some(Ok((input_left, remaining_input,
TermInfixOperator::Add))),
_ => continue,
}
}
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
return Some(Err(crate::parse::Error::new_character_not_allowed(first_character,
crate::parse::error::Location::new(0, Some(0)))));
return Ok(crate::Term::true_());
},
"false" =>
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
return Ok(crate::Term::false_());
},
_ if is_variable_name(identifier) =>
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
// TODO: implement look-up
let declaration = crate::VariableDeclaration::new(identifier.to_string());
let declaration = std::rc::Rc::new(declaration);
return Ok(crate::Term::variable(declaration));
},
_ if is_function_name(identifier) =>
{
let function_name = identifier;
println!("{} parsing function {}", indentation, function_name);
let input = input.trim_start();
// Parse arguments if there are any
let (arguments, input) = match parenthesized_expression(input)?
{
Some((parenthesized_expression, input)) =>
{
let functor = |token: &_| *token == Token::Symbol(Symbol::Comma);
let arguments = Tokens::new_filter(parenthesized_expression, functor).split()
.map(|argument| TermStr::new(argument?).parse(level + 1))
.collect::<Result<_, _>>()?;
(arguments, input)
}
None => (vec![], input),
};
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
// TODO: implement look-up
let declaration =
crate::FunctionDeclaration::new(function_name.to_string(), arguments.len());
let declaration = std::rc::Rc::new(declaration);
return Ok(crate::Term::function(declaration, arguments));
},
_ => (),
}
}
// TODO: parse negative value
// Parse parenthesized terms
if let Some((parenthesized_expression, input)) = parenthesized_expression(input)?
{
if !input.trim().is_empty()
{
return Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))));
}
return TermStr::new(parenthesized_expression).parse(level + 1);
}
Err(crate::parse::Error::new_unexpected_token(
crate::parse::error::Location::new(0, Some(0))))
}
}
pub(crate) struct SplitTermAtInfixOperator<'i>
// TODO: refactor
fn exponentiate_inner<'i, T>(mut argument_iterator: T, level: usize)
-> Result<Option<crate::Term>, crate::parse::Error>
where
T: std::iter::Iterator<Item = Result<&'i str, crate::parse::Error>>
{
infix_operator_iterator: TermInfixOperatorIterator<'i>,
infix_operator: TermInfixOperator,
previous_index: usize,
}
impl<'i> SplitTermAtInfixOperator<'i>
{
pub fn new(input: &TermStr<'i>, infix_operator: TermInfixOperator)
-> Self
match argument_iterator.next()
{
Self
Some(argument) =>
{
infix_operator_iterator: input.iter_infix_operators(),
infix_operator,
previous_index: 0,
}
// TODO: improve error handling if antecedent cannot be parsed
let argument = TermStr::new(argument?).parse(level)?;
match exponentiate_inner(argument_iterator, level)?
{
Some(next_argument) => Ok(Some(crate::Term::exponentiate(Box::new(argument),
Box::new(next_argument)))),
None => Ok(Some(argument)),
}
},
None => Ok(None),
}
}
impl<'i> std::iter::Iterator for SplitTermAtInfixOperator<'i>
fn exponentiate<'i, T>(mut argument_iterator: T, level: usize)
-> Result<crate::Term, crate::parse::Error>
where
T: std::iter::Iterator<Item = Result<&'i str, crate::parse::Error>>
{
type Item = Result<&'i str, crate::parse::Error>;
fn next(&mut self) -> Option<Self::Item>
match argument_iterator.next()
{
loop
Some(argument) =>
{
let (input_left, input_right, infix_operator) =
match self.infix_operator_iterator.next()
// TODO: improve error handling if antecedent cannot be parsed
let argument = TermStr::new(argument?).parse(level)?;
match exponentiate_inner(argument_iterator, level)?
{
Some(Err(error)) => return Some(Err(error)),
Some(Ok(infix_operator_iterator_next)) => infix_operator_iterator_next,
None => break,
};
if infix_operator == self.infix_operator
{
// TODO: refactor
let index = input_left.as_ptr() as usize
+ input_left.len()
- self.infix_operator_iterator.original_input.as_ptr() as usize;
let split_input = &self.infix_operator_iterator
.original_input[self.previous_index..index].trim();
self.previous_index = input_right.as_ptr() as usize
- self.infix_operator_iterator.original_input.as_ptr() as usize;
return Some(Ok(split_input));
Some(next_argument) =>
Ok(crate::Term::exponentiate(Box::new(argument), Box::new(next_argument))),
None => Err(crate::parse::Error::new_expected_term(
crate::parse::error::Location::new(0, Some(0)))),
}
}
let remaining_input = self.infix_operator_iterator
.original_input[self.previous_index..].trim();
if remaining_input.is_empty()
{
None
}
else
{
self.previous_index = self.infix_operator_iterator.original_input.len();
Some(Ok(remaining_input))
}
},
None => Err(crate::parse::Error::new_expected_term(
crate::parse::error::Location::new(0, Some(0)))),
}
}

View File

@ -10,8 +10,10 @@ pub(crate) enum Keyword
Exists,
False,
ForAll,
Infimum,
Not,
Or,
Supremum,
True,
}
@ -25,8 +27,10 @@ impl std::fmt::Debug for Keyword
Self::Exists => write!(formatter, "exists"),
Self::False => write!(formatter, "false"),
Self::ForAll => write!(formatter, "forall"),
Self::Infimum => write!(formatter, "inf"),
Self::Not => write!(formatter, "not"),
Self::Or => write!(formatter, "or"),
Self::Supremum => write!(formatter, "sup"),
Self::True => write!(formatter, "true"),
}
}
@ -148,8 +152,10 @@ pub(crate) fn is_keyword(identifier: &str) -> bool
| "exists"
| "false"
| "forall"
| "inf"
| "not"
| "or"
| "sup"
| "true" => true,
_ => false,
}
@ -467,7 +473,6 @@ where
};
let input_left = self.original_input[self.previous_index..index_left].trim();
assert!(!input_left.is_empty());
self.previous_index = index_right;