diff --git a/src/format/formulas.rs b/src/format/formulas.rs index a1f6161..25dd927 100644 --- a/src/format/formulas.rs +++ b/src/format/formulas.rs @@ -1,5 +1,23 @@ use super::terms::*; +impl std::fmt::Debug for crate::ComparisonOperator +{ + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result + { + let operator_symbol = match self + { + Self::Less => "<", + Self::LessOrEqual => "<=", + Self::Greater => ">", + Self::GreaterOrEqual => ">=", + Self::Equal => "=", + Self::NotEqual => "!=", + }; + + write!(formatter, "{}", operator_symbol) + } +} + impl std::fmt::Debug for crate::ImplicationDirection { fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result diff --git a/src/parse.rs b/src/parse.rs index 99c7584..2efe9e5 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -13,6 +13,8 @@ pub use formulas::formula;*/ pub mod error; pub mod formulas; +pub mod terms; pub mod tokens; pub use error::Error; +pub use formulas::formula; diff --git a/src/parse/error.rs b/src/parse/error.rs index fbf13e4..8547521 100644 --- a/src/parse/error.rs +++ b/src/parse/error.rs @@ -24,6 +24,12 @@ pub enum Kind CharacterNotAllowed(char), ParseNumber(String), MixedImplicationDirections(Location), + ExpectedVariableDeclaration, + UnexpectedToken, + EmptyInput, + ExpectedLogicalConnectiveArgument(String), + ExpectedComparisonArgument(String), + MultipleComparisonOperators(crate::ComparisonOperator, crate::ComparisonOperator), } pub struct Error @@ -73,6 +79,43 @@ impl Error { Self::new(Kind::MixedImplicationDirections(location_2), location_1) } + + pub(crate) fn new_expected_variable_declaration(location: Location) -> Self + { + Self::new(Kind::ExpectedVariableDeclaration, location) + } + + pub(crate) fn new_unexpected_token(location: Location) -> Self + { + Self::new(Kind::UnexpectedToken, location) + } + + pub(crate) fn new_empty_input(location: Location) -> Self + { + Self::new(Kind::EmptyInput, location) + } + + pub(crate) fn new_expected_logical_connective_argument(logical_connective_name: String, + location: Location) + -> Self + { + Self::new(Kind::ExpectedLogicalConnectiveArgument(logical_connective_name), location) + } + + pub(crate) fn new_comparison_argument(comparison_operator_name: String, location: Location) + -> Self + { + Self::new(Kind::ExpectedComparisonArgument(comparison_operator_name), location) + } + + pub(crate) fn new_multiple_comparison_operators( + comparison_operator_1: crate::ComparisonOperator, + comparison_operator_2: crate::ComparisonOperator, location: Location) + -> Self + { + Self::new(Kind::MultipleComparisonOperators(comparison_operator_1, comparison_operator_2), + location) + } } impl std::fmt::Debug for Error @@ -97,6 +140,19 @@ impl std::fmt::Debug for Error // TODO: print second location properly Kind::MixedImplicationDirections(_location_2) => write!(formatter, "-> and <- implications may not be mixed within the same scope")?, + Kind::ExpectedVariableDeclaration => + write!(formatter, "expected variable declaration")?, + Kind::UnexpectedToken => write!(formatter, "unexpected token")?, + Kind::EmptyInput => write!(formatter, "empty input")?, + Kind::ExpectedLogicalConnectiveArgument(ref logical_connective_name) => + write!(formatter, "this “{}” logical connective is missing an argument", + logical_connective_name)?, + Kind::ExpectedComparisonArgument(ref comparison_operator_name) => + write!(formatter, "this “{}” comparison is missing an argument", + comparison_operator_name)?, + Kind::MultipleComparisonOperators(comparison_operator_1, comparison_operator_2) => + write!(formatter, "chained comparisons aren’t supported (found “{:?}” and “{:?}” in the same formula), consider separating them with “and”", + comparison_operator_1, comparison_operator_2)?, } if let Some(source) = &self.source diff --git a/src/parse/formulas.rs b/src/parse/formulas.rs index e0acb04..0817f9e 100644 --- a/src/parse/formulas.rs +++ b/src/parse/formulas.rs @@ -1,6 +1,7 @@ +use super::terms::*; use super::tokens::*; -pub fn parse_formula(input: &str) -> Result +pub fn formula(input: &str) -> Result { let formula_str = FormulaStr::new(input); formula_str.parse(0)?; @@ -9,8 +10,13 @@ pub fn parse_formula(input: &str) -> Result Ok(crate::Formula::true_()) } +pub(crate) fn predicate_name(identifier: &str) -> Option<(&str, &str)> +{ + function_name(identifier) +} + #[derive(Clone, Copy, Eq, PartialEq)] -enum FormulaInfixOperator +enum LogicalConnective { And, IfAndOnlyIf, @@ -19,7 +25,8 @@ enum FormulaInfixOperator Or, } -impl FormulaInfixOperator +// TODO: rename to logic infix connective +impl LogicalConnective { fn level(&self) -> usize { @@ -34,7 +41,7 @@ impl FormulaInfixOperator } } -impl std::fmt::Debug for FormulaInfixOperator +impl std::fmt::Debug for LogicalConnective { fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { @@ -64,102 +71,389 @@ impl<'i> FormulaStr<'i> } } - pub fn top_level_infix_operator(&self) - -> Result, crate::parse::Error> + fn iter_tokens(&self) -> TokenIterator<'i> { - let mut top_level_infix_operator = None; + TokenIterator::new(self.input) + } - for infix_operator in self.iter_infix_operators() + fn filter_logical_connective(&self, logical_connective: LogicalConnective) + -> std::iter::Filter, impl FnMut(&Result<(usize, usize, Token<'i>), crate::parse::Error>) -> bool> + { + let token_selector = move |token: &_| match token { - let (_, _, infix_operator) = infix_operator?; - - top_level_infix_operator = match top_level_infix_operator + Ok((_, _, Token::Identifier(ref identifier))) => match *identifier { - None => Some(infix_operator), - Some(top_level_infix_operator) => + "and" => logical_connective == LogicalConnective::And, + "or" => logical_connective == LogicalConnective::Or, + _ => false, + }, + Ok((_, _, Token::Symbol(ref symbol))) => match symbol + { + Symbol::ArrowLeft => logical_connective == LogicalConnective::ImpliesRightToLeft, + Symbol::ArrowLeftAndRight => logical_connective == LogicalConnective::IfAndOnlyIf, + Symbol::ArrowRight => logical_connective == LogicalConnective::ImpliesLeftToRight, + _ => false, + }, + _ => false, + }; + + self.iter_tokens().filter(token_selector) + } + + pub fn top_level_logical_connective(&self) + -> Result, crate::parse::Error> + { + let logical_connective = |token| match token + { + Token::Identifier(identifier) => match identifier + { + "and" => Some(LogicalConnective::And), + "or" => Some(LogicalConnective::Or), + _ => None, + }, + Token::Symbol(symbol) => match symbol + { + Symbol::ArrowLeft => Some(LogicalConnective::ImpliesRightToLeft), + Symbol::ArrowLeftAndRight => Some(LogicalConnective::IfAndOnlyIf), + Symbol::ArrowRight => Some(LogicalConnective::ImpliesLeftToRight), + _ => None, + }, + _ => None, + }; + + let mut top_level_logical_connective = None; + + for token in self.iter_tokens() + { + let (_, _, token) = token?; + let logical_connective = match logical_connective(token) + { + Some(logical_connective) => logical_connective, + None => continue, + }; + + top_level_logical_connective = match top_level_logical_connective + { + None => Some(logical_connective), + Some(top_level_logical_connective) => { - if (infix_operator == FormulaInfixOperator::ImpliesLeftToRight - && top_level_infix_operator == FormulaInfixOperator::ImpliesRightToLeft) - || (infix_operator == FormulaInfixOperator::ImpliesRightToLeft - && top_level_infix_operator == FormulaInfixOperator::ImpliesLeftToRight) + if (logical_connective == LogicalConnective::ImpliesLeftToRight + && top_level_logical_connective == LogicalConnective::ImpliesRightToLeft) + || (logical_connective == LogicalConnective::ImpliesRightToLeft + && top_level_logical_connective == LogicalConnective::ImpliesLeftToRight) { return Err(crate::parse::Error::new_mixed_implication_directions( crate::parse::error::Location::new(0, Some(0)), crate::parse::error::Location::new(0, Some(0)))); } - if infix_operator.level() > top_level_infix_operator.level() + if logical_connective.level() > top_level_logical_connective.level() { - Some(infix_operator) + Some(logical_connective) } else { - Some(top_level_infix_operator) + Some(top_level_logical_connective) } }, } } - Ok(top_level_infix_operator) + Ok(top_level_logical_connective) } - pub fn iter_infix_operators(&self) -> FormulaInfixOperatorIterator<'i> + fn filter_comparison_operators(&self) + -> std::iter::FilterMap, impl FnMut(Result<(usize, usize, Token<'i>), crate::parse::Error>) + -> Option>> { - FormulaInfixOperatorIterator::new(self.input) + let token_functor = |token| match token + { + Ok((input_left, remaining_input, Token::Symbol(symbol))) => match symbol + { + Symbol::Greater => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Greater))), + Symbol::GreaterOrEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::GreaterOrEqual))), + Symbol::Less => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Less))), + Symbol::LessOrEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::LessOrEqual))), + Symbol::Equal => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Equal))), + Symbol::NotEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::NotEqual))), + _ => None, + }, + Err(error) => Some(Err(error)), + _ => None, + }; + + self.iter_tokens().filter_map(token_functor) } - pub fn split_at_infix_operator(&self, infix_operator: FormulaInfixOperator) - -> SplitFormulaAtInfixOperator<'i> - { - SplitFormulaAtInfixOperator::new(self, infix_operator) - } - - pub fn parse(&self, level: usize) -> Result<(), crate::parse::Error> + pub fn parse(&self, level: usize) -> Result { let indentation = " ".repeat(level); - println!("{}- parsing: {}", indentation, self.input); - let input = self.input.trim_start(); - match self.top_level_infix_operator()? + println!("{}- parsing formula: {}", indentation, input); + + match input.chars().next() { - None => - { - if let Some((identifier, _)) = identifier(input) - { - match identifier - { - "exists" => println!("{} parsing “exists” expression from: {}", indentation, input), - "forall" => println!("{} parsing “forall” expression from: {}", indentation, input), - _ => (), - } - } - - println!("{} can’t break down any further: {}", indentation, input) - }, - Some(top_level_infix_operator) => - { - println!("{} parsing “{:?}” expression from: {}", indentation, - top_level_infix_operator, input); - - for subformula in self.split_at_infix_operator(top_level_infix_operator) - { - FormulaStr::new(subformula?).parse(level + 1)?; - } - }, + Some(')') => return Err(crate::parse::Error::new_unmatched_parenthesis( + crate::parse::error::Location::new(0, Some(0)))), + None => return Err(crate::parse::Error::new_empty_input( + crate::parse::error::Location::new(0, Some(0)))), + _ => (), } - Ok(()) + // Parse logical infix connectives + if let Some(top_level_logical_connective) = self.top_level_logical_connective()? + { + println!("{} parsing “{:?}” infix formula", indentation, top_level_logical_connective); + + // Parse arguments of n-ary logical infix connectives + let arguments_n_ary = || + { + // TODO: improve error handling if the formulas between the operators are invalid + TokenSplit::new(self.filter_logical_connective(top_level_logical_connective), + self.input) + .map(|subformula| FormulaStr::new(subformula?).parse(level + 1)) + .collect::, _>>() + }; + + match top_level_logical_connective + { + LogicalConnective::And => return Ok(crate::Formula::and(arguments_n_ary()?)), + LogicalConnective::Or => return Ok(crate::Formula::or(arguments_n_ary()?)), + LogicalConnective::IfAndOnlyIf => + return Ok(crate::Formula::if_and_only_if(arguments_n_ary()?)), + LogicalConnective::ImpliesLeftToRight => + return implication_left_to_right( + TokenSplit::new( + self.filter_logical_connective(top_level_logical_connective), + self.input), + level + 1), + /*LogicalConnective::ImpliesRightToLeft => unimplemented!(),*/ + _ => + { + println!("{} TODO: parse implication", indentation); + + // TODO: implement correctly + return Ok(crate::Formula::true_()); + } + } + } + + // Parse quantified formulas + if let Some((identifier, input)) = identifier(input) + { + let quantifier = match identifier + { + "exists" => Some(Quantifier::Existential), + "forall" => Some(Quantifier::Universal), + _ => None, + }; + + if let Some(quantifier) = quantifier + { + let input = input.trim_start(); + println!("{} parsing “{:?}” formula body: {}", indentation, quantifier, input); + + return quantified_formula(input, quantifier, level + 1); + } + } + + let mut comparison_operators = self.filter_comparison_operators(); + + // Parse comparisons + if let Some(comparison_operator) = comparison_operators.next() + { + let (_, _, comparison_operator) = comparison_operator?; + + // Comparisons with more than one comparison operator aren’t supported + if let Some(next_comparison_operator) = comparison_operators.next() + { + let (_, _, next_comparison_operator) = next_comparison_operator?; + + return Err(crate::parse::Error::new_multiple_comparison_operators( + comparison_operator, next_comparison_operator, + crate::parse::error::Location::new(0, Some(0)))); + } + + println!("{} parsing “{:?}” comparison: {}", indentation, comparison_operator, input); + + let mut comparison_operator_split = + TokenSplit::new(self.filter_comparison_operators(), self.input); + + // There’s exactly one comparison operator in this formula, as we have verified above. + // Hence, the split is guaranteed to generate exactly these two elements + let input_left = comparison_operator_split.next().unwrap()?; + let input_right = comparison_operator_split.next().unwrap()?; + + let argument_left = TermStr::new(input_left).parse(level + 1)?; + let argument_right = TermStr::new(input_right).parse(level + 1)?; + + return Ok(crate::Formula::compare(comparison_operator, Box::new(argument_left), + Box::new(argument_right))); + } + + // Parse predicates + if let Some((predicate_name, input)) = predicate_name(input) + { + println!("{} TODO: parse predicate {}", indentation, predicate_name); + + let input = input.trim_start(); + + // Parse arguments if there are any + /*let arguments = match parenthesized_expression(input)? + { + Some((parenthesized_expression, remaining_input)) => + { + unimplemented!(); + } + None => unimplemented!(), + };*/ + + // TODO: implement correctly + return Ok(crate::Formula::true_()); + } + + // Parse parenthesized formulas + if let Some('(') = input.chars().next() + { + match parenthesized_expression(input)? + { + Some((parenthesized_expression, remaining_input)) => + { + if !remaining_input.trim().is_empty() + { + return Err(crate::parse::Error::new_unexpected_token( + crate::parse::error::Location::new(0, Some(0)))); + } + + return FormulaStr::new(parenthesized_expression).parse(level); + }, + None => unreachable!(), + } + }; + + println!("{} can’t break down formula any further: {}", indentation, input); + + // TODO: implement correctly + Ok(crate::Formula::true_()) } } -struct FormulaInfixOperatorIterator<'i> +#[derive(Clone, Copy, Eq, PartialEq)] +pub(crate) enum Quantifier +{ + Existential, + Universal, +} + +impl std::fmt::Debug for Quantifier +{ + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result + { + match &self + { + Self::Existential => write!(formatter, "exists"), + Self::Universal => write!(formatter, "forall"), + } + } +} + +// TODO: refactor +fn implication_left_to_right_inner<'i, T>( + mut split_formula_at_logical_connective: TokenSplit<'i, T, Token<'i>>, level: usize) + -> Result, crate::parse::Error> +where + T: std::iter::Iterator), crate::parse::Error>> +{ + match split_formula_at_logical_connective.next() + { + Some(argument) => + { + // TODO: improve error handling if antecedent cannot be parsed + let argument = FormulaStr::new(argument?).parse(level)?; + match implication_left_to_right_inner(split_formula_at_logical_connective, level)? + { + Some(next_argument) => Ok(Some(crate::Formula::implies( + crate::ImplicationDirection::LeftToRight, Box::new(argument), + Box::new(next_argument)))), + None => Ok(Some(argument)), + } + }, + None => Ok(None), + } +} + +fn implication_left_to_right<'i, T>( + mut split_formula_at_logical_connective: TokenSplit<'i, T, Token<'i>>, level: usize) + -> Result +where + T: std::iter::Iterator), crate::parse::Error>> +{ + match split_formula_at_logical_connective.next() + { + Some(argument) => + { + // TODO: improve error handling if antecedent cannot be parsed + let argument = FormulaStr::new(argument?).parse(level)?; + match implication_left_to_right_inner(split_formula_at_logical_connective, level)? + { + Some(next_argument) => Ok(crate::Formula::implies( + crate::ImplicationDirection::LeftToRight, Box::new(argument), + Box::new(next_argument))), + None => Err(crate::parse::Error::new_expected_logical_connective_argument( + "left-to-right implication".to_string(), + crate::parse::error::Location::new(0, Some(0)))), + } + }, + None => Err(crate::parse::Error::new_expected_logical_connective_argument( + "left-to-right implication".to_string(), + crate::parse::error::Location::new(0, Some(0)))), + } +} + +fn quantified_formula(input: &str, quantifier: Quantifier, level: usize) + -> Result +{ + let (parameters, input) = match variable_declarations(input)? + { + Some(variable_declarations) => variable_declarations, + None => return Err(crate::parse::Error::new_expected_variable_declaration( + crate::parse::error::Location::new(0, Some(0)))), + }; + let parameters = std::rc::Rc::new(parameters); + + let formula_str = FormulaStr::new(input.trim()); + let formula = Box::new(formula_str.parse(level)?); + + // TODO: push variable stack layer + let formula = match quantifier + { + Quantifier::Existential => crate::Formula::exists(parameters, formula), + Quantifier::Universal => crate::Formula::for_all(parameters, formula), + }; + + Ok(formula) +} + +/*struct ComparisonOperatorIterator<'i> { original_input: &'i str, input: &'i str, } -impl<'i> FormulaInfixOperatorIterator<'i> +impl<'i> ComparisonOperatorIterator<'i> { pub fn new(input: &'i str) -> Self { @@ -171,9 +465,10 @@ impl<'i> FormulaInfixOperatorIterator<'i> } } -impl<'i> std::iter::Iterator for FormulaInfixOperatorIterator<'i> +// TODO: refactor +impl<'i> std::iter::Iterator for ComparisonOperatorIterator<'i> { - type Item = Result<(&'i str, &'i str, FormulaInfixOperator), crate::parse::Error>; + type Item = Result<(&'i str, &'i str, crate::ComparisonOperator), crate::parse::Error>; fn next(&mut self) -> Option { @@ -218,109 +513,49 @@ impl<'i> std::iter::Iterator for FormulaInfixOperatorIterator<'i> let index_left = self.input.as_ptr() as usize - self.original_input.as_ptr() as usize; let input_left = self.original_input.split_at(index_left).0.trim_end(); - if let Some((identifier, remaining_input)) = identifier(self.input) - { - self.input = remaining_input; - - match identifier - { - "and" => - return Some(Ok((input_left, remaining_input, FormulaInfixOperator::And))), - "or" => - return Some(Ok((input_left, remaining_input, FormulaInfixOperator::Or))), - _ => continue, - } - } - if let Some((symbol, remaining_input)) = symbol(self.input) { self.input = remaining_input; match symbol { - Symbol::ArrowLeft => return Some(Ok((input_left, remaining_input, - FormulaInfixOperator::ImpliesRightToLeft))), - Symbol::ArrowLeftAndRight => return Some(Ok((input_left, remaining_input, - FormulaInfixOperator::IfAndOnlyIf))), - Symbol::ArrowRight => return Some(Ok((input_left, remaining_input, - FormulaInfixOperator::ImpliesLeftToRight))), + Symbol::Greater => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Greater))), + Symbol::GreaterOrEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::GreaterOrEqual))), + Symbol::Less => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Less))), + Symbol::LessOrEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::LessOrEqual))), + Symbol::Equal => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::Equal))), + Symbol::NotEqual => + return Some(Ok((input_left, remaining_input, + crate::ComparisonOperator::NotEqual))), _ => continue, } } + match identifier(self.input) + { + Some((_, remaining_input)) => + { + self.input = remaining_input; + continue; + } + None => (), + } + return Some(Err(crate::parse::Error::new_character_not_allowed(first_character, crate::parse::error::Location::new(0, Some(0))))); } } -} - -struct SplitFormulaAtInfixOperator<'i> -{ - infix_operator_iterator: FormulaInfixOperatorIterator<'i>, - infix_operator: FormulaInfixOperator, - previous_index: usize, -} - -impl<'i> SplitFormulaAtInfixOperator<'i> -{ - pub fn new(input: &FormulaStr<'i>, infix_operator: FormulaInfixOperator) - -> Self - { - Self - { - infix_operator_iterator: input.iter_infix_operators(), - infix_operator, - previous_index: 0, - } - } -} - -impl<'i> std::iter::Iterator for SplitFormulaAtInfixOperator<'i> -{ - type Item = Result<&'i str, crate::parse::Error>; - - fn next(&mut self) -> Option - { - loop - { - let (input_left, input_right, infix_operator) = - match self.infix_operator_iterator.next() - { - Some(Err(error)) => return Some(Err(error)), - Some(Ok(infix_operator_iterator_next)) => infix_operator_iterator_next, - None => break, - }; - - if infix_operator == self.infix_operator - { - // TODO: refactor - let index = input_left.as_ptr() as usize - + input_left.len() - - self.infix_operator_iterator.original_input.as_ptr() as usize; - let split_input = &self.infix_operator_iterator - .original_input[self.previous_index..index].trim(); - self.previous_index = input_right.as_ptr() as usize - - self.infix_operator_iterator.original_input.as_ptr() as usize; - - return Some(Ok(split_input)); - } - } - - let remaining_input = self.infix_operator_iterator - .original_input[self.previous_index..].trim(); - - if remaining_input.is_empty() - { - None - } - else - { - self.previous_index = self.infix_operator_iterator.original_input.len(); - - Some(Ok(remaining_input)) - } - } -} +}*/ #[cfg(test)] mod tests diff --git a/src/parse/terms.rs b/src/parse/terms.rs new file mode 100644 index 0000000..4d61de6 --- /dev/null +++ b/src/parse/terms.rs @@ -0,0 +1,442 @@ +use super::tokens::*; + +pub fn parse_term(input: &str) -> Result +{ + let term_str = TermStr::new(input); + term_str.parse(0)?; + + // TODO: implement correctly + Ok(crate::Term::true_()) +} + +pub(crate) fn function_name(input: &str) -> Option<(&str, &str)> +{ + let (identifier, remaining_input) = identifier(input)?; + + if is_keyword(identifier) + { + return None; + } + + let mut characters = identifier.chars(); + + while let Some(character) = characters.next() + { + match character + { + '_' => continue, + _ if character.is_ascii_lowercase() => return Some((identifier, remaining_input)), + _ => return None, + } + } + + None +} + +fn variable_name(input: &str) -> Option<(&str, &str)> +{ + let (identifier, remaining_input) = identifier(input)?; + + let mut characters = identifier.chars(); + + while let Some(character) = characters.next() + { + match character + { + '_' => continue, + _ if character.is_ascii_uppercase() => return Some((identifier, remaining_input)), + _ => return None, + } + } + + None +} + +pub(crate) fn variable_declaration(input: &str) -> Option<(crate::VariableDeclaration, &str)> +{ + variable_name(input) + .map(|(variable_name, remaining_input)| + (crate::VariableDeclaration::new(variable_name.to_string()), remaining_input)) +} + +pub(crate) fn variable_declarations(input: &str) + -> Result, crate::parse::Error> +{ + let mut variable_declarations = vec![]; + + let (first_variable_declaration, mut input) = match variable_declaration(input) + { + Some(first_variable_declaration) => first_variable_declaration, + None => return Ok(None), + }; + + variable_declarations.push(std::rc::Rc::new(first_variable_declaration)); + + loop + { + input = input.trim_start(); + + input = match symbol(input) + { + Some((Symbol::Comma, input)) => input, + // TODO: detect redeclarations, such as in “exists X, Y, X” + _ => return Ok(Some((variable_declarations, input))), + }; + + input = input.trim_start(); + + let (variable_declaration, remaining_input) = match variable_declaration(input) + { + Some(variable_declaration) => variable_declaration, + None => return Err(crate::parse::Error::new_expected_variable_declaration( + crate::parse::error::Location::new(0, Some(0)))), + }; + + input = remaining_input; + + variable_declarations.push(std::rc::Rc::new(variable_declaration)); + } +} + +#[derive(Clone, Copy, Eq, PartialEq)] +pub(crate) enum TermInfixOperator +{ + Add, + Divide, + Exponentiate, + Modulo, + Multiply, + Subtract, +} + +impl TermInfixOperator +{ + fn level(&self) -> usize + { + match self + { + Self::Exponentiate => 1, + Self::Multiply + | Self::Divide + | Self::Modulo => 2, + Self::Add + | Self::Subtract => 3, + } + } +} + +impl std::fmt::Debug for TermInfixOperator +{ + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result + { + match &self + { + Self::Add => write!(formatter, "+"), + Self::Divide => write!(formatter, "/"), + Self::Exponentiate => write!(formatter, "**"), + // TODO: conflicts with single-line comments + Self::Modulo => write!(formatter, "%"), + Self::Multiply => write!(formatter, "*"), + Self::Subtract => write!(formatter, "-"), + } + } +} + +pub(crate) struct TermStr<'i> +{ + input: &'i str, +} + +impl<'i> TermStr<'i> +{ + pub fn new(input: &'i str) -> Self + { + Self + { + input, + } + } + + pub fn iter_infix_operators(&self) -> TermInfixOperatorIterator<'i> + { + TermInfixOperatorIterator::new(self.input) + } + + pub fn split_at_infix_operator(&self, infix_operator: TermInfixOperator) + -> SplitTermAtInfixOperator<'i> + { + SplitTermAtInfixOperator::new(self, infix_operator) + } + + pub fn parse(&self, level: usize) -> Result + { + let indentation = " ".repeat(level); + println!("{}- parsing term: {}", indentation, self.input); + + let input = self.input.trim_start(); + + // TODO: implement + Ok(crate::Term::true_()) + } +} + +pub(crate) struct TermInfixOperatorIterator<'i> +{ + original_input: &'i str, + input: &'i str, +} + +impl<'i> TermInfixOperatorIterator<'i> +{ + pub fn new(input: &'i str) -> Self + { + Self + { + original_input: input, + input, + } + } +} + +impl<'i> std::iter::Iterator for TermInfixOperatorIterator<'i> +{ + type Item = Result<(&'i str, &'i str, TermInfixOperator), crate::parse::Error>; + + fn next(&mut self) -> Option + { + loop + { + self.input = self.input.trim_start(); + + let first_character = match self.input.chars().next() + { + None => return None, + Some(first_character) => first_character, + }; + + // TODO: implement + if self.input.starts_with("|") + { + unimplemented!(); + } + + if self.input.starts_with(")") + { + return Some(Err(crate::parse::Error::new_unmatched_parenthesis( + crate::parse::error::Location::new(0, Some(1))))); + } + + match parenthesized_expression(self.input) + { + Ok(Some((_, remaining_input))) => + { + self.input = remaining_input; + continue; + }, + Ok(None) => (), + Err(error) => return Some(Err(error)), + } + + match number(self.input) + { + Ok(Some((_, remaining_input))) => + { + self.input = remaining_input; + continue; + } + Ok(None) => (), + Err(error) => return Some(Err(error)), + } + + let index_left = self.input.as_ptr() as usize - self.original_input.as_ptr() as usize; + let input_left = self.original_input.split_at(index_left).0.trim_end(); + + if let Some((_, remaining_input)) = identifier(self.input) + { + self.input = remaining_input; + continue; + } + + if let Some((symbol, remaining_input)) = symbol(self.input) + { + self.input = remaining_input; + + match symbol + { + Symbol::Division => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Divide))), + Symbol::Exponentiation => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Exponentiate))), + Symbol::Minus => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Subtract))), + Symbol::Multiplication => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Multiply))), + Symbol::Percent => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Modulo))), + Symbol::Plus => return Some(Ok((input_left, remaining_input, + TermInfixOperator::Add))), + _ => continue, + } + } + + return Some(Err(crate::parse::Error::new_character_not_allowed(first_character, + crate::parse::error::Location::new(0, Some(0))))); + } + } +} + +pub(crate) struct SplitTermAtInfixOperator<'i> +{ + infix_operator_iterator: TermInfixOperatorIterator<'i>, + infix_operator: TermInfixOperator, + previous_index: usize, +} + +impl<'i> SplitTermAtInfixOperator<'i> +{ + pub fn new(input: &TermStr<'i>, infix_operator: TermInfixOperator) + -> Self + { + Self + { + infix_operator_iterator: input.iter_infix_operators(), + infix_operator, + previous_index: 0, + } + } +} + +impl<'i> std::iter::Iterator for SplitTermAtInfixOperator<'i> +{ + type Item = Result<&'i str, crate::parse::Error>; + + fn next(&mut self) -> Option + { + loop + { + let (input_left, input_right, infix_operator) = + match self.infix_operator_iterator.next() + { + Some(Err(error)) => return Some(Err(error)), + Some(Ok(infix_operator_iterator_next)) => infix_operator_iterator_next, + None => break, + }; + + if infix_operator == self.infix_operator + { + // TODO: refactor + let index = input_left.as_ptr() as usize + + input_left.len() + - self.infix_operator_iterator.original_input.as_ptr() as usize; + let split_input = &self.infix_operator_iterator + .original_input[self.previous_index..index].trim(); + self.previous_index = input_right.as_ptr() as usize + - self.infix_operator_iterator.original_input.as_ptr() as usize; + + return Some(Ok(split_input)); + } + } + + let remaining_input = self.infix_operator_iterator + .original_input[self.previous_index..].trim(); + + if remaining_input.is_empty() + { + None + } + else + { + self.previous_index = self.infix_operator_iterator.original_input.len(); + + Some(Ok(remaining_input)) + } + } +} + +#[cfg(test)] +mod tests +{ + use super::*; + + #[test] + fn parse_variable_name() + { + assert_eq!(variable_name("X").unwrap(), ("X", "")); + assert_eq!(variable_name("_X").unwrap(), ("_X", "")); + assert_eq!(variable_name("__X").unwrap(), ("__X", "")); + assert_eq!(variable_name("Variable").unwrap(), ("Variable", "")); + assert_eq!(variable_name("_Variable").unwrap(), ("_Variable", "")); + assert_eq!(variable_name("__Variable").unwrap(), ("__Variable", "")); + assert_eq!(variable_name("X,").unwrap(), ("X", ",")); + assert_eq!(variable_name("_X,").unwrap(), ("_X", ",")); + assert_eq!(variable_name("__X,").unwrap(), ("__X", ",")); + assert_eq!(variable_name("Variable,").unwrap(), ("Variable", ",")); + assert_eq!(variable_name("_Variable,").unwrap(), ("_Variable", ",")); + assert_eq!(variable_name("__Variable,").unwrap(), ("__Variable", ",")); + } + + #[test] + fn parse_variable_declaration() + { + let v = variable_declaration("X").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("X", "")); + let v = variable_declaration("_X").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("_X", "")); + let v = variable_declaration("__X").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("__X", "")); + let v = variable_declaration("Variable").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("Variable", "")); + let v = variable_declaration("_Variable").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("_Variable", "")); + let v = variable_declaration("__Variable").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("__Variable", "")); + let v = variable_declaration("X,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("X", ",")); + let v = variable_declaration("_X,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("_X", ",")); + let v = variable_declaration("__X,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("__X", ",")); + let v = variable_declaration("Variable,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("Variable", ",")); + let v = variable_declaration("_Variable,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("_Variable", ",")); + let v = variable_declaration("__Variable,").unwrap(); + assert_eq!((v.0.name.as_str(), v.1), ("__Variable", ",")); + } + + #[test] + fn parse_variable_declarations() + { + let v = variable_declarations("X.").unwrap().unwrap(); + assert_eq!(v.0.len(), 1); + assert_eq!(v.0[0].name.as_str(), "X"); + assert_eq!(v.1, "."); + + let v = variable_declarations("X,Y,Z.").unwrap().unwrap(); + assert_eq!(v.0.len(), 3); + assert_eq!(v.0[0].name.as_str(), "X"); + assert_eq!(v.0[1].name.as_str(), "Y"); + assert_eq!(v.0[2].name.as_str(), "Z"); + assert_eq!(v.1, "."); + + let v = variable_declarations("X, Y, Z.").unwrap().unwrap(); + assert_eq!(v.0.len(), 3); + assert_eq!(v.0[0].name.as_str(), "X"); + assert_eq!(v.0[1].name.as_str(), "Y"); + assert_eq!(v.0[2].name.as_str(), "Z"); + assert_eq!(v.1, "."); + + let v = variable_declarations("X , Y , Z.").unwrap().unwrap(); + assert_eq!(v.0.len(), 3); + assert_eq!(v.0[0].name.as_str(), "X"); + assert_eq!(v.0[1].name.as_str(), "Y"); + assert_eq!(v.0[2].name.as_str(), "Z"); + assert_eq!(v.1, "."); + + assert!(variable_declarations("test").unwrap().is_none()); + assert!(variable_declarations("X, test").is_err()); + assert!(variable_declarations("X ,test").is_err()); + assert!(variable_declarations("X,Y,Z, test").is_err()); + assert!(variable_declarations("X,Y,Z ,test").is_err()); + } +} diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 0586cca..705cfc4 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -1,3 +1,37 @@ +fn substring_offset(substring: &str, string: &str) -> usize +{ + substring.as_ptr() as usize - string.as_ptr() as usize +} + +#[derive(Clone, Copy, Eq, PartialEq)] +pub(crate) enum Keyword +{ + And, + Exists, + False, + ForAll, + Not, + Or, + True, +} + +impl std::fmt::Debug for Keyword +{ + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result + { + match &self + { + Self::And => write!(formatter, "and"), + Self::Exists => write!(formatter, "exists"), + Self::False => write!(formatter, "false"), + Self::ForAll => write!(formatter, "forall"), + Self::Not => write!(formatter, "not"), + Self::Or => write!(formatter, "or"), + Self::True => write!(formatter, "true"), + } + } +} + #[derive(Clone, Copy, Eq, PartialEq)] pub(crate) enum Symbol { @@ -15,6 +49,7 @@ pub(crate) enum Symbol Minus, Multiplication, NotEqual, + Percent, Plus, VerticalBar, } @@ -39,15 +74,24 @@ impl std::fmt::Debug for Symbol Self::Minus => write!(formatter, "-"), Self::Multiplication => write!(formatter, "*"), Self::NotEqual => write!(formatter, "!="), + Self::Percent => write!(formatter, "%"), Self::Plus => write!(formatter, "+"), Self::VerticalBar => write!(formatter, "|"), } } } +#[derive(Clone, Copy, Eq, PartialEq)] +pub(crate) enum Token<'i> +{ + Identifier(&'i str), + Number(usize), + ParenthesizedExpression(&'i str), + Symbol(Symbol), +} + fn is_identifier_start_character(character: char) -> bool { - // TODO: support leading underscores character.is_ascii_alphabetic() } @@ -65,13 +109,17 @@ pub(crate) fn identifier(input: &str) -> Option<(&str, &str)> { let mut characters = input.char_indices(); - let (_, character) = match characters.next() + let first_character = loop { - Some(characters_next) => characters_next, - None => return None, - }; + match characters.next() + { + Some((_, '_')) => continue, + Some((_, character)) => break Some(character), + None => break None, + } + }?; - if !is_identifier_start_character(character) + if !is_identifier_start_character(first_character) { return None; } @@ -92,6 +140,21 @@ pub(crate) fn identifier(input: &str) -> Option<(&str, &str)> } } +pub(crate) fn is_keyword(identifier: &str) -> bool +{ + match identifier + { + "and" + | "exists" + | "false" + | "forall" + | "not" + | "or" + | "true" => true, + _ => false, + } +} + fn number_string(input: &str) -> Option<(&str, &str)> { let mut characters = input.char_indices(); @@ -196,6 +259,7 @@ pub(crate) fn symbol(input: &str) -> Option<(Symbol, &str)> _ => Some((Symbol::Multiplication, remaining_input)), }, '/' => Some((Symbol::Division, remaining_input)), + '%' => Some((Symbol::Percent, remaining_input)), '|' => Some((Symbol::VerticalBar, remaining_input)), _ => None, } @@ -244,11 +308,221 @@ pub(crate) fn parenthesized_expression(input: &str) crate::parse::error::Location::new(0, Some(1)))) } +pub(crate) trait OriginalInput<'i> +{ + fn original_input(&self) -> &'i str; +} + +pub(crate) struct TokenIterator<'i> +{ + original_input: &'i str, + input: &'i str, +} + +impl<'i> TokenIterator<'i> +{ + pub fn new(input: &'i str) -> Self + { + Self + { + original_input: input, + input, + } + } + + /*pub fn filter

(self, pattern: P) -> TokenFilter<'i, P> + where + P: FnMut(&Token<'i>) -> bool, + { + TokenFilter::new(self, pattern) + } + + pub fn split(self) -> TokenSplit<'i, Self> + { + TokenSplit::new(self) + }*/ +} + +/*impl<'i> OriginalInput<'i> for TokenIterator<'i> +{ + fn original_input(&self) -> &'i str + { + self.original_input + } +} + +impl<'i, P> OriginalInput<'i> for std::iter::Filter, P> +{ + fn original_input(&self) -> &'i str + { + self.iter.original_input + } +}*/ + +impl<'i> std::iter::Iterator for TokenIterator<'i> +{ + type Item = Result<(usize, usize, Token<'i>), crate::parse::Error>; + + fn next(&mut self) -> Option + { + self.input = self.input.trim_start(); + let index_left = substring_offset(self.input, self.original_input); + + let first_character = match self.input.chars().next() + { + None => return None, + Some(first_character) => first_character, + }; + + if self.input.starts_with(")") + { + return Some(Err(crate::parse::Error::new_unmatched_parenthesis( + crate::parse::error::Location::new(0, Some(1))))); + } + + match parenthesized_expression(self.input) + { + Ok(Some((parenthesized_expression, remaining_input))) => + { + self.input = remaining_input; + let index_right = substring_offset(self.input, self.original_input); + + return Some(Ok((index_left, index_right, + Token::ParenthesizedExpression(parenthesized_expression)))); + }, + Ok(None) => (), + Err(error) => return Some(Err(error)), + } + + match number(self.input) + { + Ok(Some((number, remaining_input))) => + { + self.input = remaining_input; + let index_right = substring_offset(self.input, self.original_input); + + return Some(Ok((index_left, index_right, Token::Number(number)))); + }, + Ok(None) => (), + Err(error) => return Some(Err(error)), + } + + if let Some((identifier, remaining_input)) = identifier(self.input) + { + self.input = remaining_input; + let index_right = substring_offset(self.input, self.original_input); + + return Some(Ok((index_left, index_right, Token::Identifier(identifier)))); + } + + if let Some((symbol, remaining_input)) = symbol(self.input) + { + self.input = remaining_input; + let index_right = substring_offset(self.input, self.original_input); + + return Some(Ok((index_left, index_right, Token::Symbol(symbol)))); + } + + return Some(Err(crate::parse::Error::new_character_not_allowed(first_character, + crate::parse::error::Location::new(0, Some(0))))); + } +} + +pub(crate) struct TokenSplit<'i, T, U> +where + T: std::iter::Iterator> +{ + token_iterator: T, + original_input: &'i str, + previous_index: usize, +} + +impl<'i, T, U> TokenSplit<'i, T, U> +where + T: std::iter::Iterator> +{ + pub fn new(token_iterator: T, original_input: &'i str) -> Self + { + Self + { + token_iterator, + original_input, + previous_index: 0, + } + } +} + +impl<'i, T, U> std::iter::Iterator for TokenSplit<'i, T, U> +where + T: std::iter::Iterator> +{ + type Item = Result<&'i str, crate::parse::Error>; + + fn next(&mut self) -> Option + { + if self.previous_index == self.original_input.len() + { + return None; + } + + loop + { + match self.token_iterator.next() + { + Some(Ok((index_left, index_right, token))) => + { + let input_between = self.original_input[self.previous_index..index_left].trim(); + + assert!(!input_between.is_empty()); + + self.previous_index = index_right; + + return Some(Ok(input_between)); + }, + Some(Err(error)) => return Some(Err(error)), + None => + { + let remaining_input = self.original_input[self.previous_index..].trim(); + + self.previous_index = self.original_input.len(); + + return Some(Ok(remaining_input)); + }, + } + } + } +} + #[cfg(test)] mod tests { use super::*; + #[test] + fn tokenize_identifier() + { + assert_eq!(identifier("test").unwrap(), ("test", "")); + assert_eq!(identifier("test2").unwrap(), ("test2", "")); + assert_eq!(identifier("Test").unwrap(), ("Test", "")); + assert_eq!(identifier("Test2").unwrap(), ("Test2", "")); + assert_eq!(identifier("_test").unwrap(), ("_test", "")); + assert_eq!(identifier("_test2").unwrap(), ("_test2", "")); + assert_eq!(identifier("__test").unwrap(), ("__test", "")); + assert_eq!(identifier("__test2").unwrap(), ("__test2", "")); + assert_eq!(identifier("test, test").unwrap(), ("test", ", test")); + assert_eq!(identifier("test2, test").unwrap(), ("test2", ", test")); + assert_eq!(identifier("Test, Test").unwrap(), ("Test", ", Test")); + assert_eq!(identifier("Test2, Test").unwrap(), ("Test2", ", Test")); + assert_eq!(identifier("_test, _test").unwrap(), ("_test", ", _test")); + assert_eq!(identifier("_test2, _test").unwrap(), ("_test2", ", _test")); + assert_eq!(identifier("__test, __test").unwrap(), ("__test", ", __test")); + assert_eq!(identifier("__test2, __test").unwrap(), ("__test2", ", __test")); + assert!(identifier("2test, test").is_none()); + assert!(identifier("#test, test").is_none()); + assert!(identifier("$test, test").is_none()); + assert!(identifier(",test, test").is_none()); + } + #[test] fn tokenize_primitives() {