From 0d15381e1103418ed821a5d522421f64790e2726 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Thu, 23 Mar 2017 13:10:44 -0700 Subject: [PATCH] Crudely parse `or` and `or-join`. (#388) r=nalexander --- query-parser/src/parse.rs | 205 ++++++++++++++++++++++++++++--- query-parser/tests/find_tests.rs | 151 ++++++++++++++++++++++- query/src/lib.rs | 44 ++++++- 3 files changed, 378 insertions(+), 22 deletions(-) diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index e7660535..fa9f8f9a 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -27,12 +27,15 @@ use self::mentat_query::{ FindSpec, FnArg, FromValue, + OrJoin, + OrWhereClause, Pattern, PatternNonValuePlace, PatternValuePlace, Predicate, PredicateFn, SrcVar, + UnifyVars, Variable, WhereClause, }; @@ -110,28 +113,114 @@ def_value_satisfy_parser_fn!(Where, PatternNonValuePlace, PatternNonValuePlace::from_value); +fn seq>>(x: T) -> Option> { + match x.into() { + Some(edn::Value::List(items)) => Some(items.into_iter().collect()), + Some(edn::Value::Vector(items)) => Some(items), + _ => None, + } +} + /// Take a vector Value containing one vector Value, and return the `Vec` inside the inner vector. /// Also accepts an inner list, returning it as a `Vec`. fn unwrap_nested(x: edn::Value) -> Option> { match x { edn::Value::Vector(mut v) => { - match v.pop() { - Some(edn::Value::List(items)) => Some(items.into_iter().collect()), - Some(edn::Value::Vector(items)) => Some(items), - _ => None, - } + seq(v.pop()) } _ => None, } } +def_value_parser_fn!(Where, and, (), input, { + matches_plain_symbol!("and", input) +}); + +def_value_parser_fn!(Where, or, (), input, { + matches_plain_symbol!("or", input) +}); + +def_value_parser_fn!(Where, or_join, (), input, { + matches_plain_symbol!("or-join", input) +}); + +def_value_parser_fn!(Where, rule_vars, Vec, input, { + satisfy_map(|x: edn::Value| { + seq(x).and_then(|items| { + let mut p = many1(Query::variable()).skip(eof()); + Query::to_parsed_value(p.parse_lazy(&items[..]).into()) + })}).parse_stream(input) +}); + +def_value_parser_fn!(Where, or_pattern_clause, OrWhereClause, input, { + Where::clause().map(|clause| OrWhereClause::Clause(clause)).parse_stream(input) +}); + +def_value_parser_fn!(Where, or_and_clause, OrWhereClause, input, { + satisfy_map(|x: edn::Value| { + seq(x).and_then(|items| { + let mut p = Where::and() + .with(many1(Where::clause())) + .skip(eof()) + .map(OrWhereClause::And); + let r: ParseResult = p.parse_lazy(&items[..]).into(); + Query::to_parsed_value(r) + }) + }).parse_stream(input) +}); + +def_value_parser_fn!(Where, or_where_clause, OrWhereClause, input, { + choice([Where::or_pattern_clause(), Where::or_and_clause()]).parse_stream(input) +}); + +def_value_parser_fn!(Where, or_clause, WhereClause, input, { + satisfy_map(|x: edn::Value| { + seq(x).and_then(|items| { + let mut p = Where::or() + .with(many1(Where::or_where_clause())) + .skip(eof()) + .map(|clauses| { + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: clauses, + }) + }); + let r: ParseResult = p.parse_lazy(&items[..]).into(); + Query::to_parsed_value(r) + }) + }).parse_stream(input) +}); + +def_value_parser_fn!(Where, or_join_clause, WhereClause, input, { + satisfy_map(|x: edn::Value| { + seq(x).and_then(|items| { + let mut p = Where::or_join() + .with(Where::rule_vars()) + .and(many1(Where::or_where_clause())) + .skip(eof()) + .map(|(vars, clauses)| { + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Explicit(vars), + clauses: clauses, + }) + }); + let r: ParseResult = p.parse_lazy(&items[..]).into(); + Query::to_parsed_value(r) + }) + }).parse_stream(input) +}); + /// A vector containing just a parenthesized filter expression. def_value_parser_fn!(Where, pred, WhereClause, input, { satisfy_map(|x: edn::Value| { // Accept either a list or a vector here: // `[(foo ?x ?y)]` or `[[foo ?x ?y]]` unwrap_nested(x).and_then(|items| { - let mut p = (Query::predicate_fn(), Query::arguments(), eof()).map(|(f, args, _)| { + let mut p = (Query::predicate_fn(), Query::arguments()) + .skip(eof()) + .map(|(f, args)| { WhereClause::Pred( Predicate { operator: f.0, @@ -154,9 +243,9 @@ def_value_parser_fn!(Where, pattern, WhereClause, input, { Where::pattern_non_value_place(), // e Where::pattern_non_value_place(), // a optional(Where::pattern_value_place()), // v - optional(Where::pattern_non_value_place()), // tx - eof()) - .map(|(src, e, a, v, tx, _)| { + optional(Where::pattern_non_value_place())) // tx + .skip(eof()) + .map(|(src, e, a, v, tx)| { let v = v.unwrap_or(PatternValuePlace::Placeholder); let tx = tx.unwrap_or(PatternNonValuePlace::Placeholder); @@ -190,15 +279,28 @@ def_value_parser_fn!(Where, pattern, WhereClause, input, { }); def_value_parser_fn!(Query, arguments, Vec, input, { - (many::, _>(Query::fn_arg()), eof()) - .map(|(args, _)| { args }) + (many::, _>(Query::fn_arg())) + .skip(eof()) .parse_stream(input) }); +def_value_parser_fn!(Where, clause, WhereClause, input, { + choice([Where::pattern(), + Where::pred(), + // It's either + // (or-join [vars] clauses…) + // or + // (or clauses…) + // We don't yet handle source vars. + Where::or_join_clause(), + Where::or_clause(), + ]).parse_stream(input) +}); + def_value_parser_fn!(Where, clauses, Vec, input, { // Right now we only support patterns and predicates. See #239 for more. - (many1::, _>(choice([Where::pattern(), Where::pred()])), eof()) - .map(|(patterns, _)| { patterns }) + (many1::, _>(Where::clause())) + .skip(eof()) .parse_stream(input) }); @@ -213,15 +315,19 @@ def_value_parser_fn!(Find, ellipsis, (), input, { }); def_value_parser_fn!(Find, find_scalar, FindSpec, input, { - (Query::variable(), Find::period(), eof()) - .map(|(var, _, _)| FindSpec::FindScalar(Element::Variable(var))) + Query::variable() + .skip(Find::period()) + .skip(eof()) + .map(|var| FindSpec::FindScalar(Element::Variable(var))) .parse_stream(input) }); def_value_parser_fn!(Find, find_coll, FindSpec, input, { satisfy_unwrap!(edn::Value::Vector, y, { - let mut p = (Query::variable(), Find::ellipsis(), eof()) - .map(|(var, _, _)| FindSpec::FindColl(Element::Variable(var))); + let mut p = Query::variable() + .skip(Find::ellipsis()) + .skip(eof()) + .map(|var| FindSpec::FindColl(Element::Variable(var))); let r: ParseResult = p.parse_lazy(&y[..]).into(); Query::to_parsed_value(r) }) @@ -229,8 +335,8 @@ def_value_parser_fn!(Find, find_coll, FindSpec, input, { }); def_value_parser_fn!(Find, elements, Vec, input, { - (many1::, _>(Query::variable()), eof()) - .map(|(vars, _)| { + many1::, _>(Query::variable()).skip(eof()) + .map(|vars| { vars.into_iter() .map(Element::Variable) .collect() @@ -392,6 +498,67 @@ mod test { })); } + #[test] + fn test_rule_vars() { + let e = edn::PlainSymbol::new("?e"); + let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())])]; + assert_parses_to!(Where::rule_vars, input, + vec![Variable(e.clone())]); + } + + #[test] + fn test_or() { + let oj = edn::PlainSymbol::new("or"); + let e = edn::PlainSymbol::new("?e"); + let a = edn::PlainSymbol::new("?a"); + let v = edn::PlainSymbol::new("?v"); + let input = [edn::Value::List( + vec![edn::Value::PlainSymbol(oj), + edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), + edn::Value::PlainSymbol(a.clone()), + edn::Value::PlainSymbol(v.clone())])].into_iter().collect())]; + assert_parses_to!(Where::or_clause, input, + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(e)), + attribute: PatternNonValuePlace::Variable(Variable(a)), + value: PatternValuePlace::Variable(Variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))], + })); + } + + #[test] + fn test_or_join() { + let oj = edn::PlainSymbol::new("or-join"); + let e = edn::PlainSymbol::new("?e"); + let a = edn::PlainSymbol::new("?a"); + let v = edn::PlainSymbol::new("?v"); + let input = [edn::Value::List( + vec![edn::Value::PlainSymbol(oj), + edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())]), + edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), + edn::Value::PlainSymbol(a.clone()), + edn::Value::PlainSymbol(v.clone())])].into_iter().collect())]; + assert_parses_to!(Where::or_join_clause, input, + WhereClause::OrJoin( + OrJoin { + unify_vars: UnifyVars::Explicit(vec![Variable(e.clone())]), + clauses: vec![OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(e)), + attribute: PatternNonValuePlace::Variable(Variable(a)), + value: PatternValuePlace::Variable(Variable(v)), + tx: PatternNonValuePlace::Placeholder, + }))], + })); + } + #[test] fn test_find_sp_variable() { let sym = edn::PlainSymbol::new("?x"); diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index f5ba7763..bfefe20f 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -12,16 +12,22 @@ extern crate mentat_query_parser; extern crate mentat_query; extern crate edn; -use edn::PlainSymbol; +use edn::{ + NamespacedKeyword, + PlainSymbol, +}; use mentat_query::{ Element, FindSpec, FnArg, + OrJoin, + OrWhereClause, Pattern, PatternNonValuePlace, PatternValuePlace, Predicate, + UnifyVars, Variable, WhereClause, }; @@ -54,3 +60,146 @@ fn can_parse_predicates() { ]}), ]); } + +#[test] +fn can_parse_simple_or() { + let s = "[:find ?x . :where (or [?x _ 10] [?x _ 15])]"; + let p = parse_find_string(s).unwrap(); + + assert_eq!(p.find_spec, + FindSpec::FindScalar(Element::Variable(Variable(PlainSymbol::new("?x"))))); + assert_eq!(p.where_clauses, + vec![ + WhereClause::OrJoin(OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: vec![ + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(10), + tx: PatternNonValuePlace::Placeholder, + })), + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(15), + tx: PatternNonValuePlace::Placeholder, + })), + ], + }), + ]); +} + +#[test] +fn can_parse_unit_or_join() { + let s = "[:find ?x . :where (or-join [?x] [?x _ 15])]"; + let p = parse_find_string(s).unwrap(); + + assert_eq!(p.find_spec, + FindSpec::FindScalar(Element::Variable(Variable(PlainSymbol::new("?x"))))); + assert_eq!(p.where_clauses, + vec![ + WhereClause::OrJoin(OrJoin { + unify_vars: UnifyVars::Explicit(vec![Variable(PlainSymbol::new("?x"))]), + clauses: vec![ + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(15), + tx: PatternNonValuePlace::Placeholder, + })), + ], + }), + ]); +} + +#[test] +fn can_parse_simple_or_join() { + let s = "[:find ?x . :where (or-join [?x] [?x _ 10] [?x _ 15])]"; + let p = parse_find_string(s).unwrap(); + + assert_eq!(p.find_spec, + FindSpec::FindScalar(Element::Variable(Variable(PlainSymbol::new("?x"))))); + assert_eq!(p.where_clauses, + vec![ + WhereClause::OrJoin(OrJoin { + unify_vars: UnifyVars::Explicit(vec![Variable(PlainSymbol::new("?x"))]), + clauses: vec![ + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(10), + tx: PatternNonValuePlace::Placeholder, + })), + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(15), + tx: PatternNonValuePlace::Placeholder, + })), + ], + }), + ]); +} + +#[test] +fn can_parse_simple_or_and_join() { + let s = "[:find ?x . :where (or [?x _ 10] (and (or [?x :foo/bar ?y] [?x :foo/baz ?y]) [(< ?y 1)]))]"; + let p = parse_find_string(s).unwrap(); + + assert_eq!(p.find_spec, + FindSpec::FindScalar(Element::Variable(Variable(PlainSymbol::new("?x"))))); + assert_eq!(p.where_clauses, + vec![ + WhereClause::OrJoin(OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: vec![ + OrWhereClause::Clause( + WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Placeholder, + value: PatternValuePlace::EntidOrInteger(10), + tx: PatternNonValuePlace::Placeholder, + })), + OrWhereClause::And( + vec![ + WhereClause::OrJoin(OrJoin { + unify_vars: UnifyVars::Implicit, + clauses: vec![ + OrWhereClause::Clause(WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "bar")), + value: PatternValuePlace::Variable(Variable(PlainSymbol::new("?y"))), + tx: PatternNonValuePlace::Placeholder, + })), + OrWhereClause::Clause(WhereClause::Pattern(Pattern { + source: None, + entity: PatternNonValuePlace::Variable(Variable(PlainSymbol::new("?x"))), + attribute: PatternNonValuePlace::Ident(NamespacedKeyword::new("foo", "baz")), + value: PatternValuePlace::Variable(Variable(PlainSymbol::new("?y"))), + tx: PatternNonValuePlace::Placeholder, + })), + ], + }), + + WhereClause::Pred(Predicate { operator: PlainSymbol::new("<"), args: vec![ + FnArg::Variable(Variable(PlainSymbol::new("?y"))), FnArg::EntidOrInteger(1), + ]}), + ], + ) + ], + }), + ]); +} \ No newline at end of file diff --git a/query/src/lib.rs b/query/src/lib.rs index a552425b..3bc0eaef 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -474,13 +474,53 @@ pub struct Predicate { pub args: Vec, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum UnifyVars { + /// `Implicit` means the variables in an `or` or `not` are derived from the enclosed pattern. + /// DataScript regards these vars as 'free': these variables don't need to be bound by the + /// enclosing environment. + /// + /// Datomic's documentation implies that all implicit variables are required: + /// + /// > Datomic will attempt to push the or clause down until all necessary variables are bound, + /// > and will throw an exception if that is not possible. + /// + /// but that would render top-level `or` expressions (as used in Datomic's own examples!) + /// impossible, so we assume that this is an error in the documentation. + /// + /// All contained 'arms' in an `or` with implicit variables must bind the same vars. + Implicit, + + /// `Explicit` means the variables in an `or-join` or `not-join` are explicitly listed, + /// specified with `required-vars` syntax. + /// + /// DataScript parses these as free, but allows (incorrectly) the use of more complicated + /// `rule-vars` syntax. + /// + /// Only the named variables will be unified with the enclosing query. + /// + /// Every 'arm' in an `or-join` must mention the entire set of explicit vars. + Explicit(Vec), +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum OrWhereClause { + Clause(WhereClause), + And(Vec), +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct OrJoin { + pub unify_vars: UnifyVars, + pub clauses: Vec, +} + #[allow(dead_code)] #[derive(Clone, Debug, Eq, PartialEq)] pub enum WhereClause { Not, NotJoin, - Or, - OrJoin, + OrJoin(OrJoin), Pred(Predicate), WhereFn, RuleExpr,