From dbbbd220f91b67cfea851ef693e1ccdbc5671aa7 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 5 Jun 2017 20:02:41 -0700 Subject: [PATCH 01/20] Pre: add helpers to ValueTypeSet. --- query-algebrizer/src/types.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index e26006c9..0a23802d 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -525,9 +525,27 @@ impl ValueTypeSet { pub fn of_numeric_types() -> ValueTypeSet { ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long)) } + + /// Return a set containing `Ref` and `Keyword`. + pub fn of_keywords() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Keyword)) + } + + /// Return a set containing `Ref` and `Long`. + pub fn of_longs() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Long)) + } } impl ValueTypeSet { + pub fn insert(&mut self, vt: ValueType) -> bool { + self.0.insert(vt) + } + + pub fn len(&self) -> usize { + self.0.len() + } + /// Returns a set containing all the types in this set and `other`. pub fn union(&self, other: &ValueTypeSet) -> ValueTypeSet { ValueTypeSet(self.0.union(other.0)) From a10c6fc67af88a95635b164ea57005ff08245283 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 9 Jun 2017 20:13:13 -0700 Subject: [PATCH 02/20] Pre: make ValueTypeSet Copy, as it only newtypes EnumSet, which is Copy. --- query-algebrizer/src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 0a23802d..387d0d0d 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -496,7 +496,7 @@ impl EnumSetExtensions for EnumSet { } } -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ValueTypeSet(pub EnumSet); impl Default for ValueTypeSet { From 08534a1a3a05a49bb2bc70f2f92950ab7b356c0a Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Apr 2017 15:30:22 -0700 Subject: [PATCH 03/20] Pre: Handle SrcVar. --- query-algebrizer/src/clauses/resolve.rs | 4 +- query/src/lib.rs | 50 +++++++++++++++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index 9cf27807..b64ce9cd 100644 --- a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -50,7 +50,7 @@ impl ConjoiningClauses { }, // Can't be an entid. EntidOrInteger(i) => Ok(QueryValue::TypedValue(TypedValue::Long(i))), - Ident(_) | + IdentOrKeyword(_) | SrcVar(_) | Constant(NonIntegerConstant::Boolean(_)) | Constant(NonIntegerConstant::Text(_)) | @@ -78,7 +78,7 @@ impl ConjoiningClauses { .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name()))) }, EntidOrInteger(i) => Ok(QueryValue::PrimitiveLong(i)), - Ident(_) => unimplemented!(), // TODO + IdentOrKeyword(_) => unimplemented!(), // TODO Constant(NonIntegerConstant::Boolean(val)) => Ok(QueryValue::TypedValue(TypedValue::Boolean(val))), Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), Constant(NonIntegerConstant::Text(s)) => Ok(QueryValue::TypedValue(TypedValue::typed_string(s.as_str()))), diff --git a/query/src/lib.rs b/query/src/lib.rs index f12144bb..bc4134a6 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -174,7 +174,11 @@ impl FromValue for SrcVar { impl SrcVar { pub fn from_symbol(sym: &PlainSymbol) -> Option { if sym.is_src_symbol() { - Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + if sym.0 == "$" { + Some(SrcVar::DefaultSrc) + } else { + Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + } } else { None } @@ -210,22 +214,44 @@ pub enum FnArg { Variable(Variable), SrcVar(SrcVar), EntidOrInteger(i64), - Ident(NamespacedKeyword), + IdentOrKeyword(NamespacedKeyword), Constant(NonIntegerConstant), } impl FromValue for FnArg { fn from_value(v: &edn::ValueAndSpan) -> Option { - // TODO: support SrcVars. - Variable::from_value(v) - .and_then(|v| Some(FnArg::Variable(v))) - .or_else(|| { - println!("from_value {}", v.inner); - match v.inner { - edn::SpannedValue::Integer(i) => Some(FnArg::EntidOrInteger(i)), - edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), - _ => unimplemented!(), - }}) + use edn::SpannedValue::*; + match v.inner { + Integer(x) => + Some(FnArg::EntidOrInteger(x)), + PlainSymbol(ref x) if x.is_src_symbol() => + SrcVar::from_symbol(x).map(FnArg::SrcVar), + PlainSymbol(ref x) if x.is_var_symbol() => + Variable::from_symbol(x).map(FnArg::Variable), + PlainSymbol(_) => None, + NamespacedKeyword(ref x) => + Some(FnArg::IdentOrKeyword(x.clone())), + Instant(x) => + Some(FnArg::Constant(NonIntegerConstant::Instant(x))), + Uuid(x) => + Some(FnArg::Constant(NonIntegerConstant::Uuid(x))), + Boolean(x) => + Some(FnArg::Constant(NonIntegerConstant::Boolean(x))), + Float(x) => + Some(FnArg::Constant(NonIntegerConstant::Float(x))), + BigInteger(ref x) => + Some(FnArg::Constant(NonIntegerConstant::BigInteger(x.clone()))), + Text(ref x) => + // TODO: intern strings. #398. + Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(x.clone())))), + Nil | + NamespacedSymbol(_) | + Keyword(_) | + Vector(_) | + List(_) | + Set(_) | + Map(_) => None, + } } } From 9fe31d443d161b72d1ecde699fc5cc9475199eb8 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 19 Apr 2017 11:10:24 -0700 Subject: [PATCH 04/20] Pre: Accept EDN vectors in FnArg arguments. Datomic accepts mostly-arbitrary EDN, and it is actually used: for example, the following are all valid, and all mean different things: * `(ground 1 ?x)` * `(ground [1 2 3] [?x ?y ?z])` * `(ground [[1 2 3] [4 5 6]] [[?x ?y ?z]])` We could probably introduce new syntax that expresses these patterns while avoiding collection arguments, but I don't see one right now. I've elected to support only vectors for simplicity; I'm hoping to avoid parsing edn::Value in the query-algebrizer. --- query-algebrizer/src/clauses/resolve.rs | 4 +++- query-parser/src/parse.rs | 16 +++++++++++++++- query/src/lib.rs | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index b64ce9cd..58e0d902 100644 --- a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -56,7 +56,8 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::Text(_)) | Constant(NonIntegerConstant::Uuid(_)) | Constant(NonIntegerConstant::Instant(_)) | // Instants are covered elsewhere. - Constant(NonIntegerConstant::BigInteger(_)) => { + Constant(NonIntegerConstant::BigInteger(_)) | + Vector(_) => { self.mark_known_empty(EmptyBecause::NonNumericArgument); bail!(ErrorKind::NonNumericArgument(function.clone(), position)); }, @@ -86,6 +87,7 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::Instant(u)) => Ok(QueryValue::TypedValue(TypedValue::Instant(u))), Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), SrcVar(_) => unimplemented!(), + Vector(_) => unimplemented!(), // TODO } } } diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index ab2d857c..f5a58447 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -134,7 +134,7 @@ def_parser!(Query, predicate_fn, PredicateFn, { }); def_parser!(Query, fn_arg, FnArg, { - satisfy_map(FnArg::from_value) + satisfy_map(FnArg::from_value).or(vector().of_exactly(many::, _>(Query::fn_arg())).map(FnArg::Vector)) }); def_parser!(Query, arguments, Vec, { @@ -777,4 +777,18 @@ mod test { let mut par = Query::natural_number(); assert_eq!(None, par.parse(input.atom_stream()).err()); } + + #[test] + fn test_fn_arg_collections() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let input = edn::Value::Vector(vec![edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), + edn::Value::PlainSymbol(vy.clone())])]); + + assert_parses_to!(|| vector().of_exactly(Query::fn_arg()), + input, + FnArg::Vector(vec![FnArg::Variable(variable(vx)), + FnArg::Variable(variable(vy)), + ])); + } } diff --git a/query/src/lib.rs b/query/src/lib.rs index bc4134a6..414090f0 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -216,6 +216,9 @@ pub enum FnArg { EntidOrInteger(i64), IdentOrKeyword(NamespacedKeyword), Constant(NonIntegerConstant), + // The collection values representable in EDN. There's no advantage to destructuring up front, + // since consumers will need to handle arbitrarily nested EDN themselves anyway. + Vector(Vec), } impl FromValue for FnArg { From 06bb8e99a7d4398cc5e94d70658f10241872cc99 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 19 Apr 2017 13:00:14 -0700 Subject: [PATCH 05/20] Pre: Add Values to query-sql. --- query-sql/src/lib.rs | 102 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 95 insertions(+), 7 deletions(-) diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 6ca151a3..15027bfd 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -152,6 +152,14 @@ pub enum TableOrSubquery { Table(SourceAlias), Union(Vec, TableAlias), Subquery(Box), + Values(Values, TableAlias), +} + +pub enum Values { + /// Like "VALUES (0, 1), (2, 3), ...". + Unnamed(Vec>), + /// Like "SELECT 0 AS x, SELECT 0 AS y WHERE 0 UNION ALL VALUES (0, 1), (2, 3), ...". + Named(Vec, Vec>), } pub enum FromClause { @@ -222,6 +230,7 @@ macro_rules! interpose { } } } + impl QueryFragment for ColumnOrExpression { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { use self::ColumnOrExpression::*; @@ -391,13 +400,63 @@ impl QueryFragment for TableOrSubquery { out.push_identifier(table_alias.as_str()) }, &Subquery(ref subquery) => { - subquery.push_sql(out)?; - Ok(()) + subquery.push_sql(out) + }, + &Values(ref values, ref table_alias) => { + // XXX: does this work for Values::Unnamed? + out.push_sql("("); + values.push_sql(out)?; + out.push_sql(") AS "); + out.push_identifier(table_alias.as_str()) }, } } } +impl QueryFragment for Values { + fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { + // There are at least 3 ways to name the columns of a VALUES table: + // 1) the columns are named "", ":1", ":2", ... -- but this is undocumented. See + // http://stackoverflow.com/a/40921724. + // 2) A CTE ("WITH" statement) can declare the shape of the table, like "WITH + // table_name(column_name, ...) AS (VALUES ...)". + // 3) We can "UNION ALL" a dummy "SELECT" statement in place. + // + // We don't want to use an undocumented SQLite quirk, and we're a little concerned that some + // SQL systems will not optimize WITH statements well. It's also convenient to have an in + // place table to query, so for now we implement option 3). + if let &Values::Named(ref names, _) = self { + let alias = &names[0]; + out.push_sql("SELECT 0 AS "); + out.push_identifier(alias.as_str())?; + + for alias in &names[1..] { + out.push_sql(", 0 AS "); + out.push_identifier(alias.as_str())?; + } + + out.push_sql(" WHERE 0 UNION ALL "); + } + + let values = match self { + &Values::Named(_, ref values) => values, + &Values::Unnamed(ref values) => values, + }; + + out.push_sql("VALUES "); + + interpose!(outer, values, + { out.push_sql("("); + interpose!(inner, outer, + { out.push_typed_value(inner)? }, + { out.push_sql(", ") }); + out.push_sql(")"); + }, + { out.push_sql(", ") }); + Ok(()) + } +} + impl QueryFragment for FromClause { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { use self::FromClause::*; @@ -494,7 +553,7 @@ mod tests { DatomsTable, }; - fn build_constraint(c: Constraint) -> String { + fn build(c: &QueryFragment) -> String { let mut builder = SQLiteQueryBuilder::new(); c.push_sql(&mut builder) .map(|_| builder.finish()) @@ -524,9 +583,9 @@ mod tests { ], }; - assert_eq!("`datoms01`.v IN ()", build_constraint(none)); - assert_eq!("`datoms01`.v IN (123)", build_constraint(one)); - assert_eq!("`datoms01`.v IN (123, 456, 789)", build_constraint(three)); + assert_eq!("`datoms01`.v IN ()", build(&none)); + assert_eq!("`datoms01`.v IN (123)", build(&one)); + assert_eq!("`datoms01`.v IN (123, 456, 789)", build(&three)); } #[test] @@ -552,7 +611,36 @@ mod tests { // Two sets of parens: the outermost AND only has one child, // but still contributes parens. - assert_eq!("((123 = 456 AND 789 = 246))", build_constraint(c)); + assert_eq!("((123 = 456 AND 789 = 246))", build(&c)); + } + + #[test] + fn test_unnamed_values() { + let build = |values| build(&Values::Unnamed(values)); + assert_eq!(build(vec![vec![TypedValue::Long(1)]]), + "VALUES (1)"); + + assert_eq!(build(vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)]]), + "VALUES (0, 1)"); + + assert_eq!(build(vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)], + vec![TypedValue::Boolean(true), TypedValue::Long(2)]]), + "VALUES (0, 1), (1, 2)"); + } + + #[test] + fn test_named_values() { + let build = |names: Vec<_>, values| build(&Values::Named(names.into_iter().map(Variable::from_valid_name).collect(), values)); + assert_eq!(build(vec!["?a"], vec![vec![TypedValue::Long(1)]]), + "SELECT 0 AS `?a` WHERE 0 UNION ALL VALUES (1)"); + + assert_eq!(build(vec!["?a", "?b"], vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)]]), + "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1)"); + + assert_eq!(build(vec!["?a", "?b"], + vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)], + vec![TypedValue::Boolean(true), TypedValue::Long(2)]]), + "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1), (1, 2)"); } #[test] From 63574af7ace4329feb9160011f8cba1ce133a678 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 2 Jun 2017 13:03:16 -0700 Subject: [PATCH 06/20] Pre: flatten the representation of VALUES. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single vec that's traversed in chunks is more efficient than multiple vecs… and this ensures that each sub-vec is the same size. --- query-sql/src/lib.rs | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 15027bfd..9a542007 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -157,9 +157,13 @@ pub enum TableOrSubquery { pub enum Values { /// Like "VALUES (0, 1), (2, 3), ...". - Unnamed(Vec>), + /// The vector must be of a length that is a multiple of the given size. + Unnamed(usize, Vec), + /// Like "SELECT 0 AS x, SELECT 0 AS y WHERE 0 UNION ALL VALUES (0, 1), (2, 3), ...". - Named(Vec, Vec>), + /// The vector of values must be of a length that is a multiple of the length + /// of the vector of names. + Named(Vec, Vec), } pub enum FromClause { @@ -220,7 +224,13 @@ fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { /// without producing an intermediate string sequence. macro_rules! interpose { ( $name: pat, $across: expr, $body: block, $inter: block ) => { - let mut seq = $across.iter(); + interpose_iter!($name, $across.iter(), $body, $inter) + } +} + +macro_rules! interpose_iter { + ( $name: pat, $across: expr, $body: block, $inter: block ) => { + let mut seq = $across; if let Some($name) = seq.next() { $body; for $name in seq { @@ -439,20 +449,20 @@ impl QueryFragment for Values { } let values = match self { - &Values::Named(_, ref values) => values, - &Values::Unnamed(ref values) => values, + &Values::Named(ref names, ref values) => values.chunks(names.len()), + &Values::Unnamed(ref size, ref values) => values.chunks(*size), }; out.push_sql("VALUES "); - interpose!(outer, values, - { out.push_sql("("); - interpose!(inner, outer, - { out.push_typed_value(inner)? }, - { out.push_sql(", ") }); - out.push_sql(")"); - }, - { out.push_sql(", ") }); + interpose_iter!(outer, values, + { out.push_sql("("); + interpose!(inner, outer, + { out.push_typed_value(inner)? }, + { out.push_sql(", ") }); + out.push_sql(")"); + }, + { out.push_sql(", ") }); Ok(()) } } From 70c5bcfa9985e9132457cb0611c249cbcd64069c Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 2 Jun 2017 13:34:02 -0700 Subject: [PATCH 07/20] Pre: simplify values SQL expansion. This uses `interpose` instead of manual looping. --- query-sql/src/lib.rs | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 9a542007..470a8024 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -434,16 +434,13 @@ impl QueryFragment for Values { // // We don't want to use an undocumented SQLite quirk, and we're a little concerned that some // SQL systems will not optimize WITH statements well. It's also convenient to have an in - // place table to query, so for now we implement option 3). + // place table to query, so for now we implement option 3. if let &Values::Named(ref names, _) = self { - let alias = &names[0]; - out.push_sql("SELECT 0 AS "); - out.push_identifier(alias.as_str())?; - - for alias in &names[1..] { - out.push_sql(", 0 AS "); - out.push_identifier(alias.as_str())?; - } + out.push_sql("SELECT "); + interpose!(alias, names, + { out.push_sql("0 AS "); + out.push_identifier(alias.as_str())? }, + { out.push_sql(", ") }); out.push_sql(" WHERE 0 UNION ALL "); } @@ -626,30 +623,31 @@ mod tests { #[test] fn test_unnamed_values() { - let build = |values| build(&Values::Unnamed(values)); - assert_eq!(build(vec![vec![TypedValue::Long(1)]]), + let build = |len, values| build(&Values::Unnamed(len, values)); + + assert_eq!(build(1, vec![TypedValue::Long(1)]), "VALUES (1)"); - assert_eq!(build(vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)]]), + assert_eq!(build(2, vec![TypedValue::Boolean(false), TypedValue::Long(1)]), "VALUES (0, 1)"); - assert_eq!(build(vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)], - vec![TypedValue::Boolean(true), TypedValue::Long(2)]]), + assert_eq!(build(2, vec![TypedValue::Boolean(false), TypedValue::Long(1), + TypedValue::Boolean(true), TypedValue::Long(2)]), "VALUES (0, 1), (1, 2)"); } #[test] fn test_named_values() { let build = |names: Vec<_>, values| build(&Values::Named(names.into_iter().map(Variable::from_valid_name).collect(), values)); - assert_eq!(build(vec!["?a"], vec![vec![TypedValue::Long(1)]]), + assert_eq!(build(vec!["?a"], vec![TypedValue::Long(1)]), "SELECT 0 AS `?a` WHERE 0 UNION ALL VALUES (1)"); - assert_eq!(build(vec!["?a", "?b"], vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)]]), + assert_eq!(build(vec!["?a", "?b"], vec![TypedValue::Boolean(false), TypedValue::Long(1)]), "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1)"); assert_eq!(build(vec!["?a", "?b"], - vec![vec![TypedValue::Boolean(false), TypedValue::Long(1)], - vec![TypedValue::Boolean(true), TypedValue::Long(2)]]), + vec![TypedValue::Boolean(false), TypedValue::Long(1), + TypedValue::Boolean(true), TypedValue::Long(2)]), "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1), (1, 2)"); } From 002c918c96923ed4636d3f2d39e125e4e65ad3ac Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 10:17:15 -0700 Subject: [PATCH 08/20] Pre: Move PushComputed up module hierarchy; make it public. --- query-algebrizer/src/clauses/mod.rs | 12 ++++++++++++ query-algebrizer/src/clauses/or.rs | 19 +++++-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 5b47beff..0564083a 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -841,6 +841,18 @@ impl ConjoiningClauses { } } +pub trait PushComputed { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable; +} + +impl PushComputed for Vec { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable { + let next_index = self.len(); + self.push(item); + DatomsTable::Computed(next_index) + } +} + // These are helpers that tests use to build Schema instances. #[cfg(test)] fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index 971ff010..e619cdfc 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -29,7 +29,10 @@ use mentat_query::{ WhereClause, }; -use clauses::ConjoiningClauses; +use clauses::{ + ConjoiningClauses, + PushComputed, +}; use errors::{ Result, @@ -74,18 +77,6 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla } } -trait PushComputed { - fn push_computed(&mut self, item: ComputedTable) -> DatomsTable; -} - -impl PushComputed for Vec { - fn push_computed(&mut self, item: ComputedTable) -> DatomsTable { - let next_index = self.len(); - self.push(item); - DatomsTable::Computed(next_index) - } -} - pub enum DeconstructedOrJoin { KnownSuccess, KnownEmpty(EmptyBecause), @@ -1119,4 +1110,4 @@ mod testing { [_ :foo/height ?x]]"#; compare_ccs(alg(&schema, query), alg(&schema, simple)); } -} \ No newline at end of file +} From 2f38f1e73e86da2f7e674842ea6f97db682bf4ea Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 15:17:02 -0700 Subject: [PATCH 09/20] Pre: Make it easier to debug binding errors. --- query-projector/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index 358667e5..64988b19 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -164,7 +164,7 @@ fn candidate_column(query: &AlgebraicQuery, var: &Variable) -> (ColumnOrExpressi let columns = query.cc .column_bindings .get(var) - .expect("Every variable has a binding"); + .expect(format!("Every variable should have a binding, but {:?} does not", var).as_str()); let qa = columns[0].clone(); let name = VariableColumn::Variable(var.clone()).column_name(); @@ -496,4 +496,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { }, } } -} \ No newline at end of file +} From 4d2eb7222ef8bc7d0538b601635fbe082bfd1d9c Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 15:31:09 -0700 Subject: [PATCH 10/20] Pre: Generalize NonNumericArgument to InvalidArgument. --- query-algebrizer/src/clauses/resolve.rs | 2 +- query-algebrizer/src/errors.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index 58e0d902..bb8ae199 100644 --- a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -59,7 +59,7 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::BigInteger(_)) | Vector(_) => { self.mark_known_empty(EmptyBecause::NonNumericArgument); - bail!(ErrorKind::NonNumericArgument(function.clone(), position)); + bail!(ErrorKind::InvalidArgument(function.clone(), "numeric", position)); }, Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), } diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index b76f5ac5..0c93011d 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -42,9 +42,10 @@ error_chain! { display("unbound variable: {}", name) } - NonNumericArgument(function: PlainSymbol, position: usize) { + + InvalidArgument(function: PlainSymbol, expected_type: &'static str, position: usize) { description("invalid argument") - display("invalid argument to {}: expected numeric in position {}.", function, position) + display("invalid argument to {}: expected {} in position {}.", function, expected_type, position) } InvalidLimit(val: String, kind: ValueType) { From 13e27c83e27b0a2fca2da15b39fc0f3140b855d3 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 09:50:09 -0700 Subject: [PATCH 11/20] Pre: Modify predicate implementation in preparation for functions that bind. --- query-algebrizer/src/clauses/predicate.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index fca10e9a..e0c56b54 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -30,15 +30,13 @@ use types::{ /// Application of predicates. impl ConjoiningClauses { - /// There are several kinds of predicates/functions in our Datalog: + /// There are several kinds of predicates in our Datalog: /// - A limited set of binary comparison operators: < > <= >= !=. /// These are converted into SQLite binary comparisons and some type constraints. - /// - A set of predicates like `fulltext` and `get-else` that are translated into - /// SQL `MATCH`es or joins, yielding bindings. /// - In the future, some predicates that are implemented via function calls in SQLite. /// /// At present we have implemented only the five built-in comparison binary operators. - pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { + pub fn apply_predicate<'s>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { // Because we'll be growing the set of built-in predicates, handling each differently, // and ultimately allowing user-specified predicates, we match on the predicate name first. if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) { @@ -53,7 +51,7 @@ impl ConjoiningClauses { /// - Ensures that the predicate functions name a known operator. /// - Accumulates a `NumericInequality` constraint into the `wheres` list. #[allow(unused_variables)] - pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { + pub fn apply_numeric_predicate<'s>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { if predicate.args.len() != 2 { bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2)); } From 899e5d097184e889566988b1437d837db0aa7139 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 5 Jun 2017 19:59:53 -0700 Subject: [PATCH 12/20] Pre: add ConjoiningClauses::bind_value. --- query-algebrizer/src/clauses/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 0564083a..e4191fc1 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -330,6 +330,12 @@ impl ConjoiningClauses { } impl ConjoiningClauses { + /// Be careful with this. It'll overwrite existing bindings. + pub fn bind_value(&mut self, var: &Variable, value: TypedValue) { + self.constrain_var_to_type(var.clone(), value.value_type()); + self.value_bindings.insert(var.clone(), value); + } + pub fn bound_value(&self, var: &Variable) -> Option { self.value_bindings.get(var).cloned() } From 9ac2b8c68082f4153b7a88227106d60be275f406 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 5 Jun 2017 20:00:06 -0700 Subject: [PATCH 13/20] Pre: add ConjoiningClauses::known_type_set. --- query-algebrizer/src/clauses/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index e4191fc1..eabe925a 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -356,6 +356,10 @@ impl ConjoiningClauses { } } + pub fn known_type_set(&self, var: &Variable) -> ValueTypeSet { + self.known_types.get(var).cloned().unwrap_or(ValueTypeSet::any()) + } + pub fn bind_column_to_var>(&mut self, schema: &Schema, table: TableAlias, column: C, var: Variable) { let column = column.into(); // Do we have an external binding for this? From 4a886aae170e29f44cdbb270706707ec7299d2fb Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 6 Jun 2017 09:24:36 -0700 Subject: [PATCH 14/20] Pre: derive Debug. --- query-algebrizer/src/lib.rs | 2 +- query-algebrizer/src/types.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index a890d065..504a6e14 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -51,7 +51,7 @@ pub use clauses::{ QueryInputs, }; -#[allow(dead_code)] +#[derive(Debug)] pub struct AlgebraicQuery { default_source: SrcVar, pub find_spec: FindSpec, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 387d0d0d..f4761780 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -228,6 +228,7 @@ impl Debug for QueryValue { /// Represents an entry in the ORDER BY list: a variable or a variable's type tag. /// (We require order vars to be projected, so we can simply use a variable here.) +#[derive(Debug)] pub struct OrderBy(pub Direction, pub VariableColumn); impl From for OrderBy { From d30ad428e83b8d1772c13d6186336739f7a9ad51 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 2 Jun 2017 15:34:53 -0700 Subject: [PATCH 15/20] Pre: take a dependency on maplit to allow BTreeSet literals. --- query-algebrizer/Cargo.toml | 5 ++++- query-parser/Cargo.toml | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/query-algebrizer/Cargo.toml b/query-algebrizer/Cargo.toml index bfa9ed84..38a30fa5 100644 --- a/query-algebrizer/Cargo.toml +++ b/query-algebrizer/Cargo.toml @@ -15,4 +15,7 @@ path = "../query" # Only for tests. [dev-dependencies.mentat_query_parser] -path = "../query-parser" \ No newline at end of file +path = "../query-parser" + +[dev-dependencies] +maplit = "0.1" diff --git a/query-parser/Cargo.toml b/query-parser/Cargo.toml index 96f0599b..1b5fd7c2 100644 --- a/query-parser/Cargo.toml +++ b/query-parser/Cargo.toml @@ -6,6 +6,7 @@ workspace = ".." [dependencies] combine = "2.3.2" error-chain = "0.8.1" +maplit = "0.1" matches = "0.1" [dependencies.edn] From c6e933c396dc1abe894f66c1e8df4a9a8c121304 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 2 Jun 2017 15:36:12 -0700 Subject: [PATCH 16/20] Pre: make rule_vars return unique vars. --- query-algebrizer/src/clauses/not.rs | 2 +- query-algebrizer/src/clauses/or.rs | 2 +- query-algebrizer/src/lib.rs | 4 ++++ query-algebrizer/src/validate.rs | 6 ++--- query-parser/src/lib.rs | 3 +++ query-parser/src/parse.rs | 36 +++++++++++++++-------------- query-parser/tests/find_tests.rs | 7 ++++-- query/src/lib.rs | 2 +- 8 files changed, 37 insertions(+), 25 deletions(-) diff --git a/query-algebrizer/src/clauses/not.rs b/query-algebrizer/src/clauses/not.rs index 45723c51..2c2a57cf 100644 --- a/query-algebrizer/src/clauses/not.rs +++ b/query-algebrizer/src/clauses/not.rs @@ -32,7 +32,7 @@ impl ConjoiningClauses { pub fn apply_not_join(&mut self, schema: &Schema, not_join: NotJoin) -> Result<()> { let unified = match not_join.unify_vars { UnifyVars::Implicit => not_join.collect_mentioned_variables(), - UnifyVars::Explicit(vs) => vs.into_iter().collect(), + UnifyVars::Explicit(vs) => vs, }; let mut template = self.use_as_template(&unified); diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index e619cdfc..d4dfe2d9 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -552,7 +552,7 @@ impl ConjoiningClauses { let (join_clauses, unify_vars, mentioned_vars) = or_join.dismember(); let projected = match unify_vars { UnifyVars::Implicit => mentioned_vars.into_iter().collect(), - UnifyVars::Explicit(vs) => vs.into_iter().collect(), + UnifyVars::Explicit(vs) => vs, }; let template = self.use_as_template(&projected); diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 504a6e14..9af4f0c2 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -13,6 +13,10 @@ extern crate enum_set; #[macro_use] extern crate error_chain; +#[cfg(test)] +#[macro_use] +extern crate maplit; + extern crate mentat_core; extern crate mentat_query; diff --git a/query-algebrizer/src/validate.rs b/query-algebrizer/src/validate.rs index 82fcc327..e5bb0df0 100644 --- a/query-algebrizer/src/validate.rs +++ b/query-algebrizer/src/validate.rs @@ -212,7 +212,7 @@ mod tests { (and [?artist :artist/type ?type] [?type :artist/role :artist.role/parody]))]"#; let parsed = parse_find_string(query).expect("expected successful parse"); - let clauses = valid_or_join(parsed, UnifyVars::Explicit(vec![Variable::from_valid_name("?artist")])); + let clauses = valid_or_join(parsed, UnifyVars::Explicit(btreeset!{Variable::from_valid_name("?artist")})); // Let's do some detailed parse checks. let mut arms = clauses.into_iter(); @@ -322,7 +322,7 @@ mod tests { [?release :release/artists ?artist] [?release :release/year 1970])]"#; let parsed = parse_find_string(query).expect("expected successful parse"); - let clauses = valid_not_join(parsed, UnifyVars::Explicit(vec![Variable::from_valid_name("?artist")])); + let clauses = valid_not_join(parsed, UnifyVars::Explicit(btreeset!{Variable::from_valid_name("?artist")})); let release = PatternNonValuePlace::Variable(Variable::from_valid_name("?release")); let artist = PatternValuePlace::Variable(Variable::from_valid_name("?artist")); @@ -375,4 +375,4 @@ mod tests { _ => panic!(), } } -} \ No newline at end of file +} diff --git a/query-parser/src/lib.rs b/query-parser/src/lib.rs index 8158d279..1c035276 100644 --- a/query-parser/src/lib.rs +++ b/query-parser/src/lib.rs @@ -10,6 +10,9 @@ #![allow(unused_imports)] +#[macro_use] +extern crate maplit; + #[macro_use] extern crate error_chain; diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f5a58447..46f12751 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -197,9 +197,9 @@ def_matches_plain_symbol!(Where, not, "not"); def_matches_plain_symbol!(Where, not_join, "not-join"); -def_parser!(Where, rule_vars, Vec, { +def_parser!(Where, rule_vars, BTreeSet, { seq() - .of_exactly(many1(Query::variable())) + .of_exactly(many1(Query::variable()).and_then(unique_vars)) }); def_parser!(Where, or_pattern_clause, OrWhereClause, { @@ -392,18 +392,20 @@ def_parser!(Find, spec, FindSpec, { &mut try(Find::find_rel())]) }); +fn unique_vars(vars: Vec) -> std::result::Result, combine::primitives::Error> { + let given = vars.len(); + let set: BTreeSet = vars.into_iter().collect(); + if given != set.len() { + // TODO: find out what the variable is! + let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError)); + Err(combine::primitives::Error::Other(e)) + } else { + Ok(set) + } +} + def_parser!(Find, vars, BTreeSet, { - many(Query::variable()).and_then(|vars: Vec| { - let given = vars.len(); - let set: BTreeSet = vars.into_iter().collect(); - if given != set.len() { - // TODO: find out what the variable is! - let e = Box::new(Error::from_kind(ErrorKind::DuplicateVariableError)); - Err(combine::primitives::Error::Other(e)) - } else { - Ok(set) - } - }) + many(Query::variable()).and_then(unique_vars) }); /// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find @@ -573,7 +575,7 @@ mod test { let e = edn::PlainSymbol::new("?e"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone())]); assert_parses_to!(Where::rule_vars, input, - vec![variable(e.clone())]); + btreeset!{variable(e.clone())}); } #[test] @@ -583,7 +585,7 @@ mod test { let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(e.clone()), edn::Value::PlainSymbol(f.clone()),]); assert_parses_to!(|| vector().of_exactly(Find::vars()), input, - vec![variable(e.clone()), variable(f.clone())].into_iter().collect()); + btreeset!{variable(e.clone()), variable(f.clone())}); let g = edn::PlainSymbol::new("?g"); let input = edn::Value::Vector(vec![edn::Value::PlainSymbol(g.clone()), @@ -642,7 +644,7 @@ mod test { edn::Value::PlainSymbol(v.clone())])].into_iter().collect()); assert_parses_to!(Where::or_join_clause, input, WhereClause::OrJoin( - OrJoin::new(UnifyVars::Explicit(vec![variable(e.clone())]), + OrJoin::new(UnifyVars::Explicit(btreeset!{variable(e.clone())}), vec![OrWhereClause::Clause( WhereClause::Pattern(Pattern { source: None, @@ -685,7 +687,7 @@ mod test { "(not-join [?e] [?e ?a ?v])", WhereClause::NotJoin( NotJoin { - unify_vars: UnifyVars::Explicit(vec![variable(e.clone())]), + unify_vars: UnifyVars::Explicit(btreeset!{variable(e.clone())}), clauses: vec![WhereClause::Pattern(Pattern { source: None, entity: PatternNonValuePlace::Variable(variable(e)), diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index be8ddd6c..d0873bce 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -8,6 +8,9 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +#[macro_use] +extern crate maplit; + extern crate edn; extern crate mentat_core; extern crate mentat_query; @@ -111,7 +114,7 @@ fn can_parse_unit_or_join() { assert_eq!(p.where_clauses, vec![ WhereClause::OrJoin(OrJoin::new( - UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + UnifyVars::Explicit(btreeset!{Variable::from_valid_name("?x")}), vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { @@ -136,7 +139,7 @@ fn can_parse_simple_or_join() { assert_eq!(p.where_clauses, vec![ WhereClause::OrJoin(OrJoin::new( - UnifyVars::Explicit(vec![Variable::from_valid_name("?x")]), + UnifyVars::Explicit(btreeset!{Variable::from_valid_name("?x")}), vec![ OrWhereClause::Clause( WhereClause::Pattern(Pattern { diff --git a/query/src/lib.rs b/query/src/lib.rs index 414090f0..fd4f74f8 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -615,7 +615,7 @@ pub enum UnifyVars { /// Only the named variables will be unified with the enclosing query. /// /// Every 'arm' in an `or-join` must mention the entire set of explicit vars. - Explicit(Vec), + Explicit(BTreeSet), } impl WhereClause { From b9cbf92205fa0d0ceda5bd7e9dcd9dc2ad7380ca Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 3 Apr 2017 16:46:11 -0700 Subject: [PATCH 17/20] Part 1: Parse functions in where clauses. --- query-parser/src/parse.rs | 151 ++++++++++++++++++++++++++++++++++++++ query/src/lib.rs | 58 ++++++++++++++- 2 files changed, 207 insertions(+), 2 deletions(-) diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 46f12751..3776429c 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -38,6 +38,7 @@ use self::mentat_parser_utils::value_and_span::{ }; use self::mentat_query::{ + Binding, Direction, Element, FindQuery, @@ -57,7 +58,9 @@ use self::mentat_query::{ SrcVar, UnifyVars, Variable, + VariableOrPlaceholder, WhereClause, + WhereFn, }; error_chain! { @@ -279,6 +282,25 @@ def_parser!(Where, pred, WhereClause, { }))) }); +/// A vector containing a parenthesized function expression and a binding. +def_parser!(Where, where_fn, WhereClause, { + // Accept either a nested list or a nested vector here: + // `[(foo ?x ?y) binding]` or `[[foo ?x ?y] binding]` + vector() + .of_exactly( + (seq().of_exactly( + (Query::predicate_fn(), Query::arguments())), + Bind::binding()) + .map(|((f, args), binding)| { + WhereClause::WhereFn( + WhereFn { + operator: f.0, + args: args, + binding: binding, + }) + })) +}); + def_parser!(Where, pattern, WhereClause, { vector() .of_exactly( @@ -331,6 +353,7 @@ def_parser!(Where, clause, WhereClause, { try(Where::not_clause()), try(Where::pred()), + try(Where::where_fn()), ]) }); @@ -345,6 +368,8 @@ def_matches_plain_symbol!(Find, period, "."); def_matches_plain_symbol!(Find, ellipsis, "..."); +def_matches_plain_symbol!(Find, placeholder, "_"); + def_parser!(Find, find_scalar, FindSpec, { Query::variable() .skip(Find::period()) @@ -448,6 +473,48 @@ def_parser!(Find, query, FindQuery, { }) }); +pub struct Bind<'a>(std::marker::PhantomData<&'a ()>); + +def_parser!(Bind, bind_scalar, Binding, { + Query::variable() + .skip(eof()) + .map(|var| Binding::BindScalar(var)) +}); + +def_parser!(Bind, variable_or_placeholder, VariableOrPlaceholder, { + Query::variable().map(VariableOrPlaceholder::Variable) + .or(Find::placeholder().map(|_| VariableOrPlaceholder::Placeholder)) +}); + +def_parser!(Bind, bind_coll, Binding, { + vector() + .of_exactly(Query::variable() + .skip(Find::ellipsis())) + .map(Binding::BindColl) +}); + +def_parser!(Bind, bind_rel, Binding, { + vector().of_exactly( + vector().of_exactly( + many1::, _>(Bind::variable_or_placeholder()) + .map(Binding::BindRel))) +}); + +def_parser!(Bind, bind_tuple, Binding, { + vector().of_exactly( + many1::, _>(Bind::variable_or_placeholder()) + .map(Binding::BindTuple)) +}); + +def_parser!(Bind, binding, Binding, { + // Any one of the four binding types might apply, so we combine them with `choice`. Our parsers + // consume input, so we need to wrap them in `try` so that they operate independently. + choice([try(Bind::bind_scalar()), + try(Bind::bind_coll()), + try(Bind::bind_tuple()), + try(Bind::bind_rel())]) +}); + pub fn parse_find_string(string: &str) -> Result { let expr = edn::parse::value(string)?; Find::query() @@ -467,6 +534,7 @@ mod test { use self::combine::Parser; use self::edn::OrderedFloat; use self::mentat_query::{ + Binding, Element, FindSpec, NonIntegerConstant, @@ -475,6 +543,7 @@ mod test { PatternValuePlace, SrcVar, Variable, + VariableOrPlaceholder, }; use super::*; @@ -793,4 +862,86 @@ mod test { FnArg::Variable(variable(vy)), ])); } + + #[test] + fn test_bind_scalar() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| list().of_exactly(Bind::binding()), + "(?x)", + Binding::BindScalar(variable(vx))); + } + + #[test] + fn test_bind_coll() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| list().of_exactly(Bind::binding()), + "([?x ...])", + Binding::BindColl(variable(vx))); + } + + #[test] + fn test_bind_rel() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| list().of_exactly(Bind::binding()), + "([[?x ?y _ ?w]])", + Binding::BindRel(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_bind_tuple() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| list().of_exactly(Bind::binding()), + "([?x ?y _ ?w])", + Binding::BindTuple(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_where_fn() { + assert_edn_parses_to!(Where::where_fn, + "[(f ?x 1) ?y]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x")), + FnArg::EntidOrInteger(1)], + binding: Binding::BindScalar(Variable::from_valid_name("?y")), + })); + + assert_edn_parses_to!(Where::where_fn, + "[(f ?x) [?y ...]]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x"))], + binding: Binding::BindColl(Variable::from_valid_name("?y")), + })); + + assert_edn_parses_to!(Where::where_fn, + "[(f) [?y _]]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![], + binding: Binding::BindTuple(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?y")), + VariableOrPlaceholder::Placeholder]), + })); + + assert_edn_parses_to!(Where::where_fn, + "[(f) [[_ ?y]]]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![], + binding: Binding::BindRel(vec![VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(Variable::from_valid_name("?y"))]), + })); + } } diff --git a/query/src/lib.rs b/query/src/lib.rs index fd4f74f8..abd35868 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -534,6 +534,25 @@ impl FindSpec { } } +// Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks +// to the non-recursive form Datomic accepts, which is much simpler to process. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum VariableOrPlaceholder { + Placeholder, + Variable(Variable), +} + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum Binding { + BindRel(Vec), + + BindColl(Variable), + + BindTuple(Vec), + + BindScalar(Variable), +} + // Note that the "implicit blank" rule applies. // A pattern with a reversed attribute — :foo/_bar — is reversed // at the point of parsing. These `Pattern` instances only represent @@ -589,6 +608,13 @@ pub struct Predicate { pub args: Vec, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct WhereFn { + pub operator: PlainSymbol, + pub args: Vec, + pub binding: Binding, +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum UnifyVars { /// `Implicit` means the variables in an `or` or `not` are derived from the enclosed pattern. @@ -664,7 +690,7 @@ pub enum WhereClause { NotJoin(NotJoin), OrJoin(OrJoin), Pred(Predicate), - WhereFn, + WhereFn(WhereFn), RuleExpr, Pattern(Pattern), } @@ -730,7 +756,7 @@ impl ContainsVariables for WhereClause { &Pred(ref p) => p.accumulate_mentioned_variables(acc), &Pattern(ref p) => p.accumulate_mentioned_variables(acc), &NotJoin(ref n) => n.accumulate_mentioned_variables(acc), - &WhereFn => (), + &WhereFn(ref f) => f.accumulate_mentioned_variables(acc), &RuleExpr => (), } } @@ -794,6 +820,34 @@ impl ContainsVariables for Predicate { } } +impl ContainsVariables for Binding { + fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { + match self { + &Binding::BindScalar(ref v) | &Binding::BindColl(ref v) => { + acc_ref(acc, v) + }, + &Binding::BindRel(ref vs) | &Binding::BindTuple(ref vs) => { + for v in vs { + if let &VariableOrPlaceholder::Variable(ref v) = v { + acc_ref(acc, v); + } + } + }, + } + } +} + +impl ContainsVariables for WhereFn { + fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { + for arg in &self.args { + if let &FnArg::Variable(ref v) = arg { + acc_ref(acc, v) + } + } + self.binding.accumulate_mentioned_variables(acc); + } +} + fn acc_ref(acc: &mut BTreeSet, v: &T) { // Roll on, reference entries! if !acc.contains(v) { From d04d22a6a693a181e3be98770b2eb4c02142cdcf Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 7 Jun 2017 10:59:25 -0700 Subject: [PATCH 18/20] Part 2: refactor projector to be reusable from translator. This allows the translator to also use bound values in nested queries. --- query-projector/src/lib.rs | 50 +++++++++++++++++++---------- query-translator/src/translate.rs | 53 ++++++++++++++++--------------- 2 files changed, 60 insertions(+), 43 deletions(-) diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index 64988b19..b9922731 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -28,6 +28,7 @@ use rusqlite::{ use mentat_core::{ SQLValueType, TypedValue, + ValueType, }; use mentat_db::{ @@ -44,6 +45,7 @@ use mentat_query::{ use mentat_query_algebrizer::{ AlgebraicQuery, ColumnName, + ConjoiningClauses, VariableColumn, }; @@ -157,29 +159,43 @@ impl TypedIndex { } } -fn candidate_column(query: &AlgebraicQuery, var: &Variable) -> (ColumnOrExpression, Name) { +fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { // Every variable should be bound by the top-level CC to at least // one column in the query. If that constraint is violated it's a // bug in our code, so it's appropriate to panic here. - let columns = query.cc - .column_bindings - .get(var) - .expect(format!("Every variable should have a binding, but {:?} does not", var).as_str()); + let columns = cc.column_bindings + .get(var) + .expect(format!("Every variable should have a binding, but {:?} does not", var).as_str()); let qa = columns[0].clone(); let name = VariableColumn::Variable(var.clone()).column_name(); (ColumnOrExpression::Column(qa), name) } -fn candidate_type_column(query: &AlgebraicQuery, var: &Variable) -> (ColumnOrExpression, Name) { - let extracted_alias = query.cc - .extracted_types - .get(var) - .expect("Every variable has a known type or an extracted type"); +fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> (ColumnOrExpression, Name) { + let extracted_alias = cc.extracted_types + .get(var) + .expect("Every variable has a known type or an extracted type"); let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name(); (ColumnOrExpression::Column(extracted_alias.clone()), type_name) } +/// Return the projected column -- that is, a value or SQL column and an associated name -- for a +/// given variable. Also return the type, if known. +/// Callers are expected to determine whether to project a type tag as an additional SQL column. +pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> (ProjectedColumn, Option) { + if let Some(value) = cc.bound_value(&var) { + // If we already know the value, then our lives are easy. + let tag = value.value_type(); + let name = VariableColumn::Variable(var.clone()).column_name(); + (ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), Some(tag)) + } else { + // If we don't, then the CC *must* have bound the variable. + let (column, name) = candidate_column(cc, var); + (ProjectedColumn(column, name), cc.known_type(var)) + } +} + /// Walk an iterator of `Element`s, collecting projector templates and columns. /// /// Returns a pair: the SQL projection (which should always be a `Projection::Columns`) @@ -211,10 +227,10 @@ fn project_elements<'a, I: IntoIterator>( // If we're projecting this, we don't need it in :with. with.remove(var); - let (column, name) = candidate_column(query, var); - cols.push(ProjectedColumn(column, name)); - if let Some(t) = query.cc.known_type(var) { - let tag = t.value_type_tag(); + let (projected_column, maybe_type) = projected_column_for_var(&var, &query.cc); + cols.push(projected_column); + if let Some(ty) = maybe_type { + let tag = ty.value_type_tag(); templates.push(TypedIndex::Known(i, tag)); i += 1; // We used one SQL column. } else { @@ -222,7 +238,7 @@ fn project_elements<'a, I: IntoIterator>( i += 2; // We used two SQL columns. // Also project the type from the SQL query. - let (type_column, type_name) = candidate_type_column(query, &var); + let (type_column, type_name) = candidate_type_column(&query.cc, &var); cols.push(ProjectedColumn(type_column, type_name)); } } @@ -233,10 +249,10 @@ fn project_elements<'a, I: IntoIterator>( // We need to collect these into the SQL column list, but they don't affect projection. // If a variable is of a non-fixed type, also project the type tag column, so we don't // accidentally unify across types when considering uniqueness! - let (column, name) = candidate_column(query, &var); + let (column, name) = candidate_column(&query.cc, &var); cols.push(ProjectedColumn(column, name)); if query.cc.known_type(&var).is_none() { - let (type_column, type_name) = candidate_type_column(query, &var); + let (type_column, type_name) = candidate_type_column(&query.cc, &var); cols.push(ProjectedColumn(type_column, type_name)); } } diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 0dfd28a1..f755af65 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -38,6 +38,7 @@ use mentat_query_algebrizer::{ use mentat_query_projector::{ CombinedProjection, Projector, + projected_column_for_var, query_projection, }; @@ -201,33 +202,33 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq // project it as the variable name. // E.g., SELECT datoms03.v AS `?x`. for var in projection.iter() { - let col = cc.column_bindings.get(&var).unwrap()[0].clone(); - let proj = ProjectedColumn(ColumnOrExpression::Column(col), var.to_string()); - columns.push(proj); - } + let (projected_column, maybe_type) = projected_column_for_var(var, &cc); + columns.push(projected_column); - // Similarly, project type tags if they're not known conclusively in the - // outer query. - for var in type_extraction.iter() { - let expression = - if let Some(known) = cc.known_type(var) { - // If we know the type for sure, just project the constant. - // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` - ColumnOrExpression::Integer(known.value_type_tag()) - } else { - // Otherwise, we'll have an established type binding! This'll be - // either a datoms table or, recursively, a subquery. Project - // this: - // SELECT datoms03.v AS `?x`, - // datoms03.value_type_tag AS `?x_value_type_tag` - let extract = cc.extracted_types - .get(var) - .expect("Expected variable to have a known type or an extracted type"); - ColumnOrExpression::Column(extract.clone()) - }; - let type_column = VariableColumn::VariableTypeTag(var.clone()); - let proj = ProjectedColumn(expression, type_column.column_name()); - columns.push(proj); + // Similarly, project type tags if they're not known conclusively in the + // outer query. + // Assumption: we'll never need to project a tag without projecting the value of a variable. + if type_extraction.contains(var) { + let expression = + if let Some(ty) = maybe_type { + // If we know the type for sure, just project the constant. + // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` + ColumnOrExpression::Integer(ty.value_type_tag()) + } else { + // Otherwise, we'll have an established type binding! This'll be + // either a datoms table or, recursively, a subquery. Project + // this: + // SELECT datoms03.v AS `?x`, + // datoms03.value_type_tag AS `?x_value_type_tag` + let extract = cc.extracted_types + .get(var) + .expect("Expected variable to have a known type or an extracted type"); + ColumnOrExpression::Column(extract.clone()) + }; + let type_column = VariableColumn::VariableTypeTag(var.clone()); + let proj = ProjectedColumn(expression, type_column.column_name()); + columns.push(proj); + } } // Each arm simply turns into a subquery. From 79fa0994b3ad9d0d9ab6417eaf91e97cb0b25e25 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 26 Apr 2017 15:50:17 -0700 Subject: [PATCH 19/20] Part 3: Handle `ground`. (#469) r=nalexander,rnewman This version removes nalexander's lovely matrix code. It turned out that scalar and tuple bindings are sufficiently different from coll and rel -- they can directly apply as values in the query -- that there was no point in jumping through hoops to turn those single values into a matrix. Furthermore, I've standardized us on a Vec representation for rectangular matrices, which should be much more efficient, but would have required rewriting that code. Finally, coll and rel are sufficiently different from each other -- coll doesn't require processing nested collections -- that my attempts to share code between them fell somewhat flat. I had lots of nice ideas about zipping together cycles and such, but ultimately I ended up with relatively straightforward, if a bit repetitive, code. The next commit will demonstrate the value of this work -- tests that exercised scalar and tuple grounding now collapse down to the simplest possible SQL. --- query-algebrizer/Cargo.toml | 1 + query-algebrizer/src/clauses/mod.rs | 20 +- query-algebrizer/src/clauses/or.rs | 1 + query-algebrizer/src/clauses/pattern.rs | 12 +- query-algebrizer/src/clauses/predicate.rs | 10 +- query-algebrizer/src/clauses/where_fn.rs | 560 ++++++++++++++++++++++ query-algebrizer/src/errors.rs | 29 +- query-algebrizer/src/lib.rs | 1 + query-algebrizer/src/types.rs | 16 +- query-algebrizer/tests/ground.rs | 315 ++++++++++++ query-translator/src/translate.rs | 9 +- query-translator/tests/translate.rs | 174 +++++++ query/src/lib.rs | 86 +++- 13 files changed, 1205 insertions(+), 29 deletions(-) create mode 100644 query-algebrizer/src/clauses/where_fn.rs create mode 100644 query-algebrizer/tests/ground.rs diff --git a/query-algebrizer/Cargo.toml b/query-algebrizer/Cargo.toml index 38a30fa5..b1dcf062 100644 --- a/query-algebrizer/Cargo.toml +++ b/query-algebrizer/Cargo.toml @@ -18,4 +18,5 @@ path = "../query" path = "../query-parser" [dev-dependencies] +itertools = "0.5" maplit = "0.1" diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index eabe925a..e94713da 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -67,6 +67,7 @@ mod not; mod pattern; mod predicate; mod resolve; +mod where_fn; use validate::{ validate_not_join, @@ -141,6 +142,7 @@ impl Intersection for BTreeMap { /// /// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. /// - Predicate clauses turn into the same, but with other functions. +/// - Function clauses turn into `WHERE` parts using function-specific comparisons. /// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to /// bind it to the outer variables, or adds simple `WHERE` clauses to the outer /// clause. @@ -228,6 +230,7 @@ impl Debug for ConjoiningClauses { fmt.debug_struct("ConjoiningClauses") .field("empty_because", &self.empty_because) .field("from", &self.from) + .field("computed_tables", &self.computed_tables) .field("wheres", &self.wheres) .field("column_bindings", &self.column_bindings) .field("input_variables", &self.input_variables) @@ -479,14 +482,15 @@ impl ConjoiningClauses { /// Constrains the var if there's no existing type. /// Marks as known-empty if it's impossible for this type to apply because there's a conflicting /// type already known. - fn constrain_var_to_type(&mut self, variable: Variable, this_type: ValueType) { + fn constrain_var_to_type(&mut self, var: Variable, this_type: ValueType) { // Is there an existing mapping for this variable? // Any known inputs have already been added to known_types, and so if they conflict we'll // spot it here. - if let Some(existing) = self.known_types.insert(variable.clone(), ValueTypeSet::of_one(this_type)) { + let this_type_set = ValueTypeSet::of_one(this_type); + if let Some(existing) = self.known_types.insert(var.clone(), this_type_set) { // There was an existing mapping. Does this type match? if !existing.contains(this_type) { - self.mark_known_empty(EmptyBecause::TypeMismatch(variable, existing, this_type)); + self.mark_known_empty(EmptyBecause::TypeMismatch { var, existing, desired: this_type_set }); } } } @@ -545,10 +549,9 @@ impl ConjoiningClauses { Entry::Occupied(mut e) => { let intersected: ValueTypeSet = types.intersection(e.get()); if intersected.is_empty() { - let mismatching_type = types.exemplar().expect("types isn't none"); - let reason = EmptyBecause::TypeMismatch(e.key().clone(), - e.get().clone(), - mismatching_type); + let reason = EmptyBecause::TypeMismatch { var: e.key().clone(), + existing: e.get().clone(), + desired: types }; empty_because = Some(reason); } // Always insert, even if it's empty! @@ -838,6 +841,9 @@ impl ConjoiningClauses { WhereClause::Pred(p) => { self.apply_predicate(schema, p) }, + WhereClause::WhereFn(f) => { + self.apply_where_fn(schema, f) + }, WhereClause::OrJoin(o) => { validate_or_join(&o)?; self.apply_or_join(schema, o) diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index d4dfe2d9..26ecf48a 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -813,6 +813,7 @@ mod testing { }); schema } + /// Test that if all the attributes in an `or` fail to resolve, the entire thing fails. #[test] fn test_schema_based_failure() { diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index f9c084a3..7874ebdf 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -801,7 +801,11 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), ValueTypeSet::of_one(ValueType::String), ValueType::Boolean)); + EmptyBecause::TypeMismatch { + var: y.clone(), + existing: ValueTypeSet::of_one(ValueType::String), + desired: ValueTypeSet::of_one(ValueType::Boolean), + }); } #[test] @@ -839,7 +843,11 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(x.clone(), ValueTypeSet::of_one(ValueType::Ref), ValueType::Boolean)); + EmptyBecause::TypeMismatch { + var: x.clone(), + existing: ValueTypeSet::of_one(ValueType::Ref), + desired: ValueTypeSet::of_one(ValueType::Boolean), + }); } #[test] diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index e0c56b54..16b12f8f 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -222,8 +222,10 @@ mod testing { assert!(cc.is_known_empty()); assert_eq!(cc.empty_because.unwrap(), - EmptyBecause::TypeMismatch(y.clone(), - ValueTypeSet::of_numeric_types(), - ValueType::String)); + EmptyBecause::TypeMismatch { + var: y.clone(), + existing: ValueTypeSet::of_numeric_types(), + desired: ValueTypeSet::of_one(ValueType::String), + }); } -} \ No newline at end of file +} diff --git a/query-algebrizer/src/clauses/where_fn.rs b/query-algebrizer/src/clauses/where_fn.rs new file mode 100644 index 00000000..ce348657 --- /dev/null +++ b/query-algebrizer/src/clauses/where_fn.rs @@ -0,0 +1,560 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::rc::Rc; + +use mentat_core::{ + Schema, + SQLValueType, + TypedValue, + ValueType, +}; + +use mentat_query::{ + Binding, + FnArg, + NonIntegerConstant, + Variable, + VariableOrPlaceholder, + WhereFn, +}; + +use clauses::{ + ConjoiningClauses, + PushComputed, +}; + +use errors::{ + BindingError, + ErrorKind, + Result, +}; + +use super::QualifiedAlias; + +use types::{ + ComputedTable, + EmptyBecause, + SourceAlias, + ValueTypeSet, + VariableColumn, +}; + +macro_rules! coerce_to_typed_value { + ($var: ident, $val: ident, $types: expr, $type: path, $constructor: path) => { { + Ok(if !$types.contains($type) { + Impossible(EmptyBecause::TypeMismatch { + var: $var.clone(), + existing: $types, + desired: ValueTypeSet::of_one($type), + }) + } else { + Val($constructor($val).into()) + }) + } } +} + +enum ValueConversion { + Val(TypedValue), + Impossible(EmptyBecause), +} + +/// Conversion of FnArgs to TypedValues. +impl ConjoiningClauses { + /// Convert the provided `FnArg` to a `TypedValue`. + /// The conversion depends on, and can fail because of: + /// - Existing known types of a variable to which this arg will be bound. + /// - Existing bindings of a variable `FnArg`. + fn typed_value_from_arg<'s>(&self, schema: &'s Schema, var: &Variable, arg: FnArg, known_types: ValueTypeSet) -> Result { + use self::ValueConversion::*; + if known_types.is_empty() { + // If this happens, it likely means the pattern has already failed! + return Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::any(), + })); + } + + match arg { + // Longs are potentially ambiguous: they might be longs or entids. + FnArg::EntidOrInteger(x) => { + match (ValueType::Ref.accommodates_integer(x), + known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Long)) { + (true, true, true) => { + // Ambiguous: this arg could be an entid or a long. + // We default to long. + Ok(Val(TypedValue::Long(x))) + }, + (true, true, false) => { + // This can only be a ref. + Ok(Val(TypedValue::Ref(x))) + }, + (_, false, true) => { + // This can only be a long. + Ok(Val(TypedValue::Long(x))) + }, + (false, true, _) => { + // This isn't a valid ref, but that's the type to which this must conform! + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + (_, false, false) => { + // Non-overlapping type sets. + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + } + }, + + // If you definitely want to look up an ident, do it before running the query. + FnArg::IdentOrKeyword(x) => { + match (known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Keyword)) { + (true, true) => { + // Ambiguous: this could be a keyword or an ident. + // Default to keyword. + Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (true, false) => { + // This can only be an ident. Look it up! + match schema.get_entid(&x).map(TypedValue::Ref) { + Some(e) => Ok(Val(e)), + None => Ok(Impossible(EmptyBecause::UnresolvedIdent(x.clone()))), + } + }, + (false, true) => { + Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (false, false) => { + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_keywords(), + })) + }, + } + }, + + FnArg::Variable(in_var) => { + // TODO: technically you could ground an existing variable inside the query…. + if !self.input_variables.contains(&in_var) { + bail!(ErrorKind::UnboundVariable((*in_var.0).clone())); + } + match self.bound_value(&in_var) { + // The type is already known if it's a bound variable…. + Some(ref in_value) => Ok(Val(in_value.clone())), + None => bail!(ErrorKind::UnboundVariable((*in_var.0).clone())), + } + }, + + // This isn't implemented yet. + FnArg::Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), + + // These don't make sense here. + FnArg::Vector(_) | + FnArg::SrcVar(_) => bail!(ErrorKind::InvalidGroundConstant), + + // These are all straightforward. + FnArg::Constant(NonIntegerConstant::Boolean(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Boolean, TypedValue::Boolean) + }, + FnArg::Constant(NonIntegerConstant::Instant(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Instant, TypedValue::Instant) + }, + FnArg::Constant(NonIntegerConstant::Uuid(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Uuid, TypedValue::Uuid) + }, + FnArg::Constant(NonIntegerConstant::Float(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Double, TypedValue::Double) + }, + FnArg::Constant(NonIntegerConstant::Text(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::String, TypedValue::String) + }, + } + } +} + +/// Application of `where` functions. +impl ConjoiningClauses { + /// There are several kinds of functions binding variables in our Datalog: + /// - A set of functions like `ground`, fulltext` and `get-else` that are translated into SQL + /// `VALUES`, `MATCH`, or `JOIN`, yielding bindings. + /// - In the future, some functions that are implemented via function calls in SQLite. + /// + /// At present we have implemented only a limited selection of functions. + pub fn apply_where_fn<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + // Because we'll be growing the set of built-in functions, handling each differently, and + // ultimately allowing user-specified functions, we match on the function name first. + match where_fn.operator.0.as_str() { + "ground" => self.apply_ground(schema, where_fn), + _ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())), + } + } + + fn apply_ground_place<'s>(&mut self, schema: &'s Schema, var: VariableOrPlaceholder, arg: FnArg) -> Result<()> { + match var { + VariableOrPlaceholder::Placeholder => Ok(()), + VariableOrPlaceholder::Variable(var) => self.apply_ground_var(schema, var, arg), + } + } + + /// Constrain the CC to associate the given var with the given ground argument. + /// Marks known-empty on failure. + fn apply_ground_var<'s>(&mut self, schema: &'s Schema, var: Variable, arg: FnArg) -> Result<()> { + let known_types = self.known_type_set(&var); + match self.typed_value_from_arg(schema, &var, arg, known_types)? { + ValueConversion::Val(value) => self.apply_ground_value(var, value), + ValueConversion::Impossible(because) => { + self.mark_known_empty(because); + Ok(()) + }, + } + } + + /// Marks known-empty on failure. + fn apply_ground_value(&mut self, var: Variable, value: TypedValue) -> Result<()> { + if let Some(existing) = self.bound_value(&var) { + if existing != value { + self.mark_known_empty(EmptyBecause::ConflictingBindings { + var: var.clone(), + existing: existing.clone(), + desired: value, + }); + return Ok(()) + } + } else { + self.bind_value(&var, value.clone()); + } + + // Check to see whether this variable is already associated to a column. + // If so, we want to add an equality filter (or, in the future, redo the existing patterns). + if let Some(QualifiedAlias(table, column)) = self.column_bindings + .get(&var) + .and_then(|vec| vec.get(0).cloned()) { + self.constrain_column_to_constant(table, column, value); + } + + Ok(()) + } + + /// Take a relation: a matrix of values which will successively bind to named variables of + /// the provided types. + /// Construct a computed table to yield this relation. + /// This function will panic if some invariants are not met. + fn collect_named_bindings<'s>(&mut self, schema: &'s Schema, names: Vec, types: Vec, values: Vec) { + if values.is_empty() { + return; + } + + assert!(!names.is_empty()); + assert_eq!(names.len(), types.len()); + assert!(values.len() >= names.len()); + assert_eq!(values.len() % names.len(), 0); // It's an exact multiple. + + let named_values = ComputedTable::NamedValues { + names: names.clone(), + values: values, + }; + + let table = self.computed_tables.push_computed(named_values); + let alias = self.next_alias_for_table(table); + + // Stitch the computed table into column_bindings, so we get cross-linking. + for (name, ty) in names.iter().zip(types.into_iter()) { + self.constrain_var_to_type(name.clone(), ty); + self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(name.clone()), name.clone()); + } + + self.from.push(SourceAlias(table, alias)); + } + + pub fn apply_ground<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 1 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 1)); + } + + let mut args = where_fn.args.into_iter(); + + if where_fn.binding.is_empty() { + // The binding must introduce at least one bound variable. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable)); + } + + if !where_fn.binding.is_valid() { + // The binding must not duplicate bound variables. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable)); + } + + // Scalar and tuple bindings are a little special: because there's only one value, + // we can immediately substitute the value as a known value in the CC, additionally + // generating a WHERE clause if columns have already been bound. + match (where_fn.binding, args.next().unwrap()) { + (Binding::BindScalar(var), constant) => + self.apply_ground_var(schema, var, constant), + + (Binding::BindTuple(places), FnArg::Vector(children)) => { + // Just the same, but we bind more than one column at a time. + if children.len() != places.len() { + // Number of arguments don't match the number of values. TODO: better error message. + bail!(ErrorKind::GroundBindingsMismatch); + } + for (place, arg) in places.into_iter().zip(children.into_iter()) { + self.apply_ground_place(schema, place, arg)? // TODO: short-circuit on impossible. + } + Ok(()) + }, + + // Collection bindings and rel bindings are similar in that they are both + // implemented as a subquery with a projection list and a set of values. + // The difference is that BindColl has only a single variable, and its values + // are all in a single structure. That makes it substantially simpler! + (Binding::BindColl(var), FnArg::Vector(children)) => { + if children.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Turn a collection of arguments into a Vec of `TypedValue`s of the same type. + let known_types = self.known_type_set(&var); + // Check that every value has the same type. + let mut accumulated_types = ValueTypeSet::none(); + let mut skip: Option = None; + let values = children.into_iter() + .filter_map(|arg| -> Option> { + // We need to get conversion errors out. + // We also want to mark known-empty on impossibilty, but + // still detect serious errors. + match self.typed_value_from_arg(schema, &var, arg, known_types) { + Ok(ValueConversion::Val(tv)) => { + if accumulated_types.insert(tv.value_type()) && + !accumulated_types.is_unit() { + // Values not all of the same type. + Some(Err(ErrorKind::InvalidGroundConstant.into())) + } else { + Some(Ok(tv)) + } + }, + Ok(ValueConversion::Impossible(because)) => { + // Skip this value. + skip = Some(because); + None + }, + Err(e) => Some(Err(e.into())), + } + }) + .collect::>>()?; + + if values.is_empty() { + let because = skip.expect("we skipped all rows for a reason"); + self.mark_known_empty(because); + return Ok(()); + } + + // Otherwise, we now have the values and the type. + let types = vec![accumulated_types.exemplar().unwrap()]; + let names = vec![var.clone()]; + + self.collect_named_bindings(schema, names, types, values); + Ok(()) + }, + + (Binding::BindRel(places), FnArg::Vector(rows)) => { + if rows.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Grab the known types to which these args must conform, and track + // the places that won't be bound in the output. + let template: Vec> = + places.iter() + .map(|x| match x { + &VariableOrPlaceholder::Placeholder => None, + &VariableOrPlaceholder::Variable(ref v) => Some((v.clone(), self.known_type_set(v))), + }) + .collect(); + + // The expected 'width' of the matrix is the number of named variables. + let full_width = places.len(); + let names: Vec = places.into_iter().filter_map(|x| x.into_var()).collect(); + let expected_width = names.len(); + let expected_rows = rows.len(); + + if expected_width == 0 { + // They can't all be placeholders. + bail!(ErrorKind::InvalidGroundConstant); + } + + // Accumulate values into `matrix` and types into `a_t_f_c`. + // This representation of a rectangular matrix is more efficient than one composed + // of N separate vectors. + let mut matrix = Vec::with_capacity(expected_width * expected_rows); + let mut accumulated_types_for_columns = vec![ValueTypeSet::none(); expected_width]; + + // Loop so we can bail out. + let mut skipped_all: Option = None; + for row in rows.into_iter() { + match row { + FnArg::Vector(cols) => { + // Make sure that every row is the same length. + if cols.len() != full_width { + bail!(ErrorKind::InvalidGroundConstant); + } + + // TODO: don't accumulate twice. + let mut vals = Vec::with_capacity(expected_width); + let mut skip: Option = None; + for (col, pair) in cols.into_iter().zip(template.iter()) { + // Now we have (val, Option<(name, known_types)>). Silly, + // but this is how we iter! + // Convert each item in the row. + // If any value in the row is impossible, then skip the row. + // If all rows are impossible, fail the entire CC. + if let &Some(ref pair) = pair { + match self.typed_value_from_arg(schema, &pair.0, col, pair.1)? { + ValueConversion::Val(tv) => vals.push(tv), + ValueConversion::Impossible(because) => { + // Skip this row. It cannot produce bindings. + skip = Some(because); + break; + }, + } + } + } + + if skip.is_some() { + // Skip this row and record why, in case we skip all. + skipped_all = skip; + continue; + } + + // Accumulate the values into the matrix and the types into the type set. + for (val, acc) in vals.into_iter().zip(accumulated_types_for_columns.iter_mut()) { + let inserted = acc.insert(val.value_type()); + if inserted && !acc.is_unit() { + // Heterogeneous types. + bail!(ErrorKind::InvalidGroundConstant); + } + matrix.push(val); + } + + }, + _ => bail!(ErrorKind::InvalidGroundConstant), + } + } + + // Do we have rows? If not, the CC cannot succeed. + if matrix.is_empty() { + // We will either have bailed or will have accumulated *something* into the matrix, + // so we can safely unwrap here. + self.mark_known_empty(skipped_all.expect("we skipped for a reason")); + return Ok(()); + } + + // Take the single type from each set. We know there's only one: we got at least one + // type, 'cos we bailed out for zero rows, and we also bailed out each time we + // inserted a second type. + // By restricting to homogeneous columns, we greatly simplify projection. In the + // future, we could loosen this restriction, at the cost of projecting (some) value + // type tags. If and when we want to algebrize in two phases and allow for + // late-binding input variables, we'll probably be able to loosen this restriction + // with little penalty. + let types = accumulated_types_for_columns.into_iter() + .map(|x| x.exemplar().unwrap()) + .collect(); + self.collect_named_bindings(schema, names, types, matrix); + Ok(()) + }, + (_, _) => bail!(ErrorKind::InvalidGroundConstant), + } + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use mentat_core::{ + Attribute, + ValueType, + }; + + use mentat_query::{ + Binding, + FnArg, + NamespacedKeyword, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ValueTypeSet, + }; + + #[test] + fn test_apply_ground() { + let vz = Variable::from_valid_name("?z"); + + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + // It's awkward enough to write these expansions that we give the details for the simplest + // case only. See the tests of the translator for more extensive (albeit looser) coverage. + let op = PlainSymbol::new("ground"); + cc.apply_ground(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::EntidOrInteger(10), + ], + binding: Binding::BindScalar(vz.clone()), + }).expect("to be able to apply_ground"); + + assert!(!cc.is_known_empty()); + + // Finally, expand column bindings. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty()); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 0); + + let column_bindings = cc.column_bindings; + assert_eq!(column_bindings.len(), 0); // Scalar doesn't need this. + + let known_types = cc.known_types; + assert_eq!(known_types.len(), 1); + assert_eq!(known_types.get(&vz).expect("to know the type of ?z"), + &ValueTypeSet::of_one(ValueType::Long)); + + let value_bindings = cc.value_bindings; + assert_eq!(value_bindings.len(), 1); + assert_eq!(value_bindings.get(&vz).expect("to have a value for ?z"), + &TypedValue::Long(10)); // We default to Long instead of entid. + } +} diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index 0c93011d..852e07fc 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -10,12 +10,20 @@ extern crate mentat_query; -use mentat_core::ValueType; +use mentat_core::{ + ValueType, +}; use self::mentat_query::{ PlainSymbol, }; +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum BindingError { + NoBoundVariable, + RepeatedBoundVariable, // TODO: include repeated variable(s). +} + error_chain! { types { Error, ErrorKind, ResultExt, Result; @@ -32,9 +40,9 @@ error_chain! { display("no function named {}", name) } - InvalidNumberOfArguments(name: PlainSymbol, number: usize, expected: usize) { + InvalidNumberOfArguments(function: PlainSymbol, number: usize, expected: usize) { description("invalid number of arguments") - display("invalid number of arguments to {}: expected {}, got {}.", name, expected, number) + display("invalid number of arguments to {}: expected {}, got {}.", function, expected, number) } UnboundVariable(name: PlainSymbol) { @@ -42,6 +50,21 @@ error_chain! { display("unbound variable: {}", name) } + InvalidBinding(function: PlainSymbol, binding_error: BindingError) { + description("invalid binding") + display("invalid binding for {}: {:?}.", function, binding_error) + } + + GroundBindingsMismatch { + description("mismatched bindings in ground") + display("mismatched bindings in ground") + } + + InvalidGroundConstant { + // TODO: flesh this out. + description("invalid expression in ground constant") + display("invalid expression in ground constant") + } InvalidArgument(function: PlainSymbol, expected_type: &'static str, position: usize) { description("invalid argument") diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 9af4f0c2..9bf9b8d1 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -46,6 +46,7 @@ use mentat_query::{ }; pub use errors::{ + BindingError, Error, ErrorKind, Result, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index f4761780..ab7222bb 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -53,6 +53,10 @@ pub enum ComputedTable { type_extraction: BTreeSet, arms: Vec<::clauses::ConjoiningClauses>, }, + NamedValues { + names: Vec, + values: Vec, + }, } impl DatomsTable { @@ -419,8 +423,8 @@ impl Debug for ColumnConstraint { #[derive(PartialEq, Clone)] pub enum EmptyBecause { - // Var, existing, desired. - TypeMismatch(Variable, ValueTypeSet, ValueType), + ConflictingBindings { var: Variable, existing: TypedValue, desired: TypedValue }, + TypeMismatch { var: Variable, existing: ValueTypeSet, desired: ValueTypeSet }, NoValidTypes(Variable), NonNumericArgument, NonStringFulltextValue, @@ -436,7 +440,11 @@ impl Debug for EmptyBecause { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { use self::EmptyBecause::*; match self { - &TypeMismatch(ref var, ref existing, ref desired) => { + &ConflictingBindings { ref var, ref existing, ref desired } => { + write!(f, "Var {:?} can't be {:?} because it's already bound to {:?}", + var, desired, existing) + }, + &TypeMismatch { ref var, ref existing, ref desired } => { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, @@ -573,4 +581,4 @@ impl ValueTypeSet { pub fn is_unit(&self) -> bool { self.0.len() == 1 } -} \ No newline at end of file +} diff --git a/query-algebrizer/tests/ground.rs b/query-algebrizer/tests/ground.rs new file mode 100644 index 00000000..0a2bdf69 --- /dev/null +++ b/query-algebrizer/tests/ground.rs @@ -0,0 +1,315 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate mentat_core; +extern crate mentat_query; +extern crate mentat_query_algebrizer; +extern crate mentat_query_parser; + +use mentat_core::{ + Attribute, + Entid, + Schema, + ValueType, + TypedValue, +}; + +use mentat_query_parser::{ + parse_find_string, +}; + +use mentat_query::{ + NamespacedKeyword, + PlainSymbol, + Variable, +}; + +use mentat_query_algebrizer::{ + BindingError, + ConjoiningClauses, + ComputedTable, + Error, + ErrorKind, + algebrize, +}; + +// These are helpers that tests use to build Schema instances. +#[cfg(test)] +fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { + schema.entid_map.insert(e, i.clone()); + schema.ident_map.insert(i.clone(), e); +} + +#[cfg(test)] +fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { + schema.schema_map.insert(e, a); +} + +fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "knows"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 66, Attribute { + value_type: ValueType::Ref, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 67, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema +} + +fn bails(schema: &Schema, input: &str) -> Error { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize(schema.into(), parsed).expect_err("algebrize to have failed") +} + +fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc +} + +#[test] +fn test_ground_doesnt_bail_for_type_conflicts() { + // We know `?x` to be a ref, but we're attempting to ground it to a Double. + // The query can return no results. + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground 9.95) ?x]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_fails_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5 9.95]) [?x ?p]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_scalar_fails_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground true) ?p]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_coll_skips_impossible() { + // We know `?x` to be a ref, but we're attempting to ground it to a Double. + // The query can return no results. + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5 9.95 11]) [?x ...]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x")], + values: vec![TypedValue::Ref(5), TypedValue::Ref(11)], + }); +} + +#[test] +fn test_ground_coll_fails_if_all_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [5.1 5.2]) [?p ...]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_rel_skips_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo"] [5 7] [9.95 9] [11 12]]) [[?x ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?p")], + values: vec![TypedValue::Ref(5), TypedValue::Ref(7), TypedValue::Ref(11), TypedValue::Ref(12)], + }); +} + +#[test] +fn test_ground_rel_fails_if_all_impossible() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[11 5.1] [12 5.2]]) [[?x ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_rejects_all_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [8 "foo" 3]) [_ _ _]]]"#; + let schema = prepopulated_schema(); + bails(&schema, &q); +} + +#[test] +fn test_ground_rel_rejects_all_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo"]]) [[_ _]]]]"#; + let schema = prepopulated_schema(); + bails(&schema, &q); +} + +#[test] +fn test_ground_tuple_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [8 "foo" 3]) [?x _ ?p]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?x")), Some(TypedValue::Ref(8))); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?p")), Some(TypedValue::Ref(3))); +} + +#[test] +fn test_ground_rel_placeholders() { + let q = r#"[:find ?x :where [?x :foo/knows ?p] [(ground [[8 "foo" 3] [5 false 7] [5 9.95 9]]) [[?x _ ?p]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?p")], + values: vec![ + TypedValue::Ref(8), + TypedValue::Ref(3), + TypedValue::Ref(5), + TypedValue::Ref(7), + TypedValue::Ref(5), + TypedValue::Ref(9), + ], + }); +} + +// Nothing to do with ground, but while we're here… +#[test] +fn test_multiple_reference_type_failure() { + let q = r#"[:find ?x :where [?x :foo/age ?y] [?x :foo/knows ?y]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_some()); +} + +#[test] +fn test_ground_tuple_infers_types() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [8 10]) [?x ?v]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?x")), Some(TypedValue::Ref(8))); + assert_eq!(cc.bound_value(&Variable::from_valid_name("?v")), Some(TypedValue::Long(10))); +} + +#[test] +fn test_ground_rel_infers_types() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?v]]]]"#; + let schema = prepopulated_schema(); + let cc = alg(&schema, &q); + assert!(cc.empty_because.is_none()); + assert_eq!(cc.computed_tables[0], ComputedTable::NamedValues { + names: vec![Variable::from_valid_name("?x"), Variable::from_valid_name("?v")], + values: vec![TypedValue::Ref(8), TypedValue::Long(10)], + }); +} + +#[test] +fn test_ground_coll_heterogeneous_types() { + let q = r#"[:find ?x :where [?x _ ?v] [(ground [false 8.5]) [?v ...]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidGroundConstant, _) => { + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_rel_heterogeneous_types() { + let q = r#"[:find ?x :where [?x _ ?v] [(ground [[false] [5]]) [[?v]]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidGroundConstant, _) => { + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_tuple_duplicate_vars() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [8 10]) [?x ?x]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidBinding(v, e), _) => { + assert_eq!(v, PlainSymbol::new("ground")); + assert_eq!(e, BindingError::RepeatedBoundVariable); + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_rel_duplicate_vars() { + let q = r#"[:find ?x :where [?x :foo/age ?v] [(ground [[8 10]]) [[?x ?x]]]]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::InvalidBinding(v, e), _) => { + assert_eq!(v, PlainSymbol::new("ground")); + assert_eq!(e, BindingError::RepeatedBoundVariable); + }, + _ => { + panic!(); + }, + } +} + +#[test] +fn test_ground_nonexistent_variable_invalid() { + let q = r#"[:find ?x ?e :where [?e _ ?x] (not [(ground 17) ?v])]"#; + let schema = prepopulated_schema(); + let e = bails(&schema, &q); + match e { + Error(ErrorKind::UnboundVariable(PlainSymbol(v)), _) => { + assert_eq!(v, "?v".to_string()); + }, + _ => { + panic!(); + }, + } +} diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index f755af65..0928aa90 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -52,6 +52,7 @@ use mentat_query_sql::{ SelectQuery, TableList, TableOrSubquery, + Values, }; trait ToConstraint { @@ -240,7 +241,13 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq }, ComputedTable::Subquery(subquery) => { TableOrSubquery::Subquery(Box::new(cc_to_exists(subquery))) - } + }, + ComputedTable::NamedValues { + names, values, + } => { + // We assume column homogeneity, so we won't have any type tag columns. + TableOrSubquery::Values(Values::Named(names, values), alias) + }, } } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 5c28c56b..a9c42274 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -550,3 +550,177 @@ fn test_complex_nested_or_join_type_projection() { LIMIT 1"); assert_eq!(args, vec![]); } + +#[test] +fn test_ground_scalar() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x . :where [(ground "yyy") ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find ?x . :in ?v :where [(ground ?v) ?x]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?v"), TypedValue::String(Rc::new("aaa".into())))]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT $v0 AS `?x` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "aaa"),]); +} + +#[test] +fn test_ground_tuple() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x ?y :where [(ground [1 "yyy"]) [?x ?y]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT 1 AS `?x`, $v0 AS `?y`"); + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find [?x ?y] :in ?u ?v :where [(ground [?u ?v]) [?x ?y]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(2)), + (Variable::from_valid_name("?v"), TypedValue::String(Rc::new("aaa".into()))),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 2 as an input variable that could be bound late, rather than eagerly binding it. + assert_eq!(sql, "SELECT 2 AS `?x`, $v0 AS `?y` LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "aaa"),]); +} + +#[test] +fn test_ground_coll() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x :where [(ground ["xxx" "yyy"]) [?x ...]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM \ + (SELECT 0 AS `?x` WHERE 0 UNION ALL VALUES ($v0), ($v1)) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "xxx"), + make_arg("$v1", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find ?x :in ?u ?v :where [(ground [?u ?v]) [?x ...]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(2)), + (Variable::from_valid_name("?v"), TypedValue::Long(3)),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 2 and 3 as input variables that could be bound late, rather than eagerly binding. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM \ + (SELECT 0 AS `?x` WHERE 0 UNION ALL VALUES (2), (3)) AS `c00`"); + assert_eq!(args, vec![]); +} + +#[test] +fn test_ground_rel() { + let schema = prepopulated_schema(); + + // Verify that we accept inline constants. + let query = r#"[:find ?x ?y :where [(ground [[1 "xxx"] [2 "yyy"]]) [[?x ?y]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x`, `c00`.`?y` AS `?y` FROM \ + (SELECT 0 AS `?x`, 0 AS `?y` WHERE 0 UNION ALL VALUES (1, $v0), (2, $v1)) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "xxx"), + make_arg("$v1", "yyy")]); + + // Verify that we accept bound input constants. + let query = r#"[:find ?x ?y :in ?u ?v :where [(ground [[?u 1] [?v 2]]) [[?x ?y]]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?u"), TypedValue::Long(3)), + (Variable::from_valid_name("?v"), TypedValue::Long(4)),]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + // TODO: treat 3 and 4 as input variables that could be bound late, rather than eagerly binding. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x`, `c00`.`?y` AS `?y` FROM \ + (SELECT 0 AS `?x`, 0 AS `?y` WHERE 0 UNION ALL VALUES (3, 1), (4, 2)) AS `c00`"); + assert_eq!(args, vec![]); +} + +#[test] +fn test_compound_with_ground() { + let schema = prepopulated_schema(); + + // Verify that we can use the resulting CCs as children in compound CCs. + let query = r#"[:find ?x :where (or [(ground "yyy") ?x] + [(ground "zzz") ?x])]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + // This is confusing because the computed tables (like `c00`) are numbered sequentially in each + // arm of the `or` rather than numbered globally. But SQLite scopes the names correctly, so it + // works. In the future, we might number the computed tables globally to make this more clear. + assert_eq!(sql, "SELECT DISTINCT `c00`.`?x` AS `?x` FROM (\ + SELECT $v0 AS `?x` UNION \ + SELECT $v1 AS `?x`) AS `c00`"); + assert_eq!(args, vec![make_arg("$v0", "yyy"), + make_arg("$v1", "zzz"),]); + + // Verify that we can use ground to constrain the bindings produced by earlier clauses. + let query = r#"[:find ?x . :where [_ :foo/bar ?x] [(ground "yyy") ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + + assert_eq!(args, vec![make_arg("$v0", "yyy")]); + + // Verify that we can further constrain the bindings produced by our clause. + let query = r#"[:find ?x . :where [(ground "yyy") ?x] [_ :foo/bar ?x]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT $v0 AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + + assert_eq!(args, vec![make_arg("$v0", "yyy")]); +} + +#[test] +fn test_unbound_attribute_with_ground_entity() { + let query = r#"[:find ?x ?v :where [?x _ ?v] (not [(ground 17) ?x])]"#; + let schema = prepopulated_schema(); + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, \ + `all_datoms00`.v AS `?v`, \ + `all_datoms00`.value_type_tag AS `?v_value_type_tag` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.e = 17)"); +} + +#[test] +fn test_unbound_attribute_with_ground() { + // TODO: this needs to expand the type code. #475. + let query = r#"[:find ?x :where [?x _ ?v] (not [(ground 5) ?v])]"#; + let schema = prepopulated_schema(); + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` \ + WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 5)"); +} + + +#[test] +fn test_not_with_ground() { + let mut schema = prepopulated_schema(); + associate_ident(&mut schema, NamespacedKeyword::new("db", "valueType"), 7); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "ref"), 23); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "bool"), 28); + associate_ident(&mut schema, NamespacedKeyword::new("db.type", "instant"), 29); + add_attribute(&mut schema, 7, Attribute { + value_type: ValueType::Ref, + multival: false, + ..Default::default() + }); + + // Scalar. + // TODO: this kind of simple `not` should be implemented without the subquery. #476. + let query = r#"[:find ?x :where [?x :db/valueType ?v] (not [(ground :db.type/instant) ?v])]"#; + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, + "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 7 AND NOT \ + EXISTS (SELECT 1 WHERE `datoms00`.v = 29)"); + + // Coll. + // TODO: we can generate better SQL for this, too. #476. + let query = r#"[:find ?x :where [?x :db/valueType ?v] (not [(ground [:db.type/bool :db.type/instant]) [?v ...]])]"#; + let SQLQuery { sql, .. } = translate(&schema, query); + assert_eq!(sql, + "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 7 AND NOT EXISTS \ + (SELECT 1 FROM (SELECT 0 AS `?v` WHERE 0 UNION ALL VALUES (28), (29)) AS `c00` \ + WHERE `datoms00`.v = `c00`.`?v`)"); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index abd35868..c412baac 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -35,6 +35,7 @@ extern crate mentat_core; use std::collections::{ BTreeSet, + HashSet, }; use std::fmt; @@ -59,7 +60,7 @@ use mentat_core::{ pub type SrcVarName = String; // Do not include the required syntactic '$'. -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Variable(pub Rc); impl Variable { @@ -536,21 +537,90 @@ impl FindSpec { // Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks // to the non-recursive form Datomic accepts, which is much simpler to process. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum VariableOrPlaceholder { Placeholder, Variable(Variable), } +impl VariableOrPlaceholder { + pub fn into_var(self) -> Option { + match self { + VariableOrPlaceholder::Placeholder => None, + VariableOrPlaceholder::Variable(var) => Some(var), + } + } + + pub fn var(&self) -> Option<&Variable> { + match self { + &VariableOrPlaceholder::Placeholder => None, + &VariableOrPlaceholder::Variable(ref var) => Some(var), + } + } +} + #[derive(Clone,Debug,Eq,PartialEq)] pub enum Binding { - BindRel(Vec), - - BindColl(Variable), - - BindTuple(Vec), - BindScalar(Variable), + BindColl(Variable), + BindRel(Vec), + BindTuple(Vec), +} + +impl Binding { + /// Return each variable or `None`, in order. + pub fn variables(&self) -> Vec> { + match self { + &Binding::BindScalar(ref var) | &Binding::BindColl(ref var) => vec![Some(var.clone())], + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => vars.iter().map(|x| x.var().cloned()).collect(), + } + } + + /// Return `true` if no variables are bound, i.e., all binding entries are placeholders. + pub fn is_empty(&self) -> bool { + match self { + &Binding::BindScalar(_) | &Binding::BindColl(_) => false, + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => vars.iter().all(|x| x.var().is_none()), + } + } + + /// Return `true` if no variable is bound twice, i.e., each binding entry is either a + /// placeholder or unique. + /// + /// ``` + /// extern crate mentat_query; + /// use std::rc::Rc; + /// + /// let v = mentat_query::Variable::from_valid_name("?foo"); + /// let vv = mentat_query::VariableOrPlaceholder::Variable(v); + /// let p = mentat_query::VariableOrPlaceholder::Placeholder; + /// + /// let e = mentat_query::Binding::BindTuple(vec![p.clone()]); + /// let b = mentat_query::Binding::BindTuple(vec![p.clone(), vv.clone()]); + /// let d = mentat_query::Binding::BindTuple(vec![vv.clone(), p, vv]); + /// assert!(b.is_valid()); // One var, one placeholder: OK. + /// assert!(!e.is_valid()); // Empty: not OK. + /// assert!(!d.is_valid()); // Duplicate var: not OK. + /// ``` + pub fn is_valid(&self) -> bool { + match self { + &Binding::BindScalar(_) | &Binding::BindColl(_) => true, + &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => { + let mut acc = HashSet::::new(); + for var in vars { + if let &VariableOrPlaceholder::Variable(ref var) = var { + if !acc.insert(var.clone()) { + // It's invalid if there was an equal var already present in the set -- + // i.e., we have a duplicate var. + return false; + } + } + } + // We're not valid if every place is a placeholder! + !acc.is_empty() + } + } + } } // Note that the "implicit blank" rule applies. From e1e549440f35440908f7cfebde08bd8bc73cca4e Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 9 Jun 2017 07:40:32 -0700 Subject: [PATCH 20/20] Expand type code when applying ground. (#475) --- query-algebrizer/src/clauses/where_fn.rs | 10 ++++++++++ query-translator/tests/translate.rs | 11 +++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/query-algebrizer/src/clauses/where_fn.rs b/query-algebrizer/src/clauses/where_fn.rs index ce348657..40c047fb 100644 --- a/query-algebrizer/src/clauses/where_fn.rs +++ b/query-algebrizer/src/clauses/where_fn.rs @@ -40,6 +40,7 @@ use errors::{ use super::QualifiedAlias; use types::{ + ColumnConstraint, ComputedTable, EmptyBecause, SourceAlias, @@ -241,6 +242,8 @@ impl ConjoiningClauses { self.bind_value(&var, value.clone()); } + let vt = value.value_type(); + // Check to see whether this variable is already associated to a column. // If so, we want to add an equality filter (or, in the future, redo the existing patterns). if let Some(QualifiedAlias(table, column)) = self.column_bindings @@ -249,6 +252,13 @@ impl ConjoiningClauses { self.constrain_column_to_constant(table, column, value); } + // Are we also trying to figure out the type of the value when the query runs? + // If so, constrain that! + if let Some(table) = self.extracted_types.get(&var) + .map(|qa| qa.0.clone()) { + self.wheres.add_intersection(ColumnConstraint::HasType(table, vt)); + } + Ok(()) } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index a9c42274..5b3b0c1f 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -684,12 +684,15 @@ fn test_unbound_attribute_with_ground_entity() { #[test] fn test_unbound_attribute_with_ground() { - // TODO: this needs to expand the type code. #475. - let query = r#"[:find ?x :where [?x _ ?v] (not [(ground 5) ?v])]"#; + let query = r#"[:find ?x ?v :where [?x _ ?v] (not [(ground 17) ?v])]"#; let schema = prepopulated_schema(); let SQLQuery { sql, .. } = translate(&schema, query); - assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` \ - WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 5)"); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, \ + `all_datoms00`.v AS `?v`, \ + `all_datoms00`.value_type_tag AS `?v_value_type_tag` \ + FROM `all_datoms` AS `all_datoms00` \ + WHERE NOT EXISTS (SELECT 1 WHERE `all_datoms00`.v = 17 AND \ + `all_datoms00`.value_type_tag = 5)"); }