diff --git a/query-projector/src/errors.rs b/query-projector/src/errors.rs index 14d59ab3..7046284e 100644 --- a/query-projector/src/errors.rs +++ b/query-projector/src/errors.rs @@ -44,6 +44,10 @@ error_chain! { description("cannot apply projection operation to types") display("cannot apply projection operation {:?} to types {:?}", op, types) } + InvalidProjection(t: String) { + description("invalid projection") + display("invalid projection: {}", t) + } UnboundVariable(var: PlainSymbol) { description("cannot project unbound variable") display("cannot project unbound variable {:?}", var) diff --git a/query-projector/src/project.rs b/query-projector/src/project.rs index cbdb9728..e63f6bee 100644 --- a/query-projector/src/project.rs +++ b/query-projector/src/project.rs @@ -147,7 +147,6 @@ pub(crate) fn project_elements<'a, I: IntoIterator>( let mut i: i32 = 0; let mut min_max_count: usize = 0; - let mut corresponding_count: usize = 0; let mut templates = vec![]; let mut aggregates = false; @@ -157,27 +156,56 @@ pub(crate) fn project_elements<'a, I: IntoIterator>( // in the result." // We use an ordered set here so that we group in the correct order. let mut outer_variables = IndexSet::new(); + let mut corresponded_variables = IndexSet::new(); // Any variable that we are projecting from the inner query. let mut inner_variables = BTreeSet::new(); for e in elements { - if let &Element::Corresponding(_) = e { - corresponding_count += 1; - } + // Check for and reject duplicates. + match e { + &Element::Variable(ref var) => { + if outer_variables.contains(var) { + bail!(ErrorKind::InvalidProjection(format!("Duplicate variable {} in query.", var))); + } + if corresponded_variables.contains(var) { + bail!(ErrorKind::InvalidProjection(format!("Can't project both {} and `(the {})` from a query.", var, var))); + } + }, + &Element::Corresponding(ref var) => { + if outer_variables.contains(var) { + bail!(ErrorKind::InvalidProjection(format!("Can't project both {} and `(the {})` from a query.", var, var))); + } + if corresponded_variables.contains(var) { + bail!(ErrorKind::InvalidProjection(format!("`(the {})` appears twice in query.", var))); + } + }, + &Element::Aggregate(_) => { + }, + }; + // Record variables -- (the ?x) and ?x are different in this regard, because we don't want + // to group on variables that are corresponding-projected. + match e { + &Element::Variable(ref var) => { + outer_variables.insert(var.clone()); + }, + &Element::Corresponding(ref var) => { + // We will project these later; don't put them in `outer_variables` + // so we know not to group them. + corresponded_variables.insert(var.clone()); + }, + &Element::Aggregate(_) => { + }, + }; + + // Now do the main processing of each element. match e { // Each time we come across a variable, we push a SQL column // into the SQL projection, aliased to the name of the variable, // and we push an annotated index into the projector. &Element::Variable(ref var) | &Element::Corresponding(ref var) => { - if outer_variables.contains(var) { - eprintln!("Warning: duplicate variable {} in query.", var); - } - - // TODO: it's an error to have `[:find ?x (the ?x) …]`. - outer_variables.insert(var.clone()); inner_variables.insert(var.clone()); let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?; @@ -243,10 +271,10 @@ pub(crate) fn project_elements<'a, I: IntoIterator>( } } - match (min_max_count, corresponding_count) { + match (min_max_count, corresponded_variables.len()) { (0, 0) | (_, 0) => {}, (0, _) => { - eprintln!("Warning: used `(the ?var)` without `min` or `max`."); + bail!(ErrorKind::InvalidProjection("Warning: used `the` without `min` or `max`.".to_string())); }, (1, _) => { // This is the success case! @@ -324,15 +352,21 @@ pub(crate) fn project_elements<'a, I: IntoIterator>( // We don't allow grouping on anything but a variable bound in the query. // We group by tag if necessary. let mut group_by = Vec::with_capacity(outer_variables.len() + 2); - for var in outer_variables.into_iter() { + + let vars = outer_variables.into_iter().zip(::std::iter::repeat(true)); + let corresponds = corresponded_variables.into_iter().zip(::std::iter::repeat(false)); + + for (var, group) in vars.chain(corresponds) { if query.cc.is_value_bound(&var) { continue; } - // The GROUP BY goes outside, but it needs every variable and type tag to be - // projected from inside. Collect in both directions here. - let name = VariableColumn::Variable(var.clone()).column_name(); - group_by.push(GroupBy::ProjectedColumn(name)); + if group { + // The GROUP BY goes outside, but it needs every variable and type tag to be + // projected from inside. Collect in both directions here. + let name = VariableColumn::Variable(var.clone()).column_name(); + group_by.push(GroupBy::ProjectedColumn(name)); + } let needs_type_projection = !query.cc.known_type_set(&var).has_unique_type_tag(); @@ -352,7 +386,9 @@ pub(crate) fn project_elements<'a, I: IntoIterator>( .ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?; inner_projection.push(ProjectedColumn(ColumnOrExpression::Column(type_col), type_name.clone())); } - group_by.push(GroupBy::ProjectedColumn(type_name)); + if group { + group_by.push(GroupBy::ProjectedColumn(type_name)); + } }; } diff --git a/query-projector/tests/aggregates.rs b/query-projector/tests/aggregates.rs index 027f8ebe..2590d141 100644 --- a/query-projector/tests/aggregates.rs +++ b/query-projector/tests/aggregates.rs @@ -86,3 +86,30 @@ fn test_aggregate_unsuitable_type() { // … when we look at the projection list, we cannot reconcile the types. assert!(query_projection(&algebrized).is_err()); } + +#[test] +fn test_the_without_max_or_min() { + let schema = prepopulated_schema(); + + let query = r#"[:find (the ?e) ?a + :where + [?e :foo/age ?a]]"#; + + // While the query itself algebrizes and parses… + let parsed = parse_find_string(query).expect("query input to have parsed"); + let algebrized = algebrize(Known::for_schema(&schema), parsed).expect("query algebrizes"); + + // … when we look at the projection list, we cannot reconcile the types. + let projection = query_projection(&algebrized); + assert!(projection.is_err()); + use ::mentat_query_projector::errors::{ + ErrorKind, + Error, + }; + match projection { + Result::Err(Error(ErrorKind::InvalidProjection(s) , _)) => { + assert_eq!(s.as_str(), "Warning: used `the` without `min` or `max`."); + }, + _ => panic!(), + } +} diff --git a/tests/query.rs b/tests/query.rs index 5167907a..444ad86b 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -1084,6 +1084,112 @@ fn test_combinatorial() { .expect("scalar results").unwrap()); } +#[test] +fn test_aggregate_the() { + let mut store = Store::open("").expect("opened"); + + store.transact(r#"[ + {:db/ident :visit/visitedOnDevice + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/one} + {:db/ident :visit/visitAt + :db/valueType :db.type/instant + :db/cardinality :db.cardinality/one} + {:db/ident :site/visit + :db/valueType :db.type/ref + :db/isComponent true + :db/cardinality :db.cardinality/many} + {:db/ident :site/url + :db/valueType :db.type/string + :db/unique :db.unique/identity + :db/cardinality :db.cardinality/one + :db/index true} + {:db/ident :visit/page + :db/valueType :db.type/ref + :db/isComponent true ; Debatable. + :db/cardinality :db.cardinality/one} + {:db/ident :page/title + :db/valueType :db.type/string + :db/fulltext true + :db/index true + :db/cardinality :db.cardinality/one} + {:db/ident :visit/container + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/one} + ]"#).expect("transacted schema"); + + store.transact(r#"[ + {:db/ident :container/facebook} + {:db/ident :container/personal} + + {:db/ident :device/my-desktop} + ]"#).expect("transacted idents"); + + store.transact(r#"[ + {:visit/visitedOnDevice :device/my-desktop + :visit/visitAt #inst "2018-04-06T20:46:00Z" + :visit/container :container/facebook + :db/id "another" + :visit/page "fbpage2"} + {:db/id "fbpage2" + :page/title "(1) Facebook"} + {:visit/visitedOnDevice :device/my-desktop + :visit/visitAt #inst "2018-04-06T18:46:00Z" + :visit/container :container/facebook + :db/id "fbvisit" + :visit/page "fbpage"} + {:db/id "fbpage" + :page/title "(2) Facebook"} + {:site/url "https://www.facebook.com" + :db/id "aa" + :site/visit ["personalvisit" "another" "fbvisit"]} + {:visit/visitedOnDevice :device/my-desktop + :visit/visitAt #inst "2018-04-06T18:46:00Z" + :visit/container :container/personal + :db/id "personalvisit" + :visit/page "personalpage"} + {:db/id "personalpage" + :page/title "Facebook - Log In or Sign Up"} + ]"#).expect("transacted data"); + + let per_title = + store.q_once(r#" + [:find (max ?visitDate) ?title + :where [?site :site/url "https://www.facebook.com"] + [?site :site/visit ?visit] + [?visit :visit/container :container/facebook] + [?visit :visit/visitAt ?visitDate] + [?visit :visit/page ?page] + [?page :page/title ?title]]"#, None) + .into_rel_result() + .expect("two results"); + + let corresponding_title = + store.q_once(r#" + [:find (max ?visitDate) (the ?title) + :where [?site :site/url "https://www.facebook.com"] + [?site :site/visit ?visit] + [?visit :visit/container :container/facebook] + [?visit :visit/visitAt ?visitDate] + [?visit :visit/page ?page] + [?page :page/title ?title]]"#, None) + .into_rel_result() + .expect("one result"); + + // This test shows the distinction between `?title` and `(the ?title`) — the former returns two + // results, while the latter returns one. Without `the` we group by `?title`, getting the + // maximum visit date for each title; with it we don't group by value, instead getting the title + // that corresponds to the maximum visit date. + // + // 'Group' in this context translates to GROUP BY in the generated SQL. + assert_eq!(2, per_title.len()); + assert_eq!(1, corresponding_title.len()); + + assert_eq!(corresponding_title, + vec![vec![TypedValue::Instant(DateTime::::from_str("2018-04-06T20:46:00.000Z").unwrap()), + TypedValue::typed_string("(1) Facebook")]]); +} + #[test] fn test_aggregation_implicit_grouping() { let mut store = Store::open("").expect("opened");