Fix (the ?foo) (#633) r=nalexander

Don't group by ?var when processing (the ?var).

This PR also finishes error generation in the projector.
This commit is contained in:
Richard Newman 2018-04-10 11:58:58 -07:00 committed by GitHub
parent 909b2a8be5
commit 1509d16c3e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 191 additions and 18 deletions

View file

@ -44,6 +44,10 @@ error_chain! {
description("cannot apply projection operation to types")
display("cannot apply projection operation {:?} to types {:?}", op, types)
}
InvalidProjection(t: String) {
description("invalid projection")
display("invalid projection: {}", t)
}
UnboundVariable(var: PlainSymbol) {
description("cannot project unbound variable")
display("cannot project unbound variable {:?}", var)

View file

@ -147,7 +147,6 @@ pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
let mut i: i32 = 0;
let mut min_max_count: usize = 0;
let mut corresponding_count: usize = 0;
let mut templates = vec![];
let mut aggregates = false;
@ -157,27 +156,56 @@ pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
// in the result."
// We use an ordered set here so that we group in the correct order.
let mut outer_variables = IndexSet::new();
let mut corresponded_variables = IndexSet::new();
// Any variable that we are projecting from the inner query.
let mut inner_variables = BTreeSet::new();
for e in elements {
if let &Element::Corresponding(_) = e {
corresponding_count += 1;
}
// Check for and reject duplicates.
match e {
&Element::Variable(ref var) => {
if outer_variables.contains(var) {
bail!(ErrorKind::InvalidProjection(format!("Duplicate variable {} in query.", var)));
}
if corresponded_variables.contains(var) {
bail!(ErrorKind::InvalidProjection(format!("Can't project both {} and `(the {})` from a query.", var, var)));
}
},
&Element::Corresponding(ref var) => {
if outer_variables.contains(var) {
bail!(ErrorKind::InvalidProjection(format!("Can't project both {} and `(the {})` from a query.", var, var)));
}
if corresponded_variables.contains(var) {
bail!(ErrorKind::InvalidProjection(format!("`(the {})` appears twice in query.", var)));
}
},
&Element::Aggregate(_) => {
},
};
// Record variables -- (the ?x) and ?x are different in this regard, because we don't want
// to group on variables that are corresponding-projected.
match e {
&Element::Variable(ref var) => {
outer_variables.insert(var.clone());
},
&Element::Corresponding(ref var) => {
// We will project these later; don't put them in `outer_variables`
// so we know not to group them.
corresponded_variables.insert(var.clone());
},
&Element::Aggregate(_) => {
},
};
// Now do the main processing of each element.
match e {
// Each time we come across a variable, we push a SQL column
// into the SQL projection, aliased to the name of the variable,
// and we push an annotated index into the projector.
&Element::Variable(ref var) |
&Element::Corresponding(ref var) => {
if outer_variables.contains(var) {
eprintln!("Warning: duplicate variable {} in query.", var);
}
// TODO: it's an error to have `[:find ?x (the ?x) …]`.
outer_variables.insert(var.clone());
inner_variables.insert(var.clone());
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
@ -243,10 +271,10 @@ pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
}
}
match (min_max_count, corresponding_count) {
match (min_max_count, corresponded_variables.len()) {
(0, 0) | (_, 0) => {},
(0, _) => {
eprintln!("Warning: used `(the ?var)` without `min` or `max`.");
bail!(ErrorKind::InvalidProjection("Warning: used `the` without `min` or `max`.".to_string()));
},
(1, _) => {
// This is the success case!
@ -324,15 +352,21 @@ pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
// We don't allow grouping on anything but a variable bound in the query.
// We group by tag if necessary.
let mut group_by = Vec::with_capacity(outer_variables.len() + 2);
for var in outer_variables.into_iter() {
let vars = outer_variables.into_iter().zip(::std::iter::repeat(true));
let corresponds = corresponded_variables.into_iter().zip(::std::iter::repeat(false));
for (var, group) in vars.chain(corresponds) {
if query.cc.is_value_bound(&var) {
continue;
}
// The GROUP BY goes outside, but it needs every variable and type tag to be
// projected from inside. Collect in both directions here.
let name = VariableColumn::Variable(var.clone()).column_name();
group_by.push(GroupBy::ProjectedColumn(name));
if group {
// The GROUP BY goes outside, but it needs every variable and type tag to be
// projected from inside. Collect in both directions here.
let name = VariableColumn::Variable(var.clone()).column_name();
group_by.push(GroupBy::ProjectedColumn(name));
}
let needs_type_projection = !query.cc.known_type_set(&var).has_unique_type_tag();
@ -352,7 +386,9 @@ pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
.ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?;
inner_projection.push(ProjectedColumn(ColumnOrExpression::Column(type_col), type_name.clone()));
}
group_by.push(GroupBy::ProjectedColumn(type_name));
if group {
group_by.push(GroupBy::ProjectedColumn(type_name));
}
};
}

View file

@ -86,3 +86,30 @@ fn test_aggregate_unsuitable_type() {
// … when we look at the projection list, we cannot reconcile the types.
assert!(query_projection(&algebrized).is_err());
}
#[test]
fn test_the_without_max_or_min() {
let schema = prepopulated_schema();
let query = r#"[:find (the ?e) ?a
:where
[?e :foo/age ?a]]"#;
// While the query itself algebrizes and parses…
let parsed = parse_find_string(query).expect("query input to have parsed");
let algebrized = algebrize(Known::for_schema(&schema), parsed).expect("query algebrizes");
// … when we look at the projection list, we cannot reconcile the types.
let projection = query_projection(&algebrized);
assert!(projection.is_err());
use ::mentat_query_projector::errors::{
ErrorKind,
Error,
};
match projection {
Result::Err(Error(ErrorKind::InvalidProjection(s) , _)) => {
assert_eq!(s.as_str(), "Warning: used `the` without `min` or `max`.");
},
_ => panic!(),
}
}

View file

@ -1084,6 +1084,112 @@ fn test_combinatorial() {
.expect("scalar results").unwrap());
}
#[test]
fn test_aggregate_the() {
let mut store = Store::open("").expect("opened");
store.transact(r#"[
{:db/ident :visit/visitedOnDevice
:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one}
{:db/ident :visit/visitAt
:db/valueType :db.type/instant
:db/cardinality :db.cardinality/one}
{:db/ident :site/visit
:db/valueType :db.type/ref
:db/isComponent true
:db/cardinality :db.cardinality/many}
{:db/ident :site/url
:db/valueType :db.type/string
:db/unique :db.unique/identity
:db/cardinality :db.cardinality/one
:db/index true}
{:db/ident :visit/page
:db/valueType :db.type/ref
:db/isComponent true ; Debatable.
:db/cardinality :db.cardinality/one}
{:db/ident :page/title
:db/valueType :db.type/string
:db/fulltext true
:db/index true
:db/cardinality :db.cardinality/one}
{:db/ident :visit/container
:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one}
]"#).expect("transacted schema");
store.transact(r#"[
{:db/ident :container/facebook}
{:db/ident :container/personal}
{:db/ident :device/my-desktop}
]"#).expect("transacted idents");
store.transact(r#"[
{:visit/visitedOnDevice :device/my-desktop
:visit/visitAt #inst "2018-04-06T20:46:00Z"
:visit/container :container/facebook
:db/id "another"
:visit/page "fbpage2"}
{:db/id "fbpage2"
:page/title "(1) Facebook"}
{:visit/visitedOnDevice :device/my-desktop
:visit/visitAt #inst "2018-04-06T18:46:00Z"
:visit/container :container/facebook
:db/id "fbvisit"
:visit/page "fbpage"}
{:db/id "fbpage"
:page/title "(2) Facebook"}
{:site/url "https://www.facebook.com"
:db/id "aa"
:site/visit ["personalvisit" "another" "fbvisit"]}
{:visit/visitedOnDevice :device/my-desktop
:visit/visitAt #inst "2018-04-06T18:46:00Z"
:visit/container :container/personal
:db/id "personalvisit"
:visit/page "personalpage"}
{:db/id "personalpage"
:page/title "Facebook - Log In or Sign Up"}
]"#).expect("transacted data");
let per_title =
store.q_once(r#"
[:find (max ?visitDate) ?title
:where [?site :site/url "https://www.facebook.com"]
[?site :site/visit ?visit]
[?visit :visit/container :container/facebook]
[?visit :visit/visitAt ?visitDate]
[?visit :visit/page ?page]
[?page :page/title ?title]]"#, None)
.into_rel_result()
.expect("two results");
let corresponding_title =
store.q_once(r#"
[:find (max ?visitDate) (the ?title)
:where [?site :site/url "https://www.facebook.com"]
[?site :site/visit ?visit]
[?visit :visit/container :container/facebook]
[?visit :visit/visitAt ?visitDate]
[?visit :visit/page ?page]
[?page :page/title ?title]]"#, None)
.into_rel_result()
.expect("one result");
// This test shows the distinction between `?title` and `(the ?title`) — the former returns two
// results, while the latter returns one. Without `the` we group by `?title`, getting the
// maximum visit date for each title; with it we don't group by value, instead getting the title
// that corresponds to the maximum visit date.
//
// 'Group' in this context translates to GROUP BY in the generated SQL.
assert_eq!(2, per_title.len());
assert_eq!(1, corresponding_title.len());
assert_eq!(corresponding_title,
vec![vec![TypedValue::Instant(DateTime::<Utc>::from_str("2018-04-06T20:46:00.000Z").unwrap()),
TypedValue::typed_string("(1) Facebook")]]);
}
#[test]
fn test_aggregation_implicit_grouping() {
let mut store = Store::open("").expect("opened");