diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 5f226be0..d9690859 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -330,7 +330,7 @@ impl Debug for QueryValue { /// Represents an entry in the ORDER BY list: a variable or a variable's type tag. /// (We require order vars to be projected, so we can simply use a variable here.) -#[derive(Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct OrderBy(pub Direction, pub VariableColumn); impl From for OrderBy { diff --git a/query-projector/src/aggregates.rs b/query-projector/src/aggregates.rs index 6bd011b8..99808fa7 100644 --- a/query-projector/src/aggregates.rs +++ b/query-projector/src/aggregates.rs @@ -164,6 +164,15 @@ impl SimpleAggregate { Count | Sum => false, } } + + /// Return `true` if this aggregate can be `NULL` over 0 rows. + pub(crate) fn is_nullable(&self) -> bool { + use self::SimpleAggregationOp::*; + match self.op { + Avg | Max | Min => true, + Count | Sum => false, + } + } } pub(crate) trait SimpleAggregation { @@ -201,7 +210,11 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc sql_op: simple.op.to_sql(), arg: ColumnOrExpression::Value(value), }; - ColumnOrExpression::Expression(Box::new(expression), return_type) + if simple.is_nullable() { + ColumnOrExpression::NullableAggregate(Box::new(expression), return_type) + } else { + ColumnOrExpression::Expression(Box::new(expression), return_type) + } } } else { // The common case: the values are bound during execution. @@ -210,7 +223,11 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc sql_op: simple.op.to_sql(), arg: ColumnOrExpression::ExistingColumn(name), }; - ColumnOrExpression::Expression(Box::new(expression), return_type) + if simple.is_nullable() { + ColumnOrExpression::NullableAggregate(Box::new(expression), return_type) + } else { + ColumnOrExpression::Expression(Box::new(expression), return_type) + } }; Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type)) } diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index b6d85d82..2bba08eb 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -62,7 +62,10 @@ pub enum ColumnOrExpression { Integer(i32), // We use these for type codes etc. Long(i64), Value(TypedValue), - Expression(Box, ValueType), // Track the return type. + // Some aggregates (`min`, `max`, `avg`) can be over 0 rows, and therefore can be `NULL`; that + // needs special treatment. + NullableAggregate(Box, ValueType), // Track the return type. + Expression(Box, ValueType), // Track the return type. } pub enum Expression { @@ -130,6 +133,12 @@ pub enum Constraint { left: ColumnOrExpression, list: Vec, }, + IsNull { + value: ColumnOrExpression, + }, + IsNotNull { + value: ColumnOrExpression, + }, NotExists { subquery: TableOrSubquery, }, @@ -278,6 +287,7 @@ impl QueryFragment for ColumnOrExpression { &Value(ref v) => { out.push_typed_value(v) }, + &NullableAggregate(ref e, _) | &Expression(ref e, _) => { e.push_sql(out) }, @@ -343,6 +353,18 @@ impl QueryFragment for Constraint { right.push_sql(out) }, + &IsNull { ref value } => { + value.push_sql(out)?; + out.push_sql(" IS NULL"); + Ok(()) + }, + + &IsNotNull { ref value } => { + value.push_sql(out)?; + out.push_sql(" IS NOT NULL"); + Ok(()) + }, + &And { ref constraints } => { // An empty intersection is true. if constraints.is_empty() { diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 54b8e5d1..ac35548d 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -427,14 +427,63 @@ fn re_project(mut inner: SelectQuery, projection: Projection) -> SelectQuery { let limit = inner.limit; inner.limit = Limit::None; - SelectQuery { + use self::Projection::*; + + let nullable = match &projection { + &Columns(ref columns) => { + columns.iter().filter_map(|pc| { + match pc { + &ProjectedColumn(ColumnOrExpression::NullableAggregate(_, _), ref name) => { + Some(Constraint::IsNotNull { + value: ColumnOrExpression::ExistingColumn(name.clone()), + }) + }, + _ => None, + } + }).collect() + }, + &Star => vec![], + &One => vec![], + }; + + if nullable.is_empty() { + return SelectQuery { + distinct: outer_distinct, + projection: projection, + from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])), + constraints: vec![], + group_by: group_by, + order: order_by, + limit: limit, + }; + } + + // Our pattern is `SELECT * FROM (SELECT ...) WHERE (nullable aggregate) IS NOT NULL`. If + // there's an `ORDER BY` in the subselect, SQL does not guarantee that the outer select will + // respect that order. But `ORDER BY` is relevant to the subselect when we have a `LIMIT`. + // Thus we lift the `ORDER BY` if there’s no `LIMIT` in the subselect, and repeat the `ORDER BY` + // if there is. + let subselect = SelectQuery { distinct: outer_distinct, projection: projection, from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])), constraints: vec![], group_by: group_by, + order: match &limit { + &Limit::None => vec![], + &Limit::Fixed(_) | &Limit::Variable(_) => order_by.clone(), + }, + limit, + }; + + SelectQuery { + distinct: false, + projection: Projection::Star, + from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(subselect))])), + constraints: nullable, + group_by: vec![], order: order_by, - limit: limit, + limit: Limit::None, // Any limiting comes from the internal query. } } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 1b2bb982..0e17e4a0 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -1091,14 +1091,17 @@ fn test_project_aggregates() { let SQLQuery { sql, args } = translate(&schema, query); // No outer DISTINCT: we aggregate or group by every variable. - assert_eq!(sql, "SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \ + assert_eq!(sql, "SELECT * \ FROM \ - (SELECT DISTINCT \ - `datoms00`.e AS `?e`, \ - `datoms00`.v AS `?t` \ - FROM `datoms` AS `datoms00` \ - WHERE `datoms00`.a = 99) \ - GROUP BY `?e`"); + (SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \ + FROM \ + (SELECT DISTINCT \ + `datoms00`.e AS `?e`, \ + `datoms00`.v AS `?t` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99) \ + GROUP BY `?e`) \ + WHERE `(max ?t)` IS NOT NULL"); assert_eq!(args, vec![]); let query = r#"[:find (max ?t) @@ -1106,7 +1109,75 @@ fn test_project_aggregates() { :where [?e :foo/bar ?t]]"#; let SQLQuery { sql, args } = translate(&schema, query); - assert_eq!(sql, "SELECT max(`?t`) AS `(max ?t)` \ + assert_eq!(sql, "SELECT * \ + FROM \ + (SELECT max(`?t`) AS `(max ?t)` \ + FROM \ + (SELECT DISTINCT \ + `datoms00`.v AS `?t`, \ + `datoms00`.e AS `?e` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99)\ + ) \ + WHERE `(max ?t)` IS NOT NULL"); + assert_eq!(args, vec![]); + + // ORDER BY lifted to outer query if there is no LIMIT. + let query = r#"[:find (max ?x) + :with ?e + :where + [?e ?a ?t] + [?t :foo/bar ?x] + :order ?a]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT * \ + FROM \ + (SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \ + FROM \ + (SELECT DISTINCT \ + `datoms01`.v AS `?x`, \ + `datoms00`.a AS `?a`, \ + `datoms00`.e AS `?e` \ + FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \ + GROUP BY `?a`) \ + WHERE `(max ?x)` IS NOT NULL \ + ORDER BY `?a` ASC"); + assert_eq!(args, vec![]); + + // ORDER BY duplicated in outer query if there is a LIMIT. + let query = r#"[:find (max ?x) + :with ?e + :where + [?e ?a ?t] + [?t :foo/bar ?x] + :order (desc ?a) + :limit 10]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT * \ + FROM \ + (SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \ + FROM \ + (SELECT DISTINCT \ + `datoms01`.v AS `?x`, \ + `datoms00`.a AS `?a`, \ + `datoms00`.e AS `?e` \ + FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \ + GROUP BY `?a` \ + ORDER BY `?a` DESC \ + LIMIT 10) \ + WHERE `(max ?x)` IS NOT NULL \ + ORDER BY `?a` DESC"); + assert_eq!(args, vec![]); + + // No outer SELECT * for non-nullable aggregates. + let query = r#"[:find (count ?t) + :with ?e + :where + [?e :foo/bar ?t]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT count(`?t`) AS `(count ?t)` \ FROM \ (SELECT DISTINCT \ `datoms00`.v AS `?t`, \ @@ -1116,6 +1187,28 @@ fn test_project_aggregates() { assert_eq!(args, vec![]); } +#[test] +fn test_project_the() { + let schema = prepopulated_typed_schema(ValueType::Long); + let query = r#"[:find (the ?e) (max ?t) + :where + [?e :foo/bar ?t]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + + // We shouldn't NULL-check (the). + assert_eq!(sql, "SELECT * \ + FROM \ + (SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \ + FROM \ + (SELECT DISTINCT \ + `datoms00`.e AS `?e`, \ + `datoms00`.v AS `?t` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99)) \ + WHERE `(max ?t)` IS NOT NULL"); + assert_eq!(args, vec![]); +} + #[test] fn test_tx_before_and_after() { let schema = prepopulated_typed_schema(ValueType::Long); diff --git a/tests/query.rs b/tests/query.rs index 3cdb72c9..edca7983 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -1193,6 +1193,94 @@ fn test_aggregate_the() { TypedValue::typed_string("(1) Facebook")]].into()); } +#[test] +fn test_null_aggregates() { + let store = Store::open("").expect("opened"); + + let rel = + store.q_once(r#" + [:find (count ?tx) (max ?txInstant) + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_rel_result() + .expect("no results"); + + // (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. + // We reject the entire row containing NULL aggregates. + assert_eq!(0, rel.row_count()); + + let rel_pull = + store.q_once(r#" + [:find (count ?tx) (max ?txInstant) (pull ?tx [*]) + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_rel_result() + .expect("no results"); + + // Same logic as above -- just verifying that `RelTwoStagePullProjector` handles NULL. + assert_eq!(0, rel_pull.row_count()); + + let coll = + store.q_once(r#" + [:find [(max ?txInstant) ...] + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_coll_result() + .expect("no results"); + + // (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire + // row containing NULL aggregates, yielding an empty vector of results. + assert_eq!(coll, vec![]); + + let tuple = + store.q_once(r#" + [:find [(count ?tx) (max ?txInstant)] + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_tuple_result() + .expect("no results"); + + // (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. + // We reject the entire row containing NULL aggregates, yielding no tuple result at all. + assert_eq!(tuple, None); + + + let tuple_pull = + store.q_once(r#" + [:find [(count ?tx) (max ?txInstant) (pull ?tx [*])] + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_tuple_result() + .expect("no results"); + + // Same logic as above -- just verifying that `CollTwoStagePullProjector` handles NULL. + assert_eq!(tuple_pull, None); + + let scalar = + store.q_once(r#" + [:find (max ?txInstant) . + :where [_ _ _ ?tx] + [?tx :db/txInstant ?txInstant] + [(< ?txInstant #inst "2016-01-01T11:00:00.000Z")] + ]"#, None) + .into_scalar_result() + .expect("no results"); + + // (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire + // row containing NULL aggregates, yielding no scalar result at all. + assert_eq!(scalar, None); +} + #[test] fn test_aggregation_implicit_grouping() { let mut store = Store::open("").expect("opened");