This uses a `SELECT *` from an inner subselect to filter potentially `NULL` aggregates. The alternative is to handle `NULL` values throughout the projector, which is simple but loses a valuable invariant: Mentat SQL queries produce values that are not `NULL`.
This commit is contained in:
parent
2a025916fe
commit
d8d18a1731
6 changed files with 283 additions and 14 deletions
|
@ -330,7 +330,7 @@ impl Debug for QueryValue {
|
|||
|
||||
/// Represents an entry in the ORDER BY list: a variable or a variable's type tag.
|
||||
/// (We require order vars to be projected, so we can simply use a variable here.)
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct OrderBy(pub Direction, pub VariableColumn);
|
||||
|
||||
impl From<Order> for OrderBy {
|
||||
|
|
|
@ -164,6 +164,15 @@ impl SimpleAggregate {
|
|||
Count | Sum => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return `true` if this aggregate can be `NULL` over 0 rows.
|
||||
pub(crate) fn is_nullable(&self) -> bool {
|
||||
use self::SimpleAggregationOp::*;
|
||||
match self.op {
|
||||
Avg | Max | Min => true,
|
||||
Count | Sum => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait SimpleAggregation {
|
||||
|
@ -201,8 +210,12 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc
|
|||
sql_op: simple.op.to_sql(),
|
||||
arg: ColumnOrExpression::Value(value),
|
||||
};
|
||||
if simple.is_nullable() {
|
||||
ColumnOrExpression::NullableAggregate(Box::new(expression), return_type)
|
||||
} else {
|
||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The common case: the values are bound during execution.
|
||||
let name = VariableColumn::Variable(simple.var.clone()).column_name();
|
||||
|
@ -210,7 +223,11 @@ pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc
|
|||
sql_op: simple.op.to_sql(),
|
||||
arg: ColumnOrExpression::ExistingColumn(name),
|
||||
};
|
||||
if simple.is_nullable() {
|
||||
ColumnOrExpression::NullableAggregate(Box::new(expression), return_type)
|
||||
} else {
|
||||
ColumnOrExpression::Expression(Box::new(expression), return_type)
|
||||
}
|
||||
};
|
||||
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
|
||||
}
|
||||
|
|
|
@ -62,6 +62,9 @@ pub enum ColumnOrExpression {
|
|||
Integer(i32), // We use these for type codes etc.
|
||||
Long(i64),
|
||||
Value(TypedValue),
|
||||
// Some aggregates (`min`, `max`, `avg`) can be over 0 rows, and therefore can be `NULL`; that
|
||||
// needs special treatment.
|
||||
NullableAggregate(Box<Expression>, ValueType), // Track the return type.
|
||||
Expression(Box<Expression>, ValueType), // Track the return type.
|
||||
}
|
||||
|
||||
|
@ -130,6 +133,12 @@ pub enum Constraint {
|
|||
left: ColumnOrExpression,
|
||||
list: Vec<ColumnOrExpression>,
|
||||
},
|
||||
IsNull {
|
||||
value: ColumnOrExpression,
|
||||
},
|
||||
IsNotNull {
|
||||
value: ColumnOrExpression,
|
||||
},
|
||||
NotExists {
|
||||
subquery: TableOrSubquery,
|
||||
},
|
||||
|
@ -278,6 +287,7 @@ impl QueryFragment for ColumnOrExpression {
|
|||
&Value(ref v) => {
|
||||
out.push_typed_value(v)
|
||||
},
|
||||
&NullableAggregate(ref e, _) |
|
||||
&Expression(ref e, _) => {
|
||||
e.push_sql(out)
|
||||
},
|
||||
|
@ -343,6 +353,18 @@ impl QueryFragment for Constraint {
|
|||
right.push_sql(out)
|
||||
},
|
||||
|
||||
&IsNull { ref value } => {
|
||||
value.push_sql(out)?;
|
||||
out.push_sql(" IS NULL");
|
||||
Ok(())
|
||||
},
|
||||
|
||||
&IsNotNull { ref value } => {
|
||||
value.push_sql(out)?;
|
||||
out.push_sql(" IS NOT NULL");
|
||||
Ok(())
|
||||
},
|
||||
|
||||
&And { ref constraints } => {
|
||||
// An empty intersection is true.
|
||||
if constraints.is_empty() {
|
||||
|
|
|
@ -427,7 +427,27 @@ fn re_project(mut inner: SelectQuery, projection: Projection) -> SelectQuery {
|
|||
let limit = inner.limit;
|
||||
inner.limit = Limit::None;
|
||||
|
||||
SelectQuery {
|
||||
use self::Projection::*;
|
||||
|
||||
let nullable = match &projection {
|
||||
&Columns(ref columns) => {
|
||||
columns.iter().filter_map(|pc| {
|
||||
match pc {
|
||||
&ProjectedColumn(ColumnOrExpression::NullableAggregate(_, _), ref name) => {
|
||||
Some(Constraint::IsNotNull {
|
||||
value: ColumnOrExpression::ExistingColumn(name.clone()),
|
||||
})
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}).collect()
|
||||
},
|
||||
&Star => vec![],
|
||||
&One => vec![],
|
||||
};
|
||||
|
||||
if nullable.is_empty() {
|
||||
return SelectQuery {
|
||||
distinct: outer_distinct,
|
||||
projection: projection,
|
||||
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])),
|
||||
|
@ -435,6 +455,35 @@ fn re_project(mut inner: SelectQuery, projection: Projection) -> SelectQuery {
|
|||
group_by: group_by,
|
||||
order: order_by,
|
||||
limit: limit,
|
||||
};
|
||||
}
|
||||
|
||||
// Our pattern is `SELECT * FROM (SELECT ...) WHERE (nullable aggregate) IS NOT NULL`. If
|
||||
// there's an `ORDER BY` in the subselect, SQL does not guarantee that the outer select will
|
||||
// respect that order. But `ORDER BY` is relevant to the subselect when we have a `LIMIT`.
|
||||
// Thus we lift the `ORDER BY` if there’s no `LIMIT` in the subselect, and repeat the `ORDER BY`
|
||||
// if there is.
|
||||
let subselect = SelectQuery {
|
||||
distinct: outer_distinct,
|
||||
projection: projection,
|
||||
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(inner))])),
|
||||
constraints: vec![],
|
||||
group_by: group_by,
|
||||
order: match &limit {
|
||||
&Limit::None => vec![],
|
||||
&Limit::Fixed(_) | &Limit::Variable(_) => order_by.clone(),
|
||||
},
|
||||
limit,
|
||||
};
|
||||
|
||||
SelectQuery {
|
||||
distinct: false,
|
||||
projection: Projection::Star,
|
||||
from: FromClause::TableList(TableList(vec![TableOrSubquery::Subquery(Box::new(subselect))])),
|
||||
constraints: nullable,
|
||||
group_by: vec![],
|
||||
order: order_by,
|
||||
limit: Limit::None, // Any limiting comes from the internal query.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1091,14 +1091,17 @@ fn test_project_aggregates() {
|
|||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
// No outer DISTINCT: we aggregate or group by every variable.
|
||||
assert_eq!(sql, "SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
||||
assert_eq!(sql, "SELECT * \
|
||||
FROM \
|
||||
(SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms00`.e AS `?e`, \
|
||||
`datoms00`.v AS `?t` \
|
||||
FROM `datoms` AS `datoms00` \
|
||||
WHERE `datoms00`.a = 99) \
|
||||
GROUP BY `?e`");
|
||||
GROUP BY `?e`) \
|
||||
WHERE `(max ?t)` IS NOT NULL");
|
||||
assert_eq!(args, vec![]);
|
||||
|
||||
let query = r#"[:find (max ?t)
|
||||
|
@ -1106,7 +1109,75 @@ fn test_project_aggregates() {
|
|||
:where
|
||||
[?e :foo/bar ?t]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
assert_eq!(sql, "SELECT max(`?t`) AS `(max ?t)` \
|
||||
assert_eq!(sql, "SELECT * \
|
||||
FROM \
|
||||
(SELECT max(`?t`) AS `(max ?t)` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms00`.v AS `?t`, \
|
||||
`datoms00`.e AS `?e` \
|
||||
FROM `datoms` AS `datoms00` \
|
||||
WHERE `datoms00`.a = 99)\
|
||||
) \
|
||||
WHERE `(max ?t)` IS NOT NULL");
|
||||
assert_eq!(args, vec![]);
|
||||
|
||||
// ORDER BY lifted to outer query if there is no LIMIT.
|
||||
let query = r#"[:find (max ?x)
|
||||
:with ?e
|
||||
:where
|
||||
[?e ?a ?t]
|
||||
[?t :foo/bar ?x]
|
||||
:order ?a]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
assert_eq!(sql, "SELECT * \
|
||||
FROM \
|
||||
(SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms01`.v AS `?x`, \
|
||||
`datoms00`.a AS `?a`, \
|
||||
`datoms00`.e AS `?e` \
|
||||
FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \
|
||||
WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \
|
||||
GROUP BY `?a`) \
|
||||
WHERE `(max ?x)` IS NOT NULL \
|
||||
ORDER BY `?a` ASC");
|
||||
assert_eq!(args, vec![]);
|
||||
|
||||
// ORDER BY duplicated in outer query if there is a LIMIT.
|
||||
let query = r#"[:find (max ?x)
|
||||
:with ?e
|
||||
:where
|
||||
[?e ?a ?t]
|
||||
[?t :foo/bar ?x]
|
||||
:order (desc ?a)
|
||||
:limit 10]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
assert_eq!(sql, "SELECT * \
|
||||
FROM \
|
||||
(SELECT max(`?x`) AS `(max ?x)`, `?a` AS `?a` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms01`.v AS `?x`, \
|
||||
`datoms00`.a AS `?a`, \
|
||||
`datoms00`.e AS `?e` \
|
||||
FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` \
|
||||
WHERE `datoms01`.a = 99 AND `datoms00`.v = `datoms01`.e) \
|
||||
GROUP BY `?a` \
|
||||
ORDER BY `?a` DESC \
|
||||
LIMIT 10) \
|
||||
WHERE `(max ?x)` IS NOT NULL \
|
||||
ORDER BY `?a` DESC");
|
||||
assert_eq!(args, vec![]);
|
||||
|
||||
// No outer SELECT * for non-nullable aggregates.
|
||||
let query = r#"[:find (count ?t)
|
||||
:with ?e
|
||||
:where
|
||||
[?e :foo/bar ?t]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
assert_eq!(sql, "SELECT count(`?t`) AS `(count ?t)` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms00`.v AS `?t`, \
|
||||
|
@ -1116,6 +1187,28 @@ fn test_project_aggregates() {
|
|||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_project_the() {
|
||||
let schema = prepopulated_typed_schema(ValueType::Long);
|
||||
let query = r#"[:find (the ?e) (max ?t)
|
||||
:where
|
||||
[?e :foo/bar ?t]]"#;
|
||||
let SQLQuery { sql, args } = translate(&schema, query);
|
||||
|
||||
// We shouldn't NULL-check (the).
|
||||
assert_eq!(sql, "SELECT * \
|
||||
FROM \
|
||||
(SELECT `?e` AS `?e`, max(`?t`) AS `(max ?t)` \
|
||||
FROM \
|
||||
(SELECT DISTINCT \
|
||||
`datoms00`.e AS `?e`, \
|
||||
`datoms00`.v AS `?t` \
|
||||
FROM `datoms` AS `datoms00` \
|
||||
WHERE `datoms00`.a = 99)) \
|
||||
WHERE `(max ?t)` IS NOT NULL");
|
||||
assert_eq!(args, vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tx_before_and_after() {
|
||||
let schema = prepopulated_typed_schema(ValueType::Long);
|
||||
|
|
|
@ -1193,6 +1193,94 @@ fn test_aggregate_the() {
|
|||
TypedValue::typed_string("(1) Facebook")]].into());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_null_aggregates() {
|
||||
let store = Store::open("").expect("opened");
|
||||
|
||||
let rel =
|
||||
store.q_once(r#"
|
||||
[:find (count ?tx) (max ?txInstant)
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_rel_result()
|
||||
.expect("no results");
|
||||
|
||||
// (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows.
|
||||
// We reject the entire row containing NULL aggregates.
|
||||
assert_eq!(0, rel.row_count());
|
||||
|
||||
let rel_pull =
|
||||
store.q_once(r#"
|
||||
[:find (count ?tx) (max ?txInstant) (pull ?tx [*])
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_rel_result()
|
||||
.expect("no results");
|
||||
|
||||
// Same logic as above -- just verifying that `RelTwoStagePullProjector` handles NULL.
|
||||
assert_eq!(0, rel_pull.row_count());
|
||||
|
||||
let coll =
|
||||
store.q_once(r#"
|
||||
[:find [(max ?txInstant) ...]
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_coll_result()
|
||||
.expect("no results");
|
||||
|
||||
// (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire
|
||||
// row containing NULL aggregates, yielding an empty vector of results.
|
||||
assert_eq!(coll, vec![]);
|
||||
|
||||
let tuple =
|
||||
store.q_once(r#"
|
||||
[:find [(count ?tx) (max ?txInstant)]
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_tuple_result()
|
||||
.expect("no results");
|
||||
|
||||
// (count ?tx) is 0, but (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows.
|
||||
// We reject the entire row containing NULL aggregates, yielding no tuple result at all.
|
||||
assert_eq!(tuple, None);
|
||||
|
||||
|
||||
let tuple_pull =
|
||||
store.q_once(r#"
|
||||
[:find [(count ?tx) (max ?txInstant) (pull ?tx [*])]
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_tuple_result()
|
||||
.expect("no results");
|
||||
|
||||
// Same logic as above -- just verifying that `CollTwoStagePullProjector` handles NULL.
|
||||
assert_eq!(tuple_pull, None);
|
||||
|
||||
let scalar =
|
||||
store.q_once(r#"
|
||||
[:find (max ?txInstant) .
|
||||
:where [_ _ _ ?tx]
|
||||
[?tx :db/txInstant ?txInstant]
|
||||
[(< ?txInstant #inst "2016-01-01T11:00:00.000Z")]
|
||||
]"#, None)
|
||||
.into_scalar_result()
|
||||
.expect("no results");
|
||||
|
||||
// (max ?txInstant) is over 0 SQL rows, yielding a NULL in the SQL rows. We reject the entire
|
||||
// row containing NULL aggregates, yielding no scalar result at all.
|
||||
assert_eq!(scalar, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_aggregation_implicit_grouping() {
|
||||
let mut store = Store::open("").expect("opened");
|
||||
|
|
Loading…
Reference in a new issue