Correctly generate DISTINCT and LIMIT. (#386) r=nalexander

This commit is contained in:
Richard Newman 2017-03-22 14:02:00 -07:00 committed by GitHub
parent 5e971f3b22
commit 88df7b3b33
5 changed files with 96 additions and 51 deletions

View file

@ -85,11 +85,12 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> {
}
}
let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None };
Ok(AlgebraicQuery {
default_source: parsed.default_source,
find_spec: parsed.find_spec,
has_aggregates: false, // TODO: we don't parse them yet.
limit: None,
limit: limit,
cc: cc,
})
}

View file

@ -280,6 +280,7 @@ impl ScalarProjector {
CombinedProjection {
sql_projection: sql,
datalog_projector: Box::new(ScalarProjector::with_template(template)),
distinct: false,
}
}
}
@ -324,6 +325,7 @@ impl TupleProjector {
CombinedProjection {
sql_projection: sql,
datalog_projector: Box::new(p),
distinct: false,
}
}
}
@ -371,6 +373,7 @@ impl RelProjector {
CombinedProjection {
sql_projection: sql,
datalog_projector: Box::new(p),
distinct: true,
}
}
}
@ -405,6 +408,7 @@ impl CollProjector {
CombinedProjection {
sql_projection: sql,
datalog_projector: Box::new(CollProjector::with_template(template)),
distinct: true,
}
}
}
@ -431,6 +435,18 @@ pub struct CombinedProjection {
/// A Datalog projection. This consumes rows of the appropriate shape (as defined by
/// the SQL projection) to yield one of the four kinds of Datalog query result.
pub datalog_projector: Box<Projector>,
/// True if this query requires the SQL query to include DISTINCT.
pub distinct: bool,
}
impl CombinedProjection {
fn flip_distinct_for_limit(mut self, limit: Option<u64>) -> Self {
if limit == Some(1) {
self.distinct = false;
}
self
}
}
/// Compute a suitable SQL projection for an algebrized query.
@ -450,14 +466,14 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
let constant_projector = ConstantProjector::new(empty);
CombinedProjection {
sql_projection: Projection::One,
datalog_projector: Box::new(constant_projector),
distinct: false,
}
} else {
match query.find_spec {
FindColl(ref element) => {
let (cols, templates) = project_elements(1, iter::once(element), query);
CollProjector::combine(cols, templates)
CollProjector::combine(cols, templates).flip_distinct_for_limit(query.limit)
},
FindScalar(ref element) => {
@ -468,7 +484,7 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
FindRel(ref elements) => {
let column_count = query.find_spec.expected_column_count();
let (cols, templates) = project_elements(column_count, elements, query);
RelProjector::combine(column_count, cols, templates)
RelProjector::combine(column_count, cols, templates).flip_distinct_for_limit(query.limit)
},
FindTuple(ref elements) => {
@ -478,5 +494,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
},
}
}
}
}

View file

@ -142,6 +142,7 @@ pub enum FromClause {
}
pub struct SelectQuery {
pub distinct: bool,
pub projection: Projection,
pub from: FromClause,
pub constraints: Vec<Constraint>,
@ -348,7 +349,11 @@ impl QueryFragment for FromClause {
impl QueryFragment for SelectQuery {
fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
out.push_sql("SELECT ");
if self.distinct {
out.push_sql("SELECT DISTINCT ");
} else {
out.push_sql("SELECT ");
}
self.projection.push_sql(out)?;
self.from.push_sql(out)?;
@ -444,7 +449,6 @@ mod tests {
#[test]
fn test_end_to_end() {
// [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]]
let datoms00 = "datoms00".to_string();
let datoms01 = "datoms01".to_string();
@ -453,7 +457,9 @@ mod tests {
SourceAlias(DatomsTable::Datoms, datoms00.clone()),
SourceAlias(DatomsTable::Datoms, datoms01.clone()),
];
let query = SelectQuery {
let mut query = SelectQuery {
distinct: true,
projection: Projection::Columns(
vec![
ProjectedColumn(
@ -481,9 +487,17 @@ mod tests {
limit: None,
};
let SQLQuery { sql, args } = query.to_sql_query().unwrap();
println!("{}", sql);
assert_eq!("SELECT DISTINCT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql);
assert!(args.is_empty());
// And without distinct…
query.distinct = false;
let SQLQuery { sql, args } = query.to_sql_query().unwrap();
println!("{}", sql);
assert_eq!("SELECT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql);
assert!(args.is_empty());
}
}

View file

@ -135,20 +135,23 @@ pub struct ProjectedSelect{
/// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a
/// query that runs SQL. The next level up the call stack can check for known-empty queries if
/// needed.
fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: ConjoiningClauses, limit: T) -> SelectQuery {
fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: ConjoiningClauses, distinct: bool, limit: T) -> SelectQuery {
let from = if cc.from.is_empty() {
FromClause::Nothing
} else {
FromClause::TableList(TableList(cc.from))
};
let limit = if cc.is_known_empty { Some(0) } else { limit.into() };
SelectQuery {
distinct: distinct,
projection: projection,
from: from,
constraints: cc.wheres
.into_iter()
.map(|c| c.to_constraint())
.collect(),
limit: if cc.is_known_empty { Some(0) } else { limit.into() },
limit: limit,
}
}
@ -158,28 +161,25 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
if cc.is_known_empty {
// In this case we can produce a very simple query that returns no results.
SelectQuery {
distinct: false,
projection: Projection::One,
from: FromClause::Nothing,
constraints: vec![],
limit: Some(0),
}
} else {
cc_to_select_query(Projection::One, cc, 1)
}
}
/// Consume a provided `ConjoiningClauses` to yield a new
/// `ProjectedSelect`. A projection list must also be provided.
fn cc_to_select(projection: CombinedProjection, cc: ConjoiningClauses, limit: Option<u64>) -> ProjectedSelect {
let CombinedProjection { sql_projection, datalog_projector } = projection;
ProjectedSelect {
query: cc_to_select_query(sql_projection, cc, limit),
projector: datalog_projector,
cc_to_select_query(Projection::One, cc, false, 1)
}
}
/// Consume a provided `AlgebraicQuery` to yield a new
/// `ProjectedSelect`.
pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect {
cc_to_select(query_projection(&query), query.cc, query.limit)
// TODO: we can't pass `query.limit` here if we aggregate during projection.
// SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine.
let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query);
ProjectedSelect {
query: cc_to_select_query(sql_projection, query.cc, distinct, query.limit),
projector: datalog_projector,
}
}

View file

@ -49,48 +49,63 @@ fn translate<T: Into<Option<u64>>>(schema: &Schema, input: &'static str, limit:
select.query.to_sql_query().unwrap()
}
#[test]
fn test_coll() {
fn prepopulated_schema() -> Schema {
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String,
..Default::default()
});
schema
}
#[test]
fn test_scalar() {
let schema = prepopulated_schema();
let input = r#"[:find ?x . :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_tuple() {
let schema = prepopulated_schema();
let input = r#"[:find [?x] :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_coll() {
let schema = prepopulated_schema();
let input = r#"[:find [?x ...] :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_rel() {
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String,
..Default::default()
});
let schema = prepopulated_schema();
let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_limit() {
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String,
..Default::default()
});
let schema = prepopulated_schema();
let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, 5);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
@ -102,7 +117,7 @@ fn test_unknown_attribute_keyword_value() {
let SQLQuery { sql, args } = translate(&schema, input, None);
// Only match keywords, not strings: tag = 13.
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
assert_eq!(args, vec![("$v0".to_string(), ":ab/yyy".to_string())]);
}
@ -115,7 +130,7 @@ fn test_unknown_attribute_string_value() {
// We expect all_datoms because we're querying for a string. Magic, that.
// We don't want keywords etc., so tag = 10.
assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
assert_eq!(args, vec![("$v0".to_string(), "horses".to_string())]);
}
@ -128,7 +143,7 @@ fn test_unknown_attribute_double_value() {
// In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
// make sure we only match numbers.
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5");
assert_eq!(args, vec![]);
}
@ -143,22 +158,22 @@ fn test_unknown_attribute_integer_value() {
// Can't match boolean; no need to filter it out.
let SQLQuery { sql, args } = translate(&schema, negative, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1");
assert_eq!(args, vec![]);
// Excludes booleans.
let SQLQuery { sql, args } = translate(&schema, zero, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(args, vec![]);
// Excludes booleans.
let SQLQuery { sql, args } = translate(&schema, one, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(args, vec![]);
// Can't match boolean; no need to filter it out.
let SQLQuery { sql, args } = translate(&schema, two, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2");
assert_eq!(args, vec![]);
}
@ -188,7 +203,7 @@ fn test_numeric_less_than_unknown_attribute() {
// Although we infer numericness from numeric predicates, we've already assigned a table to the
// first pattern, and so this is _still_ `all_datoms`.
assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10");
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10");
assert_eq!(args, vec![]);
}
@ -203,7 +218,7 @@ fn test_numeric_gte_known_attribute() {
let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9");
assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9");
assert_eq!(args, vec![]);
}
@ -216,8 +231,8 @@ fn test_numeric_not_equals_known_attribute() {
..Default::default()
});
let input = r#"[:find ?x :where [?x :foo/bar ?y] [(!= ?y 12)]]"#;
let input = r#"[:find ?x . :where [?x :foo/bar ?y] [(!= ?y 12)]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12");
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1");
assert_eq!(args, vec![]);
}