Correctly generate DISTINCT and LIMIT. (#386) r=nalexander

This commit is contained in:
Richard Newman 2017-03-22 14:02:00 -07:00 committed by GitHub
parent 5e971f3b22
commit 88df7b3b33
5 changed files with 96 additions and 51 deletions

View file

@ -85,11 +85,12 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> {
} }
} }
let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None };
Ok(AlgebraicQuery { Ok(AlgebraicQuery {
default_source: parsed.default_source, default_source: parsed.default_source,
find_spec: parsed.find_spec, find_spec: parsed.find_spec,
has_aggregates: false, // TODO: we don't parse them yet. has_aggregates: false, // TODO: we don't parse them yet.
limit: None, limit: limit,
cc: cc, cc: cc,
}) })
} }

View file

@ -280,6 +280,7 @@ impl ScalarProjector {
CombinedProjection { CombinedProjection {
sql_projection: sql, sql_projection: sql,
datalog_projector: Box::new(ScalarProjector::with_template(template)), datalog_projector: Box::new(ScalarProjector::with_template(template)),
distinct: false,
} }
} }
} }
@ -324,6 +325,7 @@ impl TupleProjector {
CombinedProjection { CombinedProjection {
sql_projection: sql, sql_projection: sql,
datalog_projector: Box::new(p), datalog_projector: Box::new(p),
distinct: false,
} }
} }
} }
@ -371,6 +373,7 @@ impl RelProjector {
CombinedProjection { CombinedProjection {
sql_projection: sql, sql_projection: sql,
datalog_projector: Box::new(p), datalog_projector: Box::new(p),
distinct: true,
} }
} }
} }
@ -405,6 +408,7 @@ impl CollProjector {
CombinedProjection { CombinedProjection {
sql_projection: sql, sql_projection: sql,
datalog_projector: Box::new(CollProjector::with_template(template)), datalog_projector: Box::new(CollProjector::with_template(template)),
distinct: true,
} }
} }
} }
@ -431,6 +435,18 @@ pub struct CombinedProjection {
/// A Datalog projection. This consumes rows of the appropriate shape (as defined by /// A Datalog projection. This consumes rows of the appropriate shape (as defined by
/// the SQL projection) to yield one of the four kinds of Datalog query result. /// the SQL projection) to yield one of the four kinds of Datalog query result.
pub datalog_projector: Box<Projector>, pub datalog_projector: Box<Projector>,
/// True if this query requires the SQL query to include DISTINCT.
pub distinct: bool,
}
impl CombinedProjection {
fn flip_distinct_for_limit(mut self, limit: Option<u64>) -> Self {
if limit == Some(1) {
self.distinct = false;
}
self
}
} }
/// Compute a suitable SQL projection for an algebrized query. /// Compute a suitable SQL projection for an algebrized query.
@ -450,14 +466,14 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
let constant_projector = ConstantProjector::new(empty); let constant_projector = ConstantProjector::new(empty);
CombinedProjection { CombinedProjection {
sql_projection: Projection::One, sql_projection: Projection::One,
datalog_projector: Box::new(constant_projector), datalog_projector: Box::new(constant_projector),
distinct: false,
} }
} else { } else {
match query.find_spec { match query.find_spec {
FindColl(ref element) => { FindColl(ref element) => {
let (cols, templates) = project_elements(1, iter::once(element), query); let (cols, templates) = project_elements(1, iter::once(element), query);
CollProjector::combine(cols, templates) CollProjector::combine(cols, templates).flip_distinct_for_limit(query.limit)
}, },
FindScalar(ref element) => { FindScalar(ref element) => {
@ -468,7 +484,7 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
FindRel(ref elements) => { FindRel(ref elements) => {
let column_count = query.find_spec.expected_column_count(); let column_count = query.find_spec.expected_column_count();
let (cols, templates) = project_elements(column_count, elements, query); let (cols, templates) = project_elements(column_count, elements, query);
RelProjector::combine(column_count, cols, templates) RelProjector::combine(column_count, cols, templates).flip_distinct_for_limit(query.limit)
}, },
FindTuple(ref elements) => { FindTuple(ref elements) => {
@ -478,5 +494,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection {
}, },
} }
} }
} }

View file

@ -142,6 +142,7 @@ pub enum FromClause {
} }
pub struct SelectQuery { pub struct SelectQuery {
pub distinct: bool,
pub projection: Projection, pub projection: Projection,
pub from: FromClause, pub from: FromClause,
pub constraints: Vec<Constraint>, pub constraints: Vec<Constraint>,
@ -348,7 +349,11 @@ impl QueryFragment for FromClause {
impl QueryFragment for SelectQuery { impl QueryFragment for SelectQuery {
fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
out.push_sql("SELECT "); if self.distinct {
out.push_sql("SELECT DISTINCT ");
} else {
out.push_sql("SELECT ");
}
self.projection.push_sql(out)?; self.projection.push_sql(out)?;
self.from.push_sql(out)?; self.from.push_sql(out)?;
@ -444,7 +449,6 @@ mod tests {
#[test] #[test]
fn test_end_to_end() { fn test_end_to_end() {
// [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]]
let datoms00 = "datoms00".to_string(); let datoms00 = "datoms00".to_string();
let datoms01 = "datoms01".to_string(); let datoms01 = "datoms01".to_string();
@ -453,7 +457,9 @@ mod tests {
SourceAlias(DatomsTable::Datoms, datoms00.clone()), SourceAlias(DatomsTable::Datoms, datoms00.clone()),
SourceAlias(DatomsTable::Datoms, datoms01.clone()), SourceAlias(DatomsTable::Datoms, datoms01.clone()),
]; ];
let query = SelectQuery {
let mut query = SelectQuery {
distinct: true,
projection: Projection::Columns( projection: Projection::Columns(
vec![ vec![
ProjectedColumn( ProjectedColumn(
@ -481,9 +487,17 @@ mod tests {
limit: None, limit: None,
}; };
let SQLQuery { sql, args } = query.to_sql_query().unwrap();
println!("{}", sql);
assert_eq!("SELECT DISTINCT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql);
assert!(args.is_empty());
// And without distinct…
query.distinct = false;
let SQLQuery { sql, args } = query.to_sql_query().unwrap(); let SQLQuery { sql, args } = query.to_sql_query().unwrap();
println!("{}", sql); println!("{}", sql);
assert_eq!("SELECT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql); assert_eq!("SELECT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql);
assert!(args.is_empty()); assert!(args.is_empty());
} }
} }

View file

@ -135,20 +135,23 @@ pub struct ProjectedSelect{
/// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a
/// query that runs SQL. The next level up the call stack can check for known-empty queries if /// query that runs SQL. The next level up the call stack can check for known-empty queries if
/// needed. /// needed.
fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: ConjoiningClauses, limit: T) -> SelectQuery { fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: ConjoiningClauses, distinct: bool, limit: T) -> SelectQuery {
let from = if cc.from.is_empty() { let from = if cc.from.is_empty() {
FromClause::Nothing FromClause::Nothing
} else { } else {
FromClause::TableList(TableList(cc.from)) FromClause::TableList(TableList(cc.from))
}; };
let limit = if cc.is_known_empty { Some(0) } else { limit.into() };
SelectQuery { SelectQuery {
distinct: distinct,
projection: projection, projection: projection,
from: from, from: from,
constraints: cc.wheres constraints: cc.wheres
.into_iter() .into_iter()
.map(|c| c.to_constraint()) .map(|c| c.to_constraint())
.collect(), .collect(),
limit: if cc.is_known_empty { Some(0) } else { limit.into() }, limit: limit,
} }
} }
@ -158,28 +161,25 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
if cc.is_known_empty { if cc.is_known_empty {
// In this case we can produce a very simple query that returns no results. // In this case we can produce a very simple query that returns no results.
SelectQuery { SelectQuery {
distinct: false,
projection: Projection::One, projection: Projection::One,
from: FromClause::Nothing, from: FromClause::Nothing,
constraints: vec![], constraints: vec![],
limit: Some(0), limit: Some(0),
} }
} else { } else {
cc_to_select_query(Projection::One, cc, 1) cc_to_select_query(Projection::One, cc, false, 1)
}
}
/// Consume a provided `ConjoiningClauses` to yield a new
/// `ProjectedSelect`. A projection list must also be provided.
fn cc_to_select(projection: CombinedProjection, cc: ConjoiningClauses, limit: Option<u64>) -> ProjectedSelect {
let CombinedProjection { sql_projection, datalog_projector } = projection;
ProjectedSelect {
query: cc_to_select_query(sql_projection, cc, limit),
projector: datalog_projector,
} }
} }
/// Consume a provided `AlgebraicQuery` to yield a new /// Consume a provided `AlgebraicQuery` to yield a new
/// `ProjectedSelect`. /// `ProjectedSelect`.
pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect { pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect {
cc_to_select(query_projection(&query), query.cc, query.limit) // TODO: we can't pass `query.limit` here if we aggregate during projection.
// SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine.
let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query);
ProjectedSelect {
query: cc_to_select_query(sql_projection, query.cc, distinct, query.limit),
projector: datalog_projector,
}
} }

View file

@ -49,48 +49,63 @@ fn translate<T: Into<Option<u64>>>(schema: &Schema, input: &'static str, limit:
select.query.to_sql_query().unwrap() select.query.to_sql_query().unwrap()
} }
#[test] fn prepopulated_schema() -> Schema {
fn test_coll() {
let mut schema = Schema::default(); let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute { add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String, value_type: ValueType::String,
..Default::default() ..Default::default()
}); });
schema
}
#[test]
fn test_scalar() {
let schema = prepopulated_schema();
let input = r#"[:find ?x . :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_tuple() {
let schema = prepopulated_schema();
let input = r#"[:find [?x] :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
}
#[test]
fn test_coll() {
let schema = prepopulated_schema();
let input = r#"[:find [?x ...] :where [?x :foo/bar "yyy"]]"#; let input = r#"[:find [?x ...] :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None); let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
} }
#[test] #[test]
fn test_rel() { fn test_rel() {
let mut schema = Schema::default(); let schema = prepopulated_schema();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String,
..Default::default()
});
let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#; let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None); let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
} }
#[test] #[test]
fn test_limit() { fn test_limit() {
let mut schema = Schema::default(); let schema = prepopulated_schema();
associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
add_attribute(&mut schema, 99, Attribute {
value_type: ValueType::String,
..Default::default()
});
let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#; let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#;
let SQLQuery { sql, args } = translate(&schema, input, 5); let SQLQuery { sql, args } = translate(&schema, input, 5);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5");
assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]);
} }
@ -102,7 +117,7 @@ fn test_unknown_attribute_keyword_value() {
let SQLQuery { sql, args } = translate(&schema, input, None); let SQLQuery { sql, args } = translate(&schema, input, None);
// Only match keywords, not strings: tag = 13. // Only match keywords, not strings: tag = 13.
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13");
assert_eq!(args, vec![("$v0".to_string(), ":ab/yyy".to_string())]); assert_eq!(args, vec![("$v0".to_string(), ":ab/yyy".to_string())]);
} }
@ -115,7 +130,7 @@ fn test_unknown_attribute_string_value() {
// We expect all_datoms because we're querying for a string. Magic, that. // We expect all_datoms because we're querying for a string. Magic, that.
// We don't want keywords etc., so tag = 10. // We don't want keywords etc., so tag = 10.
assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10"); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10");
assert_eq!(args, vec![("$v0".to_string(), "horses".to_string())]); assert_eq!(args, vec![("$v0".to_string(), "horses".to_string())]);
} }
@ -128,7 +143,7 @@ fn test_unknown_attribute_double_value() {
// In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to
// make sure we only match numbers. // make sure we only match numbers.
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -143,22 +158,22 @@ fn test_unknown_attribute_integer_value() {
// Can't match boolean; no need to filter it out. // Can't match boolean; no need to filter it out.
let SQLQuery { sql, args } = translate(&schema, negative, None); let SQLQuery { sql, args } = translate(&schema, negative, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
// Excludes booleans. // Excludes booleans.
let SQLQuery { sql, args } = translate(&schema, zero, None); let SQLQuery { sql, args } = translate(&schema, zero, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
// Excludes booleans. // Excludes booleans.
let SQLQuery { sql, args } = translate(&schema, one, None); let SQLQuery { sql, args } = translate(&schema, one, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
// Can't match boolean; no need to filter it out. // Can't match boolean; no need to filter it out.
let SQLQuery { sql, args } = translate(&schema, two, None); let SQLQuery { sql, args } = translate(&schema, two, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -188,7 +203,7 @@ fn test_numeric_less_than_unknown_attribute() {
// Although we infer numericness from numeric predicates, we've already assigned a table to the // Although we infer numericness from numeric predicates, we've already assigned a table to the
// first pattern, and so this is _still_ `all_datoms`. // first pattern, and so this is _still_ `all_datoms`.
assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10"); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -203,7 +218,7 @@ fn test_numeric_gte_known_attribute() {
let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#; let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None); let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
@ -216,8 +231,8 @@ fn test_numeric_not_equals_known_attribute() {
..Default::default() ..Default::default()
}); });
let input = r#"[:find ?x :where [?x :foo/bar ?y] [(!= ?y 12)]]"#; let input = r#"[:find ?x . :where [?x :foo/bar ?y] [(!= ?y 12)]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None); let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12"); assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }