From 88df7b3b33cefff9a10a84d53e320f0dc9ab18bb Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Wed, 22 Mar 2017 14:02:00 -0700 Subject: [PATCH] Correctly generate DISTINCT and LIMIT. (#386) r=nalexander --- query-algebrizer/src/lib.rs | 3 +- query-projector/src/lib.rs | 25 ++++++++-- query-sql/src/lib.rs | 20 ++++++-- query-translator/src/translate.rs | 28 ++++++------ query-translator/tests/translate.rs | 71 +++++++++++++++++------------ 5 files changed, 96 insertions(+), 51 deletions(-) diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index eb5a670a..a5660944 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -85,11 +85,12 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { } } + let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None }; Ok(AlgebraicQuery { default_source: parsed.default_source, find_spec: parsed.find_spec, has_aggregates: false, // TODO: we don't parse them yet. - limit: None, + limit: limit, cc: cc, }) } diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index bdfcb2e7..9ec181cd 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -280,6 +280,7 @@ impl ScalarProjector { CombinedProjection { sql_projection: sql, datalog_projector: Box::new(ScalarProjector::with_template(template)), + distinct: false, } } } @@ -324,6 +325,7 @@ impl TupleProjector { CombinedProjection { sql_projection: sql, datalog_projector: Box::new(p), + distinct: false, } } } @@ -371,6 +373,7 @@ impl RelProjector { CombinedProjection { sql_projection: sql, datalog_projector: Box::new(p), + distinct: true, } } } @@ -405,6 +408,7 @@ impl CollProjector { CombinedProjection { sql_projection: sql, datalog_projector: Box::new(CollProjector::with_template(template)), + distinct: true, } } } @@ -431,6 +435,18 @@ pub struct CombinedProjection { /// A Datalog projection. This consumes rows of the appropriate shape (as defined by /// the SQL projection) to yield one of the four kinds of Datalog query result. pub datalog_projector: Box, + + /// True if this query requires the SQL query to include DISTINCT. + pub distinct: bool, +} + +impl CombinedProjection { + fn flip_distinct_for_limit(mut self, limit: Option) -> Self { + if limit == Some(1) { + self.distinct = false; + } + self + } } /// Compute a suitable SQL projection for an algebrized query. @@ -450,14 +466,14 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { let constant_projector = ConstantProjector::new(empty); CombinedProjection { sql_projection: Projection::One, - datalog_projector: Box::new(constant_projector), + distinct: false, } } else { match query.find_spec { FindColl(ref element) => { let (cols, templates) = project_elements(1, iter::once(element), query); - CollProjector::combine(cols, templates) + CollProjector::combine(cols, templates).flip_distinct_for_limit(query.limit) }, FindScalar(ref element) => { @@ -468,7 +484,7 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { FindRel(ref elements) => { let column_count = query.find_spec.expected_column_count(); let (cols, templates) = project_elements(column_count, elements, query); - RelProjector::combine(column_count, cols, templates) + RelProjector::combine(column_count, cols, templates).flip_distinct_for_limit(query.limit) }, FindTuple(ref elements) => { @@ -478,5 +494,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { }, } } -} - +} \ No newline at end of file diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index cd1578b0..d8b7c343 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -142,6 +142,7 @@ pub enum FromClause { } pub struct SelectQuery { + pub distinct: bool, pub projection: Projection, pub from: FromClause, pub constraints: Vec, @@ -348,7 +349,11 @@ impl QueryFragment for FromClause { impl QueryFragment for SelectQuery { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { - out.push_sql("SELECT "); + if self.distinct { + out.push_sql("SELECT DISTINCT "); + } else { + out.push_sql("SELECT "); + } self.projection.push_sql(out)?; self.from.push_sql(out)?; @@ -444,7 +449,6 @@ mod tests { #[test] fn test_end_to_end() { - // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] let datoms00 = "datoms00".to_string(); let datoms01 = "datoms01".to_string(); @@ -453,7 +457,9 @@ mod tests { SourceAlias(DatomsTable::Datoms, datoms00.clone()), SourceAlias(DatomsTable::Datoms, datoms01.clone()), ]; - let query = SelectQuery { + + let mut query = SelectQuery { + distinct: true, projection: Projection::Columns( vec![ ProjectedColumn( @@ -481,9 +487,17 @@ mod tests { limit: None, }; + let SQLQuery { sql, args } = query.to_sql_query().unwrap(); + println!("{}", sql); + assert_eq!("SELECT DISTINCT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql); + assert!(args.is_empty()); + + // And without distinct… + query.distinct = false; let SQLQuery { sql, args } = query.to_sql_query().unwrap(); println!("{}", sql); assert_eq!("SELECT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql); assert!(args.is_empty()); + } } diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 632b42dc..7831a450 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -135,20 +135,23 @@ pub struct ProjectedSelect{ /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// query that runs SQL. The next level up the call stack can check for known-empty queries if /// needed. -fn cc_to_select_query>>(projection: Projection, cc: ConjoiningClauses, limit: T) -> SelectQuery { +fn cc_to_select_query>>(projection: Projection, cc: ConjoiningClauses, distinct: bool, limit: T) -> SelectQuery { let from = if cc.from.is_empty() { FromClause::Nothing } else { FromClause::TableList(TableList(cc.from)) }; + + let limit = if cc.is_known_empty { Some(0) } else { limit.into() }; SelectQuery { + distinct: distinct, projection: projection, from: from, constraints: cc.wheres .into_iter() .map(|c| c.to_constraint()) .collect(), - limit: if cc.is_known_empty { Some(0) } else { limit.into() }, + limit: limit, } } @@ -158,28 +161,25 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery { if cc.is_known_empty { // In this case we can produce a very simple query that returns no results. SelectQuery { + distinct: false, projection: Projection::One, from: FromClause::Nothing, constraints: vec![], limit: Some(0), } } else { - cc_to_select_query(Projection::One, cc, 1) - } -} - -/// Consume a provided `ConjoiningClauses` to yield a new -/// `ProjectedSelect`. A projection list must also be provided. -fn cc_to_select(projection: CombinedProjection, cc: ConjoiningClauses, limit: Option) -> ProjectedSelect { - let CombinedProjection { sql_projection, datalog_projector } = projection; - ProjectedSelect { - query: cc_to_select_query(sql_projection, cc, limit), - projector: datalog_projector, + cc_to_select_query(Projection::One, cc, false, 1) } } /// Consume a provided `AlgebraicQuery` to yield a new /// `ProjectedSelect`. pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect { - cc_to_select(query_projection(&query), query.cc, query.limit) + // TODO: we can't pass `query.limit` here if we aggregate during projection. + // SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine. + let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query); + ProjectedSelect { + query: cc_to_select_query(sql_projection, query.cc, distinct, query.limit), + projector: datalog_projector, + } } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 145fddfd..bcb0710a 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -49,48 +49,63 @@ fn translate>>(schema: &Schema, input: &'static str, limit: select.query.to_sql_query().unwrap() } -#[test] -fn test_coll() { +fn prepopulated_schema() -> Schema { let mut schema = Schema::default(); associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); add_attribute(&mut schema, 99, Attribute { value_type: ValueType::String, ..Default::default() }); + schema +} + +#[test] +fn test_scalar() { + let schema = prepopulated_schema(); + + let input = r#"[:find ?x . :where [?x :foo/bar "yyy"]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); +} + +#[test] +fn test_tuple() { + let schema = prepopulated_schema(); + + let input = r#"[:find [?x] :where [?x :foo/bar "yyy"]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); +} + +#[test] +fn test_coll() { + let schema = prepopulated_schema(); let input = r#"[:find [?x ...] :where [?x :foo/bar "yyy"]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); } #[test] fn test_rel() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::String, - ..Default::default() - }); + let schema = prepopulated_schema(); let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0"); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); } #[test] fn test_limit() { - let mut schema = Schema::default(); - associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99); - add_attribute(&mut schema, 99, Attribute { - value_type: ValueType::String, - ..Default::default() - }); + let schema = prepopulated_schema(); let input = r#"[:find ?x :where [?x :foo/bar "yyy"]]"#; let SQLQuery { sql, args } = translate(&schema, input, 5); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 5"); assert_eq!(args, vec![("$v0".to_string(), "yyy".to_string())]); } @@ -102,7 +117,7 @@ fn test_unknown_attribute_keyword_value() { let SQLQuery { sql, args } = translate(&schema, input, None); // Only match keywords, not strings: tag = 13. - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = $v0 AND `datoms00`.value_type_tag = 13"); assert_eq!(args, vec![("$v0".to_string(), ":ab/yyy".to_string())]); } @@ -115,7 +130,7 @@ fn test_unknown_attribute_string_value() { // We expect all_datoms because we're querying for a string. Magic, that. // We don't want keywords etc., so tag = 10. - assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10"); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v = $v0 AND `all_datoms00`.value_type_tag = 10"); assert_eq!(args, vec![("$v0".to_string(), "horses".to_string())]); } @@ -128,7 +143,7 @@ fn test_unknown_attribute_double_value() { // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to // make sure we only match numbers. - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5"); assert_eq!(args, vec![]); } @@ -143,22 +158,22 @@ fn test_unknown_attribute_integer_value() { // Can't match boolean; no need to filter it out. let SQLQuery { sql, args } = translate(&schema, negative, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = -1"); assert_eq!(args, vec![]); // Excludes booleans. let SQLQuery { sql, args } = translate(&schema, zero, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 0 AND `datoms00`.value_type_tag <> 1)"); assert_eq!(args, vec![]); // Excludes booleans. let SQLQuery { sql, args } = translate(&schema, one, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE (`datoms00`.v = 1 AND `datoms00`.value_type_tag <> 1)"); assert_eq!(args, vec![]); // Can't match boolean; no need to filter it out. let SQLQuery { sql, args } = translate(&schema, two, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 2"); assert_eq!(args, vec![]); } @@ -188,7 +203,7 @@ fn test_numeric_less_than_unknown_attribute() { // Although we infer numericness from numeric predicates, we've already assigned a table to the // first pattern, and so this is _still_ `all_datoms`. - assert_eq!(sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10"); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10"); assert_eq!(args, vec![]); } @@ -203,7 +218,7 @@ fn test_numeric_gte_known_attribute() { let input = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); assert_eq!(args, vec![]); } @@ -216,8 +231,8 @@ fn test_numeric_not_equals_known_attribute() { ..Default::default() }); - let input = r#"[:find ?x :where [?x :foo/bar ?y] [(!= ?y 12)]]"#; + let input = r#"[:find ?x . :where [?x :foo/bar ?y] [(!= ?y 12)]]"#; let SQLQuery { sql, args } = translate(&schema, input, None); - assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12"); + assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1"); assert_eq!(args, vec![]); } \ No newline at end of file