From 35d73d55417c0b30a6c0be97d27cd9a389a7999c Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Fri, 14 Apr 2017 16:10:56 -0700 Subject: [PATCH] Implement :order. (#415) (#416) r=nalexander This adds an `:order` keyword to `:find`. If present, the results of the query will be an ordered set, rather than an unordered set; rows will appear in an ordered defined by each `:order` entry. Each can be one of three things: - A var, `?x`, meaning "order by ?x ascending". - A pair, `(asc ?x)`, meaning "order by ?x ascending". - A pair, `(desc ?x)`, meaning "order by ?x descending". Values will be ordered in this sequence for asc, and in reverse for desc: 1. Entity IDs, in ascending numerical order. 2. Booleans, false then true. 3. Timestamps, in ascending numerical order. 4. Longs and doubles, intermixed, in ascending numerical order. 5. Strings, in ascending lexicographic order. 6. Keywords, in ascending lexicographic order, considering the entire ns/name pair as a single string separated by '/'. Subcommits: Pre: make bound_value public. Pre: generalize ErrorKind::UnboundVariable for use in order. Part 1: parse (direction, var) pairs. Part 2: parse :order clause into FindQuery. Part 3: include order variables in algebrized query. We add order variables to :with, so we can reuse its type tag projection logic, and so that we can phrase ordering in terms of variables rather than datoms columns. Part 4: produce SQL for order clauses. --- query-algebrizer/src/clauses/mod.rs | 2 +- query-algebrizer/src/errors.rs | 2 +- query-algebrizer/src/lib.rs | 46 +++++++++++++++++++++++++++-- query-algebrizer/src/types.rs | 13 ++++++++ query-parser/src/parse.rs | 36 +++++++++++++++++++++- query-parser/tests/find_tests.rs | 26 ++++++++++++++++ query-sql/src/lib.rs | 43 ++++++++++++++++++++------- query-translator/src/translate.rs | 22 +++++++++++--- query-translator/tests/translate.rs | 21 +++++++++++++ query/src/lib.rs | 11 +++++++ 10 files changed, 202 insertions(+), 20 deletions(-) diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 4c85f25b..c8407583 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -272,7 +272,7 @@ impl ConjoiningClauses { } impl ConjoiningClauses { - fn bound_value(&self, var: &Variable) -> Option { + pub fn bound_value(&self, var: &Variable) -> Option { self.value_bindings.get(var).cloned() } diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index dff08556..b699039a 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -31,7 +31,7 @@ error_chain! { } UnboundVariable(name: PlainSymbol) { - description("unbound variable in function call") + description("unbound variable in order clause or function call") display("unbound variable: {}", name) } diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 831a0023..344ee1d2 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -30,6 +30,7 @@ use mentat_core::counter::RcCounter; use mentat_query::{ FindQuery, FindSpec, + Order, SrcVar, Variable, }; @@ -44,8 +45,9 @@ pub use errors::{ pub struct AlgebraicQuery { default_source: SrcVar, pub find_spec: FindSpec, - pub with: BTreeSet, has_aggregates: bool, + pub with: BTreeSet, + pub order: Option>, pub limit: Option, pub cc: clauses::ConjoiningClauses, } @@ -84,6 +86,42 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default()) } +/// Take an ordering list. Any variables that aren't fixed by the query are used to produce +/// a vector of `OrderBy` instances, including type comparisons if necessary. This function also +/// returns a set of variables that should be added to the `with` clause to make the ordering +/// clauses possible. +fn validate_and_simplify_order(cc: &ConjoiningClauses, order: Option>) + -> Result<(Option>, BTreeSet)> { + match order { + None => Ok((None, BTreeSet::default())), + Some(order) => { + let mut order_bys: Vec = Vec::with_capacity(order.len() * 2); // Space for tags. + let mut vars: BTreeSet = BTreeSet::default(); + + for Order(direction, var) in order.into_iter() { + // Eliminate any ordering clauses that are bound to fixed values. + if cc.bound_value(&var).is_some() { + continue; + } + + // Fail if the var isn't bound by the query. + if !cc.column_bindings.contains_key(&var) { + bail!(ErrorKind::UnboundVariable(var.name())); + } + + // Otherwise, determine if we also need to order by type… + if cc.known_type(&var).is_none() { + order_bys.push(OrderBy(direction.clone(), VariableColumn::VariableTypeTag(var.clone()))); + } + order_bys.push(OrderBy(direction, VariableColumn::Variable(var.clone()))); + vars.insert(var.clone()); + } + + Ok((if order_bys.is_empty() { None } else { Some(order_bys) }, vars)) + } + } +} + #[allow(dead_code)] pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result { // TODO: integrate default source into pattern processing. @@ -95,12 +133,15 @@ pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningC cc.expand_column_bindings(); cc.prune_extracted_types(); + let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?; + let with: BTreeSet = parsed.with.into_iter().chain(extra_vars.into_iter()).collect(); let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None }; Ok(AlgebraicQuery { default_source: parsed.default_source, find_spec: parsed.find_spec, has_aggregates: false, // TODO: we don't parse them yet. - with: parsed.with.into_iter().collect(), + with: with, + order: order, limit: limit, cc: cc, }) @@ -120,6 +161,7 @@ pub use types::{ ComputedTable, DatomsColumn, DatomsTable, + OrderBy, QualifiedAlias, QueryValue, SourceAlias, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 27a93faf..8652d793 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -24,7 +24,9 @@ use mentat_core::{ }; use mentat_query::{ + Direction, NamespacedKeyword, + Order, Variable, }; @@ -220,6 +222,17 @@ impl Debug for QueryValue { } } +/// Represents an entry in the ORDER BY list: a variable or a variable's type tag. +/// (We require order vars to be projected, so we can simply use a variable here.) +pub struct OrderBy(pub Direction, pub VariableColumn); + +impl From for OrderBy { + fn from(item: Order) -> OrderBy { + let Order(direction, variable) = item; + OrderBy(direction, VariableColumn::Variable(variable)) + } +} + #[derive(Copy, Clone, PartialEq, Eq)] /// Define the different numeric inequality operators that we support. /// Note that we deliberately don't just use "<=" and friends as strings: diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index 892b3f63..5b4d879d 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -35,11 +35,13 @@ use self::mentat_parser_utils::value_and_span::{ }; use self::mentat_query::{ + Direction, Element, FindQuery, FindSpec, FnArg, FromValue, + Order, OrJoin, OrWhereClause, Pattern, @@ -119,6 +121,28 @@ def_parser!(Query, arguments, Vec, { (many::, _>(Query::fn_arg())) }); +def_parser!(Query, direction, Direction, { + satisfy_map(|v: edn::ValueAndSpan| { + match v.inner { + edn::SpannedValue::PlainSymbol(ref s) => { + let name = s.0.as_str(); + match name { + "asc" => Some(Direction::Ascending), + "desc" => Some(Direction::Descending), + _ => None, + } + }, + _ => None, + } + }) +}); + +def_parser!(Query, order, Order, { + seq().of_exactly((Query::direction(), Query::variable())) + .map(|(d, v)| Order(d, v)) + .or(Query::variable().map(|v| Order(Direction::Ascending, v))) +}); + pub struct Where; def_parser!(Where, pattern_value_place, PatternValuePlace, { @@ -308,11 +332,14 @@ def_matches_keyword!(Find, literal_with, "with"); def_matches_keyword!(Find, literal_where, "where"); +def_matches_keyword!(Find, literal_order, "order"); + /// Express something close to a builder pattern for a `FindQuery`. enum FindQueryPart { FindSpec(FindSpec), With(Vec), WhereClauses(Vec), + Order(Vec), } /// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find @@ -328,22 +355,28 @@ def_parser!(Find, query, FindQuery, { let p_where_clauses = Find::literal_where() .with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); + let p_order_clauses = Find::literal_order() + .with(vector().of_exactly(many1(Query::order()).map(FindQueryPart::Order))); + (or(map(), keyword_map())) - .of_exactly(many(choice::<[&mut Parser; 3], _>([ + .of_exactly(many(choice::<[&mut Parser; 4], _>([ &mut try(p_find_spec), &mut try(p_with_vars), &mut try(p_where_clauses), + &mut try(p_order_clauses), ]))) .and_then(|parts: Vec| -> std::result::Result> { let mut find_spec = None; let mut with_vars = None; let mut where_clauses = None; + let mut order_clauses = None; for part in parts { match part { FindQueryPart::FindSpec(x) => find_spec = Some(x), FindQueryPart::With(x) => with_vars = Some(x), FindQueryPart::WhereClauses(x) => where_clauses = Some(x), + FindQueryPart::Order(x) => order_clauses = Some(x), } } @@ -353,6 +386,7 @@ def_parser!(Find, query, FindQuery, { with: with_vars.unwrap_or(vec![]), in_vars: vec![], // TODO in_sources: vec![], // TODO + order: order_clauses, where_clauses: where_clauses.ok_or(combine::primitives::Error::Unexpected("expected :where".into()))?, }) }) diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs index 03db6bd0..00d09afd 100644 --- a/query-parser/tests/find_tests.rs +++ b/query-parser/tests/find_tests.rs @@ -18,9 +18,11 @@ use edn::{ }; use mentat_query::{ + Direction, Element, FindSpec, FnArg, + Order, OrJoin, OrWhereClause, Pattern, @@ -208,3 +210,27 @@ fn can_parse_simple_or_and_join() { )), ]); } + +#[test] +fn can_parse_order_by() { + let invalid = "[:find ?x :where [?x :foo/baz ?y] :order]"; + assert!(parse_find_string(invalid).is_err()); + + // Defaults to ascending. + let default = "[:find ?x :where [?x :foo/baz ?y] :order ?y]"; + assert_eq!(parse_find_string(default).unwrap().order, + Some(vec![Order(Direction::Ascending, Variable::from_valid_name("?y"))])); + + let ascending = "[:find ?x :where [?x :foo/baz ?y] :order (asc ?y)]"; + assert_eq!(parse_find_string(ascending).unwrap().order, + Some(vec![Order(Direction::Ascending, Variable::from_valid_name("?y"))])); + + let descending = "[:find ?x :where [?x :foo/baz ?y] :order (desc ?y)]"; + assert_eq!(parse_find_string(descending).unwrap().order, + Some(vec![Order(Direction::Descending, Variable::from_valid_name("?y"))])); + + let mixed = "[:find ?x :where [?x :foo/baz ?y] :order (desc ?y) (asc ?x)]"; + assert_eq!(parse_find_string(mixed).unwrap().order, + Some(vec![Order(Direction::Descending, Variable::from_valid_name("?y")), + Order(Direction::Ascending, Variable::from_valid_name("?x"))])); +} diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 0a44a6f3..86ff8636 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -18,8 +18,13 @@ use mentat_core::{ TypedValue, }; +use mentat_query::{ + Direction, +}; + use mentat_query_algebrizer::{ Column, + OrderBy, QualifiedAlias, QueryValue, SourceAlias, @@ -152,25 +157,28 @@ pub struct SelectQuery { pub projection: Projection, pub from: FromClause, pub constraints: Vec, + pub order: Vec, pub limit: Option, } +fn push_variable_column(qb: &mut QueryBuilder, vc: &VariableColumn) -> BuildQueryResult { + match vc { + &VariableColumn::Variable(ref v) => { + qb.push_identifier(v.as_str()) + }, + &VariableColumn::VariableTypeTag(ref v) => { + qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str()) + }, + } +} + fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { match col { &Column::Fixed(ref d) => { qb.push_sql(d.as_str()); Ok(()) }, - &Column::Variable(ref vc) => { - match vc { - &VariableColumn::Variable(ref v) => { - qb.push_identifier(v.as_str()) - }, - &VariableColumn::VariableTypeTag(ref v) => { - qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str()) - }, - } - }, + &Column::Variable(ref vc) => push_variable_column(qb, vc), } } @@ -195,7 +203,7 @@ fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { /// /// without producing an intermediate string sequence. macro_rules! interpose { - ( $name: ident, $across: expr, $body: block, $inter: block ) => { + ( $name: pat, $across: expr, $body: block, $inter: block ) => { let mut seq = $across.iter(); if let Some($name) = seq.next() { $body; @@ -410,6 +418,18 @@ impl QueryFragment for SelectQuery { { out.push_sql(" AND ") }); } + if !self.order.is_empty() { + out.push_sql(" ORDER BY "); + interpose!(&OrderBy(ref dir, ref var), self.order, + { push_variable_column(out, var)?; + match dir { + &Direction::Ascending => { out.push_sql(" ASC"); }, + &Direction::Descending => { out.push_sql(" DESC"); }, + }; + }, + { out.push_sql(", ") }); + } + // Guaranteed to be positive: u64. if let Some(limit) = self.limit { out.push_sql(" LIMIT "); @@ -533,6 +553,7 @@ mod tests { right: ColumnOrExpression::Entid(65536), }, ], + order: vec![], limit: None, }; diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index b90d9083..ded2a78c 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -14,6 +14,11 @@ use mentat_core::{ ValueType, }; +use mentat_query::{ + Direction, + Variable, +}; + use mentat_query_algebrizer::{ AlgebraicQuery, ColumnAlternation, @@ -25,6 +30,7 @@ use mentat_query_algebrizer::{ ConjoiningClauses, DatomsColumn, DatomsTable, + OrderBy, QualifiedAlias, QueryValue, SourceAlias, @@ -223,7 +229,7 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq // Each arm simply turns into a subquery. // The SQL translation will stuff "UNION" between each arm. let projection = Projection::Columns(columns); - cc_to_select_query(projection, cc, false, None) + cc_to_select_query(projection, cc, false, None, None) }).collect(), alias) }, @@ -233,7 +239,11 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// query that runs SQL. The next level up the call stack can check for known-empty queries if /// needed. -fn cc_to_select_query>>(projection: Projection, cc: ConjoiningClauses, distinct: bool, limit: T) -> SelectQuery { +fn cc_to_select_query(projection: Projection, + cc: ConjoiningClauses, + distinct: bool, + order: Option>, + limit: T) -> SelectQuery where T: Into> { let from = if cc.from.is_empty() { FromClause::Nothing } else { @@ -261,6 +271,8 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini FromClause::TableList(TableList(tables.collect())) }; + // Turn the query-centric order clauses into column-orders. + let order = order.map_or(vec![], |vec| { vec.into_iter().map(|o| o.into()).collect() }); let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() }; SelectQuery { distinct: distinct, @@ -270,6 +282,7 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini .into_iter() .map(|c| c.to_constraint()) .collect(), + order: order, limit: limit, } } @@ -284,10 +297,11 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery { projection: Projection::One, from: FromClause::Nothing, constraints: vec![], + order: vec![], limit: Some(0), } } else { - cc_to_select_query(Projection::One, cc, false, 1) + cc_to_select_query(Projection::One, cc, false, None, 1) } } @@ -298,7 +312,7 @@ pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect { // SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine. let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query); ProjectedSelect { - query: cc_to_select_query(sql_projection, query.cc, distinct, query.limit), + query: cc_to_select_query(sql_projection, query.cc, distinct, query.order, query.limit), projector: datalog_projector, } } diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index e0af84e5..f0c4d6f9 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -369,4 +369,25 @@ fn test_with_without_aggregate() { let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, `all_datoms00`.value_type_tag AS `?y_value_type_tag` FROM `all_datoms` AS `all_datoms00`"); assert_eq!(args, vec![]); +} + +#[test] +fn test_order_by() { + let schema = prepopulated_schema(); + + // Known type. + let input = r#"[:find ?x :where [?x :foo/bar ?y] :order (desc ?y)]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x`, `datoms00`.v AS `?y` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 99 \ + ORDER BY `?y` DESC"); + + // Unknown type. + let input = r#"[:find ?x :with ?y :where [?x _ ?y] :order ?y ?x]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, \ + `all_datoms00`.value_type_tag AS `?y_value_type_tag` \ + FROM `all_datoms` AS `all_datoms00` \ + ORDER BY `?y_value_type_tag` ASC, `?y` ASC, `?x` ASC"); } \ No newline at end of file diff --git a/query/src/lib.rs b/query/src/lib.rs index e348030b..3252c447 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -128,6 +128,16 @@ impl PredicateFn { } } +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Direction { + Ascending, + Descending, +} + +/// An abstract declaration of ordering: direction and variable. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Order(pub Direction, pub Variable); // Future: Element instead of Variable? + #[derive(Clone, Debug, Eq, PartialEq)] pub enum SrcVar { DefaultSrc, @@ -594,6 +604,7 @@ pub struct FindQuery { pub in_vars: Vec, pub in_sources: Vec, pub where_clauses: Vec, + pub order: Option>, // TODO: in_rules; }