Implement :order. (#415) (#416) r=nalexander

This adds an `:order` keyword to `:find`.

If present, the results of the query will be an ordered set, rather than
an unordered set; rows will appear in an order defined by the
`:order` entries.

Each `:order` entry can be one of three things:

- A var, `?x`, meaning "order by ?x ascending".
- A pair, `(asc ?x)`, meaning "order by ?x ascending".
- A pair, `(desc ?x)`, meaning "order by ?x descending".

Values will be ordered in this sequence for asc, and in reverse for desc:

1. Entity IDs, in ascending numerical order.
2. Booleans, false then true.
3. Timestamps, in ascending numerical order.
4. Longs and doubles, intermixed, in ascending numerical order.
5. Strings, in ascending lexicographic order.
6. Keywords, in ascending lexicographic order, considering the entire
   ns/name pair as a single string separated by '/'.

Subcommits:

Pre: make bound_value public.
Pre: generalize ErrorKind::UnboundVariable for use in order.
Part 1: parse (direction, var) pairs.
Part 2: parse :order clause into FindQuery.
Part 3: include order variables in algebrized query.

We add order variables to :with, so we can reuse its type tag projection
logic, and so that we can phrase ordering in terms of variables rather
than datoms columns.

Part 4: produce SQL for order clauses.
This commit is contained in:
Richard Newman 2017-04-14 16:10:56 -07:00
parent 64acc6a7ee
commit 35d73d5541
10 changed files with 202 additions and 20 deletions

View file

@ -272,7 +272,7 @@ impl ConjoiningClauses {
} }
impl ConjoiningClauses { impl ConjoiningClauses {
fn bound_value(&self, var: &Variable) -> Option<TypedValue> { pub fn bound_value(&self, var: &Variable) -> Option<TypedValue> {
self.value_bindings.get(var).cloned() self.value_bindings.get(var).cloned()
} }

View file

@ -31,7 +31,7 @@ error_chain! {
} }
UnboundVariable(name: PlainSymbol) { UnboundVariable(name: PlainSymbol) {
description("unbound variable in function call") description("unbound variable in order clause or function call")
display("unbound variable: {}", name) display("unbound variable: {}", name)
} }

View file

@ -30,6 +30,7 @@ use mentat_core::counter::RcCounter;
use mentat_query::{ use mentat_query::{
FindQuery, FindQuery,
FindSpec, FindSpec,
Order,
SrcVar, SrcVar,
Variable, Variable,
}; };
@ -44,8 +45,9 @@ pub use errors::{
pub struct AlgebraicQuery { pub struct AlgebraicQuery {
default_source: SrcVar, default_source: SrcVar,
pub find_spec: FindSpec, pub find_spec: FindSpec,
pub with: BTreeSet<Variable>,
has_aggregates: bool, has_aggregates: bool,
pub with: BTreeSet<Variable>,
pub order: Option<Vec<OrderBy>>,
pub limit: Option<u64>, pub limit: Option<u64>,
pub cc: clauses::ConjoiningClauses, pub cc: clauses::ConjoiningClauses,
} }
@ -84,6 +86,42 @@ pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> {
algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default()) algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default())
} }
/// Validate an optional `:order` list against `cc` and lower it to `OrderBy` entries.
///
/// Variables that `cc` binds to a fixed value are dropped from the ordering. A variable
/// with no column binding fails with `ErrorKind::UnboundVariable`. A variable whose type
/// is not known contributes a type-tag ordering immediately before its value ordering,
/// both in the requested direction.
///
/// Returns the orderings (`None` when nothing is left to order by) together with the set
/// of variables that were kept, which should be added to the `with` clause to make the
/// ordering clauses possible.
fn validate_and_simplify_order(cc: &ConjoiningClauses, order: Option<Vec<Order>>)
                               -> Result<(Option<Vec<OrderBy>>, BTreeSet<Variable>)> {
    let order = match order {
        Some(order) => order,
        None => return Ok((None, BTreeSet::default())),
    };

    // Each entry can expand to two orderings: its type tag, then its value.
    let mut order_bys: Vec<OrderBy> = Vec::with_capacity(order.len() * 2);
    let mut vars: BTreeSet<Variable> = BTreeSet::default();

    for Order(direction, var) in order {
        // A variable fixed to a single value cannot affect the ordering.
        if cc.bound_value(&var).is_some() {
            continue;
        }

        // Every remaining variable must be bound to a column by the query.
        if !cc.column_bindings.contains_key(&var) {
            bail!(ErrorKind::UnboundVariable(var.name()));
        }

        // Without a known type, order by the type tag before the value itself.
        if cc.known_type(&var).is_none() {
            order_bys.push(OrderBy(direction.clone(), VariableColumn::VariableTypeTag(var.clone())));
        }
        order_bys.push(OrderBy(direction, VariableColumn::Variable(var.clone())));
        vars.insert(var);
    }

    let order_bys = if order_bys.is_empty() { None } else { Some(order_bys) };
    Ok((order_bys, vars))
}
#[allow(dead_code)] #[allow(dead_code)]
pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result<AlgebraicQuery> { pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result<AlgebraicQuery> {
// TODO: integrate default source into pattern processing. // TODO: integrate default source into pattern processing.
@ -95,12 +133,15 @@ pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningC
cc.expand_column_bindings(); cc.expand_column_bindings();
cc.prune_extracted_types(); cc.prune_extracted_types();
let (order, extra_vars) = validate_and_simplify_order(&cc, parsed.order)?;
let with: BTreeSet<Variable> = parsed.with.into_iter().chain(extra_vars.into_iter()).collect();
let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None }; let limit = if parsed.find_spec.is_unit_limited() { Some(1) } else { None };
Ok(AlgebraicQuery { Ok(AlgebraicQuery {
default_source: parsed.default_source, default_source: parsed.default_source,
find_spec: parsed.find_spec, find_spec: parsed.find_spec,
has_aggregates: false, // TODO: we don't parse them yet. has_aggregates: false, // TODO: we don't parse them yet.
with: parsed.with.into_iter().collect(), with: with,
order: order,
limit: limit, limit: limit,
cc: cc, cc: cc,
}) })
@ -120,6 +161,7 @@ pub use types::{
ComputedTable, ComputedTable,
DatomsColumn, DatomsColumn,
DatomsTable, DatomsTable,
OrderBy,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias, SourceAlias,

View file

@ -24,7 +24,9 @@ use mentat_core::{
}; };
use mentat_query::{ use mentat_query::{
Direction,
NamespacedKeyword, NamespacedKeyword,
Order,
Variable, Variable,
}; };
@ -220,6 +222,17 @@ impl Debug for QueryValue {
} }
} }
/// Represents an entry in the ORDER BY list: a variable or a variable's type tag.
/// (We require order vars to be projected, so we can simply use a variable here.)
///
/// Field 0 is the sort direction; field 1 names the projected column to sort on,
/// either a variable's value or its type tag.
pub struct OrderBy(pub Direction, pub VariableColumn);
impl From<Order> for OrderBy {
    /// Lower a parsed `Order` to an ordering on the variable's value column, keeping
    /// its direction. This conversion never produces a type-tag ordering.
    fn from(item: Order) -> OrderBy {
        OrderBy(item.0, VariableColumn::Variable(item.1))
    }
}
#[derive(Copy, Clone, PartialEq, Eq)] #[derive(Copy, Clone, PartialEq, Eq)]
/// Define the different numeric inequality operators that we support. /// Define the different numeric inequality operators that we support.
/// Note that we deliberately don't just use "<=" and friends as strings: /// Note that we deliberately don't just use "<=" and friends as strings:

View file

@ -35,11 +35,13 @@ use self::mentat_parser_utils::value_and_span::{
}; };
use self::mentat_query::{ use self::mentat_query::{
Direction,
Element, Element,
FindQuery, FindQuery,
FindSpec, FindSpec,
FnArg, FnArg,
FromValue, FromValue,
Order,
OrJoin, OrJoin,
OrWhereClause, OrWhereClause,
Pattern, Pattern,
@ -119,6 +121,28 @@ def_parser!(Query, arguments, Vec<FnArg>, {
(many::<Vec<FnArg>, _>(Query::fn_arg())) (many::<Vec<FnArg>, _>(Query::fn_arg()))
}); });
// Parse the plain symbol `asc` or `desc` into a `Direction`. Any other value,
// including any other plain symbol, does not match.
def_parser!(Query, direction, Direction, {
    satisfy_map(|v: edn::ValueAndSpan| {
        match v.inner {
            edn::SpannedValue::PlainSymbol(ref symbol) => match symbol.0.as_str() {
                "asc" => Some(Direction::Ascending),
                "desc" => Some(Direction::Descending),
                _ => None,
            },
            _ => None,
        }
    })
});
// Parse one ordering entry: either a sequence of exactly a direction and a variable,
// or a bare variable, which is treated as ascending.
def_parser!(Query, order, Order, {
    let explicit = seq()
        .of_exactly((Query::direction(), Query::variable()))
        .map(|(direction, var)| Order(direction, var));
    let implicit = Query::variable()
        .map(|var| Order(Direction::Ascending, var));
    explicit.or(implicit)
});
pub struct Where; pub struct Where;
def_parser!(Where, pattern_value_place, PatternValuePlace, { def_parser!(Where, pattern_value_place, PatternValuePlace, {
@ -308,11 +332,14 @@ def_matches_keyword!(Find, literal_with, "with");
def_matches_keyword!(Find, literal_where, "where"); def_matches_keyword!(Find, literal_where, "where");
// Defines `Find::literal_order`, which the query parser uses to recognise the `order`
// keyword that introduces the ordering entries.
def_matches_keyword!(Find, literal_order, "order");
/// Express something close to a builder pattern for a `FindQuery`. /// Express something close to a builder pattern for a `FindQuery`.
enum FindQueryPart { enum FindQueryPart {
FindSpec(FindSpec), FindSpec(FindSpec),
With(Vec<Variable>), With(Vec<Variable>),
WhereClauses(Vec<WhereClause>), WhereClauses(Vec<WhereClause>),
Order(Vec<Order>),
} }
/// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find /// This is awkward, but will do for now. We use `keyword_map()` to optionally accept vector find
@ -328,22 +355,28 @@ def_parser!(Find, query, FindQuery, {
let p_where_clauses = Find::literal_where() let p_where_clauses = Find::literal_where()
.with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses"); .with(vector().of_exactly(Where::clauses().map(FindQueryPart::WhereClauses))).expected(":where clauses");
let p_order_clauses = Find::literal_order()
.with(vector().of_exactly(many1(Query::order()).map(FindQueryPart::Order)));
(or(map(), keyword_map())) (or(map(), keyword_map()))
.of_exactly(many(choice::<[&mut Parser<Input = ValueStream, Output = FindQueryPart>; 3], _>([ .of_exactly(many(choice::<[&mut Parser<Input = ValueStream, Output = FindQueryPart>; 4], _>([
&mut try(p_find_spec), &mut try(p_find_spec),
&mut try(p_with_vars), &mut try(p_with_vars),
&mut try(p_where_clauses), &mut try(p_where_clauses),
&mut try(p_order_clauses),
]))) ])))
.and_then(|parts: Vec<FindQueryPart>| -> std::result::Result<FindQuery, combine::primitives::Error<edn::ValueAndSpan, edn::ValueAndSpan>> { .and_then(|parts: Vec<FindQueryPart>| -> std::result::Result<FindQuery, combine::primitives::Error<edn::ValueAndSpan, edn::ValueAndSpan>> {
let mut find_spec = None; let mut find_spec = None;
let mut with_vars = None; let mut with_vars = None;
let mut where_clauses = None; let mut where_clauses = None;
let mut order_clauses = None;
for part in parts { for part in parts {
match part { match part {
FindQueryPart::FindSpec(x) => find_spec = Some(x), FindQueryPart::FindSpec(x) => find_spec = Some(x),
FindQueryPart::With(x) => with_vars = Some(x), FindQueryPart::With(x) => with_vars = Some(x),
FindQueryPart::WhereClauses(x) => where_clauses = Some(x), FindQueryPart::WhereClauses(x) => where_clauses = Some(x),
FindQueryPart::Order(x) => order_clauses = Some(x),
} }
} }
@ -353,6 +386,7 @@ def_parser!(Find, query, FindQuery, {
with: with_vars.unwrap_or(vec![]), with: with_vars.unwrap_or(vec![]),
in_vars: vec![], // TODO in_vars: vec![], // TODO
in_sources: vec![], // TODO in_sources: vec![], // TODO
order: order_clauses,
where_clauses: where_clauses.ok_or(combine::primitives::Error::Unexpected("expected :where".into()))?, where_clauses: where_clauses.ok_or(combine::primitives::Error::Unexpected("expected :where".into()))?,
}) })
}) })

View file

@ -18,9 +18,11 @@ use edn::{
}; };
use mentat_query::{ use mentat_query::{
Direction,
Element, Element,
FindSpec, FindSpec,
FnArg, FnArg,
Order,
OrJoin, OrJoin,
OrWhereClause, OrWhereClause,
Pattern, Pattern,
@ -208,3 +210,27 @@ fn can_parse_simple_or_and_join() {
)), )),
]); ]);
} }
#[test]
fn can_parse_order_by() {
    // Helpers: parse a query expected to be valid and return its `:order` entries,
    // and build the expected entries for a named variable.
    let order_of = |query: &str| parse_find_string(query).unwrap().order;
    let asc = |name: &str| Order(Direction::Ascending, Variable::from_valid_name(name));
    let desc = |name: &str| Order(Direction::Descending, Variable::from_valid_name(name));

    // `:order` with no entries is rejected.
    assert!(parse_find_string("[:find ?x :where [?x :foo/baz ?y] :order]").is_err());

    // Defaults to ascending.
    assert_eq!(order_of("[:find ?x :where [?x :foo/baz ?y] :order ?y]"),
               Some(vec![asc("?y")]));

    // Explicit directions.
    assert_eq!(order_of("[:find ?x :where [?x :foo/baz ?y] :order (asc ?y)]"),
               Some(vec![asc("?y")]));
    assert_eq!(order_of("[:find ?x :where [?x :foo/baz ?y] :order (desc ?y)]"),
               Some(vec![desc("?y")]));

    // Several entries keep their written sequence.
    assert_eq!(order_of("[:find ?x :where [?x :foo/baz ?y] :order (desc ?y) (asc ?x)]"),
               Some(vec![desc("?y"), asc("?x")]));
}

View file

@ -18,8 +18,13 @@ use mentat_core::{
TypedValue, TypedValue,
}; };
use mentat_query::{
Direction,
};
use mentat_query_algebrizer::{ use mentat_query_algebrizer::{
Column, Column,
OrderBy,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias, SourceAlias,
@ -152,25 +157,28 @@ pub struct SelectQuery {
pub projection: Projection, pub projection: Projection,
pub from: FromClause, pub from: FromClause,
pub constraints: Vec<Constraint>, pub constraints: Vec<Constraint>,
pub order: Vec<OrderBy>,
pub limit: Option<u64>, pub limit: Option<u64>,
} }
/// Push the SQL identifier for a variable column onto `qb`.
///
/// A `Variable` column uses the variable's `as_str()` form; a `VariableTypeTag` column
/// uses the variable's name followed by `_value_type_tag`. Returns whatever
/// `push_identifier` returns.
fn push_variable_column(qb: &mut QueryBuilder, vc: &VariableColumn) -> BuildQueryResult {
    match *vc {
        VariableColumn::Variable(ref var) => qb.push_identifier(var.as_str()),
        VariableColumn::VariableTypeTag(ref var) => {
            let tag_column = format!("{}_value_type_tag", var.name());
            qb.push_identifier(tag_column.as_str())
        },
    }
}
fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult {
match col { match col {
&Column::Fixed(ref d) => { &Column::Fixed(ref d) => {
qb.push_sql(d.as_str()); qb.push_sql(d.as_str());
Ok(()) Ok(())
}, },
&Column::Variable(ref vc) => { &Column::Variable(ref vc) => push_variable_column(qb, vc),
match vc {
&VariableColumn::Variable(ref v) => {
qb.push_identifier(v.as_str())
},
&VariableColumn::VariableTypeTag(ref v) => {
qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str())
},
}
},
} }
} }
@ -195,7 +203,7 @@ fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult {
/// ///
/// without producing an intermediate string sequence. /// without producing an intermediate string sequence.
macro_rules! interpose { macro_rules! interpose {
( $name: ident, $across: expr, $body: block, $inter: block ) => { ( $name: pat, $across: expr, $body: block, $inter: block ) => {
let mut seq = $across.iter(); let mut seq = $across.iter();
if let Some($name) = seq.next() { if let Some($name) = seq.next() {
$body; $body;
@ -410,6 +418,18 @@ impl QueryFragment for SelectQuery {
{ out.push_sql(" AND ") }); { out.push_sql(" AND ") });
} }
if !self.order.is_empty() {
out.push_sql(" ORDER BY ");
interpose!(&OrderBy(ref dir, ref var), self.order,
{ push_variable_column(out, var)?;
match dir {
&Direction::Ascending => { out.push_sql(" ASC"); },
&Direction::Descending => { out.push_sql(" DESC"); },
};
},
{ out.push_sql(", ") });
}
// Guaranteed to be positive: u64. // Guaranteed to be positive: u64.
if let Some(limit) = self.limit { if let Some(limit) = self.limit {
out.push_sql(" LIMIT "); out.push_sql(" LIMIT ");
@ -533,6 +553,7 @@ mod tests {
right: ColumnOrExpression::Entid(65536), right: ColumnOrExpression::Entid(65536),
}, },
], ],
order: vec![],
limit: None, limit: None,
}; };

View file

@ -14,6 +14,11 @@ use mentat_core::{
ValueType, ValueType,
}; };
use mentat_query::{
Direction,
Variable,
};
use mentat_query_algebrizer::{ use mentat_query_algebrizer::{
AlgebraicQuery, AlgebraicQuery,
ColumnAlternation, ColumnAlternation,
@ -25,6 +30,7 @@ use mentat_query_algebrizer::{
ConjoiningClauses, ConjoiningClauses,
DatomsColumn, DatomsColumn,
DatomsTable, DatomsTable,
OrderBy,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias, SourceAlias,
@ -223,7 +229,7 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
// Each arm simply turns into a subquery. // Each arm simply turns into a subquery.
// The SQL translation will stuff "UNION" between each arm. // The SQL translation will stuff "UNION" between each arm.
let projection = Projection::Columns(columns); let projection = Projection::Columns(columns);
cc_to_select_query(projection, cc, false, None) cc_to_select_query(projection, cc, false, None, None)
}).collect(), }).collect(),
alias) alias)
}, },
@ -233,7 +239,11 @@ fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubq
/// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a
/// query that runs SQL. The next level up the call stack can check for known-empty queries if /// query that runs SQL. The next level up the call stack can check for known-empty queries if
/// needed. /// needed.
fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: ConjoiningClauses, distinct: bool, limit: T) -> SelectQuery { fn cc_to_select_query<T>(projection: Projection,
cc: ConjoiningClauses,
distinct: bool,
order: Option<Vec<OrderBy>>,
limit: T) -> SelectQuery where T: Into<Option<u64>> {
let from = if cc.from.is_empty() { let from = if cc.from.is_empty() {
FromClause::Nothing FromClause::Nothing
} else { } else {
@ -261,6 +271,8 @@ fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: Conjoini
FromClause::TableList(TableList(tables.collect())) FromClause::TableList(TableList(tables.collect()))
}; };
// Turn the query-centric order clauses into column-orders.
let order = order.map_or(vec![], |vec| { vec.into_iter().map(|o| o.into()).collect() });
let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() }; let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() };
SelectQuery { SelectQuery {
distinct: distinct, distinct: distinct,
@ -270,6 +282,7 @@ fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: Conjoini
.into_iter() .into_iter()
.map(|c| c.to_constraint()) .map(|c| c.to_constraint())
.collect(), .collect(),
order: order,
limit: limit, limit: limit,
} }
} }
@ -284,10 +297,11 @@ pub fn cc_to_exists(cc: ConjoiningClauses) -> SelectQuery {
projection: Projection::One, projection: Projection::One,
from: FromClause::Nothing, from: FromClause::Nothing,
constraints: vec![], constraints: vec![],
order: vec![],
limit: Some(0), limit: Some(0),
} }
} else { } else {
cc_to_select_query(Projection::One, cc, false, 1) cc_to_select_query(Projection::One, cc, false, None, 1)
} }
} }
@ -298,7 +312,7 @@ pub fn query_to_select(query: AlgebraicQuery) -> ProjectedSelect {
// SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine. // SQL-based aggregation -- `SELECT SUM(datoms00.e)` -- is fine.
let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query); let CombinedProjection { sql_projection, datalog_projector, distinct } = query_projection(&query);
ProjectedSelect { ProjectedSelect {
query: cc_to_select_query(sql_projection, query.cc, distinct, query.limit), query: cc_to_select_query(sql_projection, query.cc, distinct, query.order, query.limit),
projector: datalog_projector, projector: datalog_projector,
} }
} }

View file

@ -370,3 +370,24 @@ fn test_with_without_aggregate() {
assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, `all_datoms00`.value_type_tag AS `?y_value_type_tag` FROM `all_datoms` AS `all_datoms00`"); assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, `all_datoms00`.value_type_tag AS `?y_value_type_tag` FROM `all_datoms` AS `all_datoms00`");
assert_eq!(args, vec![]); assert_eq!(args, vec![]);
} }
/// Check the SQL generated for `:order`, both when the ordered variable's type is known
/// and when it is not (in which case the type tag is ordered first).
#[test]
fn test_order_by() {
    let schema = prepopulated_schema();

    // Known type.
    let input = r#"[:find ?x :where [?x :foo/bar ?y] :order (desc ?y)]"#;
    let SQLQuery { sql, args } = translate(&schema, input, None);
    assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x`, `datoms00`.v AS `?y` \
                     FROM `datoms` AS `datoms00` \
                     WHERE `datoms00`.a = 99 \
                     ORDER BY `?y` DESC");
    // Nothing in this query needs a bind parameter; check it rather than leaving
    // `args` unused.
    assert_eq!(args, vec![]);

    // Unknown type.
    let input = r#"[:find ?x :with ?y :where [?x _ ?y] :order ?y ?x]"#;
    let SQLQuery { sql, args } = translate(&schema, input, None);
    assert_eq!(sql, "SELECT DISTINCT `all_datoms00`.e AS `?x`, `all_datoms00`.v AS `?y`, \
                                     `all_datoms00`.value_type_tag AS `?y_value_type_tag` \
                     FROM `all_datoms` AS `all_datoms00` \
                     ORDER BY `?y_value_type_tag` ASC, `?y` ASC, `?x` ASC");
    assert_eq!(args, vec![]);
}

View file

@ -128,6 +128,16 @@ impl PredicateFn {
} }
} }
/// The direction of a single ordering entry.
///
/// This is a fieldless enum, so it is `Copy`: callers can reuse a direction for several
/// orderings without cloning it. Existing `.clone()` calls keep working.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Direction {
    /// Smallest values first.
    Ascending,
    /// Largest values first.
    Descending,
}
/// An abstract declaration of ordering: direction and variable.
///
/// Field 0 is the sort direction and field 1 is the variable to sort by.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Order(pub Direction, pub Variable); // Future: Element instead of Variable?
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum SrcVar { pub enum SrcVar {
DefaultSrc, DefaultSrc,
@ -594,6 +604,7 @@ pub struct FindQuery {
pub in_vars: Vec<Variable>, pub in_vars: Vec<Variable>,
pub in_sources: Vec<SrcVar>, pub in_sources: Vec<SrcVar>,
pub where_clauses: Vec<WhereClause>, pub where_clauses: Vec<WhereClause>,
pub order: Option<Vec<Order>>,
// TODO: in_rules; // TODO: in_rules;
} }