Algebrize and translate numeric constraints. (#306) r=nalexander

This commit is contained in:
Richard Newman 2017-03-16 19:23:48 +00:00
parent d83c8620cd
commit 97749833d0
9 changed files with 321 additions and 74 deletions

View file

@ -30,11 +30,13 @@ use self::mentat_core::{
};
use self::mentat_query::{
FnArg,
NamespacedKeyword,
NonIntegerConstant,
Pattern,
PatternNonValuePlace,
PatternValuePlace,
Predicate,
SrcVar,
Variable,
};
@ -46,9 +48,12 @@ use errors::{
};
use types::{
ColumnConstraint,
DatomsColumn,
DatomsTable,
NumericComparison,
QualifiedAlias,
QueryValue,
SourceAlias,
TableAlias,
};
@ -65,46 +70,6 @@ pub fn default_table_aliaser() -> TableAliaser {
})
}
#[derive(PartialEq, Eq)]
pub enum ColumnConstraint {
EqualsColumn(QualifiedAlias, QualifiedAlias),
EqualsEntity(QualifiedAlias, Entid),
EqualsValue(QualifiedAlias, TypedValue),
// This is different: a numeric value can only apply to the 'v' column, and it implicitly
// constrains the `value_type_tag` column. For instance, a primitive long on `datoms00` of `5`
// cannot be a boolean, so `datoms00.value_type_tag` must be in the set `#{0, 4, 5}`.
// Note that `5 = 5.0` in SQLite, and we preserve that here.
EqualsPrimitiveLong(TableAlias, i64),
HasType(TableAlias, ValueType),
}
impl Debug for ColumnConstraint {
fn fmt(&self, f: &mut Formatter) -> Result {
use self::ColumnConstraint::*;
match self {
&EqualsColumn(ref qa1, ref qa2) => {
write!(f, "{:?} = {:?}", qa1, qa2)
}
&EqualsEntity(ref qa, ref entid) => {
write!(f, "{:?} = entity({:?})", qa, entid)
}
&EqualsValue(ref qa, ref typed_value) => {
write!(f, "{:?} = value({:?})", qa, typed_value)
}
&EqualsPrimitiveLong(ref qa, value) => {
write!(f, "{:?}.v = primitive({:?})", qa, value)
}
&HasType(ref qa, value_type) => {
write!(f, "{:?}.value_type_tag = {:?}", qa, value_type)
}
}
}
}
trait OptionEffect<T> {
fn when_not<F: FnOnce()>(self, f: F) -> Option<T>;
}
@ -340,11 +305,11 @@ impl ConjoiningClauses {
}
pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) {
self.wheres.push(ColumnConstraint::EqualsValue(QualifiedAlias(table, column), constant))
self.wheres.push(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant)))
}
pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) {
self.wheres.push(ColumnConstraint::EqualsEntity(QualifiedAlias(table, column), entity))
self.wheres.push(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity)))
}
pub fn constrain_attribute(&mut self, table: TableAlias, attribute: Entid) {
@ -352,7 +317,9 @@ impl ConjoiningClauses {
}
pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) {
self.wheres.push(ColumnConstraint::EqualsPrimitiveLong(table, value))
self.wheres.push(ColumnConstraint::Equals(
QualifiedAlias(table, DatomsColumn::Value),
QueryValue::PrimitiveLong(value)))
}
/// Constrains the var if there's no existing type.
@ -565,7 +532,7 @@ impl ConjoiningClauses {
// TODO: if both primary and secondary are .v, should we make sure
// the type tag columns also match?
// We don't do so in the ClojureScript version.
self.wheres.push(ColumnConstraint::EqualsColumn(primary.clone(), secondary.clone()));
self.wheres.push(ColumnConstraint::Equals(primary.clone(), QueryValue::Column(secondary.clone())));
}
}
}
@ -822,6 +789,79 @@ impl ConjoiningClauses {
return;
}
}
/// Take a function argument and turn it into a `QueryValue` suitable for use in a concrete
/// constraint.
///
/// - A variable resolves to the first column it is bound to in `column_bindings`; a variable
///   with no such binding yields `ErrorKind::UnboundVariable`.
/// - A bare integer becomes `QueryValue::PrimitiveLong`.
/// - Boolean, float, and text constants become the matching `TypedValue`.
///
/// # Panics
///
/// Idents, big integers, and source variables are not handled yet and hit `unimplemented!()`.
fn resolve_argument(&self, arg: FnArg) -> Result<QueryValue> {
    use self::FnArg::*;
    match arg {
        FnArg::Variable(var) => {
            self.column_bindings
                .get(&var)
                .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone())))
                .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var)))
        },
        EntidOrInteger(i) => Ok(QueryValue::PrimitiveLong(i)),
        Ident(_) => unimplemented!(), // TODO
        Constant(NonIntegerConstant::Boolean(val)) => Ok(QueryValue::TypedValue(TypedValue::Boolean(val))),
        Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))),
        // `arg` is owned, so the text can be moved into the typed value without copying it.
        Constant(NonIntegerConstant::Text(s)) => Ok(QueryValue::TypedValue(TypedValue::String(s))),
        Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(),
        SrcVar(_) => unimplemented!(),
    }
}
/// Dispatch a parsed predicate to the code that knows how to algebrize it.
///
/// Our Datalog has several kinds of predicates/functions:
/// - A limited set of binary comparison operators: < > <= >= !=.
///   These are converted into SQLite binary comparisons and some type constraints.
/// - A set of predicates like `fulltext` and `get-else` that are translated into
///   SQL `MATCH`es or joins, yielding bindings.
/// - In the future, some predicates that are implemented via function calls in SQLite.
///
/// Only the five built-in binary comparison operators are implemented at present; any other
/// operator name produces `ErrorKind::UnknownFunction`.
pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> {
    // The set of built-in predicates will grow, each handled differently, and user-specified
    // predicates will eventually be allowed, so the predicate name is what we branch on.
    match NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) {
        Some(comparison) => self.apply_numeric_predicate(schema, comparison, predicate),
        None => {
            bail!(ErrorKind::UnknownFunction(predicate.operator.clone()))
        },
    }
}
/// Algebrize a binary numeric comparison whose operator has already been recognized.
///
/// This function:
/// - Requires exactly two arguments, failing with `ErrorKind::InvalidNumberOfArguments`
///   otherwise.
/// - Resolves each argument (left first, then right) into a `QueryValue`.
/// - Accumulates a `NumericInequality` constraint into the `wheres` list.
#[allow(unused_variables)]
pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> {
    let arity = predicate.args.len();
    if arity != 2 {
        bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), arity, 2));
    }

    // Go from arguments -- parser output -- to columns or values.
    // Any variable that isn't bound by this point in the linear processing of clauses makes
    // the application of the predicate fail.
    // The arity check above guarantees that both `next()` calls yield an argument.
    let mut operands = predicate.args.into_iter();
    let lhs = self.resolve_argument(operands.next().unwrap())?;
    let rhs = self.resolve_argument(operands.next().unwrap())?;

    // These arguments must be variables or numeric constants.
    // TODO: generalize argument resolution and validation for different kinds of predicates:
    // as we process `(< ?x 5)` we are able to check or deduce that `?x` is numeric, and either
    // simplify the pattern or optimize the rest of the query.
    // To do so needs a slightly more sophisticated representation of type constraints — a set,
    // not a single `Option`.

    // TODO: static evaluation. #383.
    self.wheres.push(ColumnConstraint::NumericInequality {
        operator: comparison,
        left: lhs,
        right: rhs,
    });
    Ok(())
}
}
#[cfg(test)]
@ -914,8 +954,8 @@ mod testing {
// - datoms0.v = true
// No need for a type tag constraint, because the attribute is known.
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsEntity(d0_a, 99),
ColumnConstraint::EqualsValue(d0_v, TypedValue::Boolean(true)),
ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)),
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))),
]);
}
@ -952,7 +992,7 @@ mod testing {
// - datoms0.value_type_tag = boolean
// TODO: implement expand_type_tags.
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsValue(d0_v, TypedValue::Boolean(true)),
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))),
ColumnConstraint::HasType("datoms00".to_string(), ValueType::Boolean),
]);
}
@ -996,7 +1036,7 @@ mod testing {
// ?x is bound to datoms0.e.
assert_eq!(cc.column_bindings.get(&x).unwrap(), &vec![d0_e.clone()]);
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsEntity(d0_a, 99),
ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)),
]);
}
@ -1092,7 +1132,7 @@ mod testing {
// - datoms0.value_type_tag = string
// TODO: implement expand_type_tags.
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsValue(d0_v, TypedValue::String("hello".to_string())),
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("hello".to_string()))),
ColumnConstraint::HasType("all_datoms00".to_string(), ValueType::String),
]);
}
@ -1163,10 +1203,10 @@ mod testing {
// - datoms1.a = 99 (:foo/bar)
// - datoms1.e = datoms0.e
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsEntity(d0_a, 98),
ColumnConstraint::EqualsValue(d0_v, TypedValue::String("idgoeshere".to_string())),
ColumnConstraint::EqualsEntity(d1_a, 99),
ColumnConstraint::EqualsColumn(d0_e, d1_e),
ColumnConstraint::Equals(d0_a, QueryValue::Entid(98)),
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::String("idgoeshere".to_string()))),
ColumnConstraint::Equals(d1_a, QueryValue::Entid(99)),
ColumnConstraint::Equals(d0_e, QueryValue::Column(d1_e)),
]);
}
@ -1201,8 +1241,8 @@ mod testing {
// ?y has been expanded into `true`.
assert_eq!(cc.wheres, vec![
ColumnConstraint::EqualsEntity(d0_a, 99),
ColumnConstraint::EqualsValue(d0_v, TypedValue::Boolean(true)),
ColumnConstraint::Equals(d0_a, QueryValue::Entid(99)),
ColumnConstraint::Equals(d0_v, QueryValue::TypedValue(TypedValue::Boolean(true))),
]);
// There is no binding for ?y.

View file

@ -29,6 +29,12 @@ use mentat_query::{
WhereClause,
};
pub use errors::{
Error,
ErrorKind,
Result,
};
#[allow(dead_code)]
pub struct AlgebraicQuery {
default_source: SrcVar,
@ -62,37 +68,42 @@ impl AlgebraicQuery {
}
#[allow(dead_code)]
pub fn algebrize(schema: &Schema, parsed: FindQuery) -> AlgebraicQuery {
pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> {
// TODO: integrate default source into pattern processing.
// TODO: flesh out the rest of find-into-context.
let mut cc = cc::ConjoiningClauses::default();
let where_clauses = parsed.where_clauses;
for where_clause in where_clauses {
if let WhereClause::Pattern(p) = where_clause {
cc.apply_pattern(schema, p);
} else {
unimplemented!();
match where_clause {
WhereClause::Pattern(p) => {
cc.apply_pattern(schema, p);
},
WhereClause::Pred(p) => {
cc.apply_predicate(schema, p)?;
},
_ => unimplemented!(),
}
}
AlgebraicQuery {
Ok(AlgebraicQuery {
default_source: parsed.default_source,
find_spec: parsed.find_spec,
has_aggregates: false, // TODO: we don't parse them yet.
limit: None,
cc: cc,
}
})
}
pub use cc::{
ColumnConstraint,
ConjoiningClauses,
};
pub use types::{
ColumnConstraint,
DatomsColumn,
DatomsTable,
QualifiedAlias,
QueryValue,
SourceAlias,
TableAlias,
};

View file

@ -14,6 +14,11 @@ use std::fmt::{
Result,
};
use mentat_core::{
Entid,
TypedValue,
ValueType,
};
/// This enum models the fixed set of default tables we have -- two
/// tables and two views.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
@ -85,4 +90,117 @@ impl QualifiedAlias {
pub fn for_type_tag(&self) -> QualifiedAlias {
QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag)
}
}
}
/// A value that can appear as an operand of a `ColumnConstraint`: a column reference,
/// an entid, a typed value, or a bare integer.
#[derive(PartialEq, Eq)]
pub enum QueryValue {
    /// A reference to a column, by qualified alias.
    Column(QualifiedAlias),
    /// An entity ID.
    Entid(Entid),
    /// A value whose type is already known.
    TypedValue(TypedValue),
    // This is different: a numeric value can only apply to the 'v' column, and it implicitly
    // constrains the `value_type_tag` column. For instance, a primitive long on `datoms00` of `5`
    // cannot be a boolean, so `datoms00.value_type_tag` must be in the set `#{0, 4, 5}`.
    // Note that `5 = 5.0` in SQLite, and we preserve that here.
    PrimitiveLong(i64),
}
impl Debug for QueryValue {
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
use self::QueryValue::*;
match self {
&Column(ref qa) => {
write!(f, "{:?}", qa)
},
&Entid(ref entid) => {
write!(f, "entity({:?})", entid)
},
&TypedValue(ref typed_value) => {
write!(f, "value({:?})", typed_value)
},
&PrimitiveLong(value) => {
write!(f, "primitive({:?})", value)
},
}
}
}
#[derive(Copy, Clone, PartialEq, Eq)]
/// The numeric inequality operators that we support.
/// These are deliberately modelled as an enum rather than as strings such as "<=":
/// Datalog and SQL don't spell every operator the same way (e.g., `<>` and `!=`).
pub enum NumericComparison {
    LessThan,
    LessThanOrEquals,
    GreaterThan,
    GreaterThanOrEquals,
    NotEquals,
}

impl NumericComparison {
    /// The SQL spelling of this comparison.
    pub fn to_sql_operator(self) -> &'static str {
        match self {
            NumericComparison::LessThan => "<",
            NumericComparison::LessThanOrEquals => "<=",
            NumericComparison::GreaterThan => ">",
            NumericComparison::GreaterThanOrEquals => ">=",
            NumericComparison::NotEquals => "<>",
        }
    }

    /// Parse a Datalog operator symbol, returning `None` for anything that is not one of
    /// the five supported comparisons.
    pub fn from_datalog_operator(s: &str) -> Option<NumericComparison> {
        use self::NumericComparison::*;
        let comparison = match s {
            "<" => LessThan,
            "<=" => LessThanOrEquals,
            ">" => GreaterThan,
            ">=" => GreaterThanOrEquals,
            "!=" => NotEquals,
            _ => return None,
        };
        Some(comparison)
    }
}

/// Debug output uses the Datalog spelling of the operator.
impl Debug for NumericComparison {
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
        let datalog = match *self {
            NumericComparison::LessThan => "<",
            NumericComparison::LessThanOrEquals => "<=",
            NumericComparison::GreaterThan => ">",
            NumericComparison::GreaterThanOrEquals => ">=",
            // Datalog uses !=. SQL uses <>.
            NumericComparison::NotEquals => "!=",
        };
        f.write_str(datalog)
    }
}
/// A constraint over columns, in one of three shapes.
#[derive(PartialEq, Eq)]
pub enum ColumnConstraint {
    /// The column named by the qualified alias equals the given value.
    Equals(QualifiedAlias, QueryValue),
    /// `left <operator> right`, where `operator` is one of the supported numeric comparisons.
    NumericInequality {
        operator: NumericComparison,
        left: QueryValue,
        right: QueryValue,
    },
    /// A value-type constraint on the aliased table
    /// (its `Debug` form is `<table>.value_type_tag = <type>`).
    HasType(TableAlias, ValueType),
}
impl Debug for ColumnConstraint {
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
use self::ColumnConstraint::*;
match self {
&Equals(ref qa1, ref thing) => {
write!(f, "{:?} = {:?}", qa1, thing)
},
&NumericInequality { operator, ref left, ref right } => {
write!(f, "{:?} {:?} {:?}", left, operator, right)
},
&HasType(ref qa, value_type) => {
write!(f, "{:?}.value_type_tag = {:?}", qa, value_type)
},
}
}
}

View file

@ -21,6 +21,7 @@ use mentat_core::{
use mentat_query_algebrizer::{
DatomsColumn,
QualifiedAlias,
QueryValue,
SourceAlias,
};
@ -45,9 +46,22 @@ pub enum ColumnOrExpression {
Column(QualifiedAlias),
Entid(Entid), // Because it's so common.
Integer(i32), // We use these for type codes etc.
Long(i64),
Value(TypedValue),
}
/// `QueryValue` and `ColumnOrExpression` are almost identical… merge somehow?
impl From<QueryValue> for ColumnOrExpression {
    /// Map each `QueryValue` variant onto its `ColumnOrExpression` counterpart.
    fn from(v: QueryValue) -> Self {
        match v {
            QueryValue::Column(alias) => ColumnOrExpression::Column(alias),
            QueryValue::Entid(entid) => ColumnOrExpression::Entid(entid),
            QueryValue::TypedValue(typed_value) => ColumnOrExpression::Value(typed_value),
            QueryValue::PrimitiveLong(long) => ColumnOrExpression::Long(long),
        }
    }
}
pub type Name = String;
pub struct ProjectedColumn(pub ColumnOrExpression, pub Name);
@ -59,7 +73,7 @@ pub enum Projection {
}
#[derive(Copy, Clone)]
pub struct Op(&'static str); // TODO: we can do better than this!
pub struct Op(pub &'static str); // TODO: we can do better than this!
pub enum Constraint {
Infix {
@ -189,6 +203,10 @@ impl QueryFragment for ColumnOrExpression {
out.push_sql(integer.to_string().as_str());
Ok(())
},
&Long(long) => {
out.push_sql(long.to_string().as_str());
Ok(())
},
&Value(ref v) => {
out.push_typed_value(v)
},

View file

@ -30,6 +30,7 @@ use mentat_query_algebrizer::{
DatomsColumn,
DatomsTable,
QualifiedAlias,
QueryValue,
SourceAlias,
};
@ -44,6 +45,7 @@ use mentat_query_sql::{
Constraint,
FromClause,
Name,
Op,
Projection,
ProjectedColumn,
SelectQuery,
@ -68,18 +70,18 @@ impl ToConstraint for ColumnConstraint {
fn to_constraint(self) -> Constraint {
use self::ColumnConstraint::*;
match self {
EqualsEntity(qa, entid) =>
Equals(qa, QueryValue::Entid(entid)) =>
Constraint::equal(qa.to_column(), ColumnOrExpression::Entid(entid)),
EqualsValue(qa, tv) =>
Equals(qa, QueryValue::TypedValue(tv)) =>
Constraint::equal(qa.to_column(), ColumnOrExpression::Value(tv)),
EqualsColumn(left, right) =>
Equals(left, QueryValue::Column(right)) =>
Constraint::equal(left.to_column(), right.to_column()),
EqualsPrimitiveLong(table, value) => {
let value_column = QualifiedAlias(table.clone(), DatomsColumn::Value).to_column();
let tag_column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column();
Equals(qa, QueryValue::PrimitiveLong(value)) => {
let tag_column = qa.for_type_tag().to_column();
let value_column = qa.to_column();
/// A bare long in a query might match a ref, an instant, a long (obviously), or a
/// double. If it's negative, it can't match a ref, but that's OK -- it won't!
@ -109,6 +111,14 @@ impl ToConstraint for ColumnConstraint {
}
},
NumericInequality { operator, left, right } => {
Constraint::Infix {
op: Op(operator.to_sql_operator()),
left: left.into(),
right: right.into(),
}
},
HasType(table, value_type) => {
let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column();
Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag()))

View file

@ -43,7 +43,7 @@ fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) {
fn translate<T: Into<Option<u64>>>(schema: &Schema, input: &'static str, limit: T) -> SQLQuery {
let parsed = parse_find_string(input).expect("parse failed");
let mut algebrized = algebrize(schema, parsed);
let mut algebrized = algebrize(schema, parsed).expect("algebrize failed");
algebrized.apply_limit(limit.into());
let select = query_to_select(algebrized);
select.query.to_sql_query().unwrap()
@ -168,7 +168,7 @@ fn test_unknown_ident() {
let impossible = r#"[:find ?x :where [?x :db/ident :no/exist]]"#;
let parsed = parse_find_string(impossible).expect("parse failed");
let algebrized = algebrize(&schema, parsed);
let algebrized = algebrize(&schema, parsed).expect("algebrize failed");
// This query cannot return results: the ident doesn't resolve for a ref-typed attribute.
assert!(algebrized.is_known_empty());
@ -178,3 +178,50 @@ fn test_unknown_ident() {
let sql = select.query.to_sql_query().unwrap().sql;
assert_eq!("SELECT 1 LIMIT 0", sql);
}
/// `<` against a variable bound through a wildcard attribute, using an empty schema.
#[test]
fn test_numeric_less_than_unknown_attribute() {
    let schema = Schema::default();
    let query = r#"[:find ?x :where [?x _ ?y] [(< ?y 10)]]"#;
    let translated = translate(&schema, query, None);

    // TODO: we don't infer numeric types from numeric predicates, because the _SQL_ type code
    // is a single value (5), but the Datalog types are a set (Double and Long).
    // When we do, this will correctly use `datoms` instead of `all_datoms`.
    assert_eq!(translated.sql, "SELECT `all_datoms00`.e AS `?x` FROM `all_datoms` AS `all_datoms00` WHERE `all_datoms00`.v < 10");
    assert_eq!(translated.args, vec![]);
}
/// `>=` with a float constant against an attribute the schema declares as `Double`.
#[test]
fn test_numeric_gte_known_attribute() {
    let mut schema = Schema::default();
    associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
    let double_attribute = Attribute {
        value_type: ValueType::Double,
        ..Default::default()
    };
    add_attribute(&mut schema, 99, double_attribute);

    let query = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#;
    let translated = translate(&schema, query, None);
    assert_eq!(translated.sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9");
    assert_eq!(translated.args, vec![]);
}
/// Datalog `!=` against a `Long` attribute must be emitted as SQL `<>`.
#[test]
fn test_numeric_not_equals_known_attribute() {
    let mut schema = Schema::default();
    associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 99);
    let long_attribute = Attribute {
        value_type: ValueType::Long,
        ..Default::default()
    };
    add_attribute(&mut schema, 99, long_attribute);

    let query = r#"[:find ?x :where [?x :foo/bar ?y] [(!= ?y 12)]]"#;
    let translated = translate(&schema, query, None);
    assert_eq!(translated.sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12");
    assert_eq!(translated.args, vec![]);
}

View file

@ -158,6 +158,7 @@ impl FromValue<FnArg> for FnArg {
.or_else(||
match v {
&edn::Value::Integer(i) => Some(FnArg::EntidOrInteger(i)),
&edn::Value::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))),
_ => unimplemented!(),
})
}

View file

@ -14,6 +14,7 @@ use rusqlite;
use edn;
use mentat_db;
use mentat_query_algebrizer;
use mentat_query_parser;
use mentat_query_projector;
use mentat_sql;
@ -31,6 +32,7 @@ error_chain! {
links {
DbError(mentat_db::Error, mentat_db::ErrorKind);
QueryError(mentat_query_algebrizer::Error, mentat_query_algebrizer::ErrorKind); // Let's not leak the term 'algebrizer'.
QueryParseError(mentat_query_parser::Error, mentat_query_parser::ErrorKind);
ProjectorError(mentat_query_projector::Error, mentat_query_projector::ErrorKind);
SqlError(mentat_sql::Error, mentat_sql::ErrorKind);

View file

@ -65,7 +65,7 @@ pub fn q_once<'sqlite, 'schema, 'query, T, U>
// TODO: validate inputs.
let parsed = parse_find_string(query)?;
let mut algebrized = algebrize(schema, parsed);
let mut algebrized = algebrize(schema, parsed)?;
if algebrized.is_known_empty() {
// We don't need to do any SQL work at all.