Implement complex or joins. (#410) r=nalexander

This commit is contained in:
Richard Newman 2017-04-12 19:23:40 -07:00
commit d8f761993d
11 changed files with 768 additions and 170 deletions

View file

@ -20,11 +20,28 @@ pub struct RcCounter {
c: Rc<AtomicUsize>, c: Rc<AtomicUsize>,
} }
/// A simple shared counter.
impl RcCounter { impl RcCounter {
/// Build a counter whose underlying atomic starts at `value`.
pub fn with_initial(value: usize) -> Self {
    let shared = Rc::new(AtomicUsize::new(value));
    RcCounter { c: shared }
}
pub fn new() -> Self { pub fn new() -> Self {
RcCounter { c: Rc::new(AtomicUsize::new(0)) } RcCounter { c: Rc::new(AtomicUsize::new(0)) }
} }
/// Return the next value in the sequence.
///
/// ```
/// use mentat_core::counter::RcCounter;
///
/// let c = RcCounter::with_initial(3);
/// assert_eq!(c.next(), 3);
/// assert_eq!(c.next(), 4);
/// let d = c.clone();
/// assert_eq!(d.next(), 5);
/// assert_eq!(c.next(), 6);
/// ```
pub fn next(&self) -> usize { pub fn next(&self) -> usize {
self.c.fetch_add(1, Ordering::SeqCst) self.c.fetch_add(1, Ordering::SeqCst)
} }

View file

@ -48,6 +48,8 @@ use errors::{
use types::{ use types::{
ColumnConstraint, ColumnConstraint,
ColumnIntersection, ColumnIntersection,
ComputedTable,
Column,
DatomsColumn, DatomsColumn,
DatomsTable, DatomsTable,
EmptyBecause, EmptyBecause,
@ -141,6 +143,10 @@ pub struct ConjoiningClauses {
/// A vector of source/alias pairs used to construct a SQL `FROM` list. /// A vector of source/alias pairs used to construct a SQL `FROM` list.
pub from: Vec<SourceAlias>, pub from: Vec<SourceAlias>,
/// A vector of computed tables (typically subqueries). The index into this vector is used as
/// an identifier in a `DatomsTable::Computed(c)` table reference.
pub computed_tables: Vec<ComputedTable>,
/// A list of fragments that can be joined by `AND`. /// A list of fragments that can be joined by `AND`.
pub wheres: ColumnIntersection, pub wheres: ColumnIntersection,
@ -171,7 +177,7 @@ pub struct ConjoiningClauses {
/// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime.
/// If a var isn't present in `known_types`, it should be present here. /// If a var isn't present in `known_types`, it should be present here.
extracted_types: BTreeMap<Variable, QualifiedAlias>, pub extracted_types: BTreeMap<Variable, QualifiedAlias>,
} }
impl Debug for ConjoiningClauses { impl Debug for ConjoiningClauses {
@ -196,6 +202,7 @@ impl Default for ConjoiningClauses {
empty_because: None, empty_because: None,
alias_counter: RcCounter::new(), alias_counter: RcCounter::new(),
from: vec![], from: vec![],
computed_tables: vec![],
wheres: ColumnIntersection::default(), wheres: ColumnIntersection::default(),
input_variables: BTreeSet::new(), input_variables: BTreeSet::new(),
column_bindings: BTreeMap::new(), column_bindings: BTreeMap::new(),
@ -206,33 +213,44 @@ impl Default for ConjoiningClauses {
} }
} }
impl ConjoiningClauses {
/// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller
/// to share a counter with an enclosing scope, and to start counting at a particular offset
/// for testing.
pub fn with_alias_counter(counter: RcCounter) -> ConjoiningClauses {
ConjoiningClauses {
alias_counter: counter,
..Default::default()
}
}
}
/// Cloning. /// Cloning.
impl ConjoiningClauses { impl ConjoiningClauses {
fn make_receptacle(&self) -> ConjoiningClauses { fn make_receptacle(&self) -> ConjoiningClauses {
let mut concrete = ConjoiningClauses::default(); ConjoiningClauses {
concrete.empty_because = self.empty_because.clone(); alias_counter: self.alias_counter.clone(),
empty_because: self.empty_because.clone(),
concrete.input_variables = self.input_variables.clone(); input_variables: self.input_variables.clone(),
concrete.value_bindings = self.value_bindings.clone(); value_bindings: self.value_bindings.clone(),
concrete.known_types = self.known_types.clone(); known_types: self.known_types.clone(),
concrete.extracted_types = self.extracted_types.clone(); extracted_types: self.extracted_types.clone(),
..Default::default()
concrete }
} }
/// Make a new CC populated with the relevant variable associations in this CC. /// Make a new CC populated with the relevant variable associations in this CC.
/// The CC shares an alias count with all of its copies. /// The CC shares an alias count with all of its copies.
fn use_as_template(&self, vars: &BTreeSet<Variable>) -> ConjoiningClauses { fn use_as_template(&self, vars: &BTreeSet<Variable>) -> ConjoiningClauses {
let mut template = ConjoiningClauses::default(); ConjoiningClauses {
template.alias_counter = self.alias_counter.clone(); // Rc ftw. alias_counter: self.alias_counter.clone(),
template.empty_because = self.empty_because.clone(); empty_because: self.empty_because.clone(),
input_variables: self.input_variables.intersection(vars).cloned().collect(),
template.input_variables = self.input_variables.intersection(vars).cloned().collect(); value_bindings: self.value_bindings.with_intersected_keys(&vars),
template.value_bindings = self.value_bindings.with_intersected_keys(&vars); known_types: self.known_types.with_intersected_keys(&vars),
template.known_types = self.known_types.with_intersected_keys(&vars); extracted_types: self.extracted_types.with_intersected_keys(&vars),
template.extracted_types = self.extracted_types.with_intersected_keys(&vars); ..Default::default()
}
template
} }
} }
@ -269,35 +287,62 @@ impl ConjoiningClauses {
} }
} }
pub fn bind_column_to_var(&mut self, schema: &Schema, table: TableAlias, column: DatomsColumn, var: Variable) { pub fn bind_column_to_var<C: Into<Column>>(&mut self, schema: &Schema, table: TableAlias, column: C, var: Variable) {
let column = column.into();
// Do we have an external binding for this? // Do we have an external binding for this?
if let Some(bound_val) = self.bound_value(&var) { if let Some(bound_val) = self.bound_value(&var) {
// Great! Use that instead. // Great! Use that instead.
// We expect callers to do things like bind keywords here; we need to translate these // We expect callers to do things like bind keywords here; we need to translate these
// before they hit our constraints. // before they hit our constraints.
// TODO: recognize when the valueType might be a ref and also translate entids there. match column {
if column == DatomsColumn::Value { Column::Variable(_) => {
self.constrain_column_to_constant(table, column, bound_val); // We don't need to handle expansion of attributes here. The subquery that
} else { // produces the variable projection will do so.
match bound_val { self.constrain_column_to_constant(table, column, bound_val);
TypedValue::Keyword(ref kw) => { },
if let Some(entid) = self.entid_for_ident(schema, kw) {
Column::Fixed(DatomsColumn::ValueTypeTag) => {
// I'm pretty sure this is meaningless right now, because we will never bind
// a type tag to a variable -- there's no syntax for doing so.
// In the future we might expose a way to do so, perhaps something like:
// ```
// [:find ?x
// :where [?x _ ?y]
// [(= (typeof ?y) :db.valueType/double)]]
// ```
unimplemented!();
},
// TODO: recognize when the valueType might be a ref and also translate entids there.
Column::Fixed(DatomsColumn::Value) => {
self.constrain_column_to_constant(table, column, bound_val);
},
// These columns can only be entities, so attempt to translate keywords. If we can't
// get an entity out of the bound value, the pattern cannot produce results.
Column::Fixed(DatomsColumn::Attribute) |
Column::Fixed(DatomsColumn::Entity) |
Column::Fixed(DatomsColumn::Tx) => {
match bound_val {
TypedValue::Keyword(ref kw) => {
if let Some(entid) = self.entid_for_ident(schema, kw) {
self.constrain_column_to_entity(table, column, entid);
} else {
// Impossible.
// For attributes this shouldn't occur, because we check the binding in
// `table_for_places`/`alias_table`, and if it didn't resolve to a valid
// attribute then we should have already marked the pattern as empty.
self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.cloned()));
}
},
TypedValue::Ref(entid) => {
self.constrain_column_to_entity(table, column, entid); self.constrain_column_to_entity(table, column, entid);
} else { },
// Impossible. _ => {
// For attributes this shouldn't occur, because we check the binding in // One can't bind an e, a, or tx to something other than an entity.
// `table_for_places`/`alias_table`, and if it didn't resolve to a valid self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val));
// attribute then we should have already marked the pattern as empty. },
self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.cloned())); }
}
},
TypedValue::Ref(entid) => {
self.constrain_column_to_entity(table, column, entid);
},
_ => {
// One can't bind an e, a, or tx to something other than an entity.
self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val));
},
} }
} }
@ -311,10 +356,12 @@ impl ConjoiningClauses {
// If this is a value, and we don't already know its type or where // If this is a value, and we don't already know its type or where
// to get its type, record that we can get it from this table. // to get its type, record that we can get it from this table.
let needs_type_extraction = let needs_type_extraction =
!late_binding && // Never need to extract for bound vars. !late_binding && // Never need to extract for bound vars.
column == DatomsColumn::Value && // Never need to extract types for refs. // Never need to extract types for refs, and var columns are handled elsewhere:
self.known_type(&var).is_none() && // Don't need to extract if we know a single type. // a subquery will be projecting a type tag.
!self.extracted_types.contains_key(&var); // We're already extracting the type. column == Column::Fixed(DatomsColumn::Value) &&
self.known_type(&var).is_none() && // Don't need to extract if we know a single type.
!self.extracted_types.contains_key(&var); // We're already extracting the type.
let alias = QualifiedAlias(table, column); let alias = QualifiedAlias(table, column);
@ -326,11 +373,13 @@ impl ConjoiningClauses {
self.column_bindings.entry(var).or_insert(vec![]).push(alias); self.column_bindings.entry(var).or_insert(vec![]).push(alias);
} }
pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { pub fn constrain_column_to_constant<C: Into<Column>>(&mut self, table: TableAlias, column: C, constant: TypedValue) {
let column = column.into();
self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant)))
} }
pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { pub fn constrain_column_to_entity<C: Into<Column>>(&mut self, table: TableAlias, column: C, entity: Entid) {
let column = column.into();
self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity)))
} }
@ -340,7 +389,7 @@ impl ConjoiningClauses {
pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) {
self.wheres.add_intersection(ColumnConstraint::Equals( self.wheres.add_intersection(ColumnConstraint::Equals(
QualifiedAlias(table, DatomsColumn::Value), QualifiedAlias(table, Column::Fixed(DatomsColumn::Value)),
QueryValue::PrimitiveLong(value))) QueryValue::PrimitiveLong(value)))
} }
@ -569,7 +618,7 @@ impl ConjoiningClauses {
Some(v) => { Some(v) => {
// This pattern cannot match: the caller has bound a non-entity value to an // This pattern cannot match: the caller has bound a non-entity value to an
// attribute place. // attribute place.
Err(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, v.clone())) Err(EmptyBecause::InvalidBinding(Column::Fixed(DatomsColumn::Attribute), v.clone()))
}, },
} }
}, },
@ -577,7 +626,12 @@ impl ConjoiningClauses {
} }
pub fn next_alias_for_table(&mut self, table: DatomsTable) -> TableAlias { pub fn next_alias_for_table(&mut self, table: DatomsTable) -> TableAlias {
format!("{}{:02}", table.name(), self.alias_counter.next()) match table {
DatomsTable::Computed(u) =>
format!("{}{:02}", table.name(), u),
_ =>
format!("{}{:02}", table.name(), self.alias_counter.next()),
}
} }
/// Produce a (table, alias) pair to handle the provided pattern. /// Produce a (table, alias) pair to handle the provided pattern.

View file

@ -36,8 +36,12 @@ use types::{
ColumnConstraintOrAlternation, ColumnConstraintOrAlternation,
ColumnAlternation, ColumnAlternation,
ColumnIntersection, ColumnIntersection,
ComputedTable,
DatomsTable, DatomsTable,
EmptyBecause, EmptyBecause,
QualifiedAlias,
SourceAlias,
VariableColumn,
}; };
/// Return true if both left and right are the same variable or both are non-variable. /// Return true if both left and right are the same variable or both are non-variable.
@ -66,6 +70,18 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla
} }
} }
/// Extension trait for a collection of computed tables: appending a table yields the
/// table reference that identifies it.
trait PushComputed {
    /// Append `item` and return a `DatomsTable::Computed` carrying the position at which
    /// it was stored.
    fn push_computed(&mut self, item: ComputedTable) -> DatomsTable;
}

impl PushComputed for Vec<ComputedTable> {
    fn push_computed(&mut self, item: ComputedTable) -> DatomsTable {
        self.push(item);
        // The element just appended occupies the final slot.
        DatomsTable::Computed(self.len() - 1)
    }
}
pub enum DeconstructedOrJoin { pub enum DeconstructedOrJoin {
KnownSuccess, KnownSuccess,
KnownEmpty(EmptyBecause), KnownEmpty(EmptyBecause),
@ -176,6 +192,8 @@ impl ConjoiningClauses {
/// to be called _only_ by `deconstruct_or_join`. /// to be called _only_ by `deconstruct_or_join`.
fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin { fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin {
// Preconditions enforced by `deconstruct_or_join`. // Preconditions enforced by `deconstruct_or_join`.
// Note that a fully unified explicit `or-join` can arrive here, and might leave as
// an implicit `or`.
assert!(or_join.is_fully_unified()); assert!(or_join.is_fully_unified());
assert!(or_join.clauses.len() >= 2); assert!(or_join.clauses.len() >= 2);
@ -193,7 +211,7 @@ impl ConjoiningClauses {
let mut empty_because: Option<EmptyBecause> = None; let mut empty_because: Option<EmptyBecause> = None;
// Walk each clause in turn, bailing as soon as we know this can't be simple. // Walk each clause in turn, bailing as soon as we know this can't be simple.
let (join_clauses, mentioned_vars) = or_join.dismember(); let (join_clauses, _unify_vars, mentioned_vars) = or_join.dismember();
let mut clauses = join_clauses.into_iter(); let mut clauses = join_clauses.into_iter();
while let Some(clause) = clauses.next() { while let Some(clause) = clauses.next() {
// If we fail half-way through processing, we want to reconstitute the input. // If we fail half-way through processing, we want to reconstitute the input.
@ -306,9 +324,9 @@ impl ConjoiningClauses {
// using a single table alias. // using a single table alias.
self.apply_simple_or_join(schema, patterns, mentioned_vars) self.apply_simple_or_join(schema, patterns, mentioned_vars)
}, },
DeconstructedOrJoin::Complex(_) => { DeconstructedOrJoin::Complex(or_join) => {
// Do this the hard way. TODO // Do this the hard way.
unimplemented!(); self.apply_complex_or_join(schema, or_join)
}, },
} }
} }
@ -341,7 +359,11 @@ impl ConjoiningClauses {
/// OR (datoms00.a = 98 AND datoms00.v = 'Peter') /// OR (datoms00.a = 98 AND datoms00.v = 'Peter')
/// ``` /// ```
/// ///
fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec<Pattern>, mentioned_vars: BTreeSet<Variable>) -> Result<()> { fn apply_simple_or_join(&mut self,
schema: &Schema,
patterns: Vec<Pattern>,
mentioned_vars: BTreeSet<Variable>)
-> Result<()> {
if self.is_known_empty() { if self.is_known_empty() {
return Ok(()) return Ok(())
} }
@ -481,6 +503,175 @@ impl ConjoiningClauses {
self.narrow_types(cc.known_types); self.narrow_types(cc.known_types);
Ok(()) Ok(())
} }
/// Apply a provided `or` or `or-join` to this `ConjoiningClauses`. If you're calling this
/// rather than another `or`-applier, it's assumed that the contents of the `or` are relatively
/// complex: perhaps its arms consist of more than just patterns, or perhaps each arm includes
/// different variables in different places.
///
/// Step one (not yet implemented): any clauses that are standalone patterns might differ only
/// in attribute. In that case, we can treat them as a 'simple or' -- a single pattern with a
/// WHERE clause that alternates on the attribute. Pull those out first.
///
/// Step two: for each cluster of patterns, and for each `and`, recursively build a CC and
/// simple projection. The projection must be the same for each CC, because we will concatenate
/// these with a `UNION`. This is one reason why we require each pattern in the `or` to unify
/// the same variables!
///
/// Finally, we alias this entire UNION block as a FROM; it can be stitched into the outer query
/// by looking at the projection.
///
/// For example,
///
/// ```edn
/// [:find ?page :in $ ?string :where
///  (or [?page :page/title ?string]
///      [?page :page/excerpt ?string]
///      (and [?save :save/string ?string]
///           [?page :page/save ?save]))]
/// ```
///
/// would expand to something like
///
/// ```sql
/// SELECT or123.page AS page FROM
///  (SELECT datoms124.e AS page FROM datoms AS datoms124
///   WHERE datoms124.v = ? AND
///         (datoms124.a = :page/title OR
///          datoms124.a = :page/excerpt)
///   UNION
///   SELECT datoms126.e AS page FROM datoms AS datoms125, datoms AS datoms126
///   WHERE datoms125.a = :save/string AND
///         datoms125.v = ? AND
///         datoms126.v = datoms125.e AND
///         datoms126.a = :page/save)
///  AS or123
/// ```
///
/// Note that a top-level standalone `or` doesn't really need to be aliased, but
/// it shouldn't do any harm.
///
/// # Errors
///
/// Any error returned by `apply_clause` while populating an arm is propagated unchanged.
///
/// # Panics
///
/// Panics if no arm survives and yet no arm recorded why it was empty.
fn apply_complex_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> {
    // N.B., a solitary pattern here *cannot* be simply applied to the enclosing CC. We don't
    // want to join all the vars, and indeed if it were safe to do so, we wouldn't have ended up
    // in this function!
    let (join_clauses, unify_vars, mentioned_vars) = or_join.dismember();

    // The variables every arm must project: either everything mentioned (implicit `or`) or
    // exactly the variables named by the `or-join`.
    let projection: BTreeSet<Variable> = match unify_vars {
        UnifyVars::Implicit => mentioned_vars.into_iter().collect(),
        UnifyVars::Explicit(vs) => vs.into_iter().collect(),
    };
    let template = self.use_as_template(&projection);

    let mut acc = Vec::with_capacity(join_clauses.len());
    let mut empty_because: Option<EmptyBecause> = None;

    // Build one CC per arm. Arms that are known to be empty are dropped from the union; we
    // keep the most recent reason in case every arm turns out to be empty.
    for clause in join_clauses {
        let mut receptacle = template.make_receptacle();
        match clause {
            OrWhereClause::And(clauses) => {
                for clause in clauses {
                    receptacle.apply_clause(&schema, clause)?;
                }
            },
            OrWhereClause::Clause(clause) => {
                receptacle.apply_clause(&schema, clause)?;
            },
        }
        if receptacle.is_known_empty() {
            empty_because = receptacle.empty_because;
        } else {
            receptacle.expand_column_bindings();
            receptacle.prune_extracted_types();
            acc.push(receptacle);
        }
    }

    if acc.is_empty() {
        // NOTE(review): this relies on the `or` having at least one arm, so that an empty
        // `acc` implies some arm recorded a reason -- confirm against the parser.
        self.mark_known_empty(empty_because.expect("empty for a reason"));
        return Ok(());
    }

    // TODO: optimize the case of a single element in `acc`?

    // Now `acc` contains a sequence of CCs that were all prepared with the same types,
    // each ready to project the same variables.
    // At this point we can lift out any common type information (and even constraints) to the
    // destination CC.
    // We must also contribute type extraction information for any variables that aren't
    // concretely typed for all union arms.
    //
    // We walk the list of variables to unify -- which will become our projection
    // list -- to find out its type info in each CC. We might:
    //
    // 1. Know the type concretely from the enclosing CC. Don't project a type tag from the
    //    union. Example:
    //    ```
    //    [:find ?x ?y
    //     :where [?x :foo/int ?y]
    //            (or [(< ?y 10)]
    //                [_ :foo/verified ?y])]
    //    ```
    // 2. Not know the type, but every CC bound it to the same single type. Don't project a type
    //    tag; we simply contribute the single type to the enclosing CC. Example:
    //    ```
    //    [:find ?x ?y
    //     :where (or [?x :foo/length ?y]
    //                [?x :foo/width ?y])]
    //    ```
    // 3. (a) Have every CC come up with a non-unit type set for the var. Every CC will project
    //        a type tag column from one of its internal bindings, and the union will project it
    //        onwards. Example:
    //        ```
    //        [:find ?x ?y ?z
    //         :where [?x :foo/knows ?y]
    //                (or [?x _ ?z]
    //                    [?y _ ?z])]
    //        ```
    // 3. (b) Have some or all CCs come up with a unit type set. Every CC will project a type
    //        tag column, and those with a unit type set will project a fixed constant value.
    //        Again, the union will pass this on.
    //        ```
    //        [:find ?x ?y
    //         :where (or [?x :foo/length ?y]
    //                    [?x _ ?y])]
    //        ```
    let mut type_needed: BTreeSet<Variable> = BTreeSet::default();

    // For any variable which has an imprecise type anywhere in the UNION, add it to the
    // set that needs type extraction. All UNION arms must project the same columns.
    for var in projection.iter() {
        if acc.iter().any(|cc| cc.known_type(var).is_none()) {
            type_needed.insert(var.clone());
        }
    }

    // Hang on to these so we can stuff them in our column bindings: the sets themselves are
    // moved into the computed table below.
    let var_associations: Vec<Variable> = projection.iter().cloned().collect();
    let type_associations: Vec<Variable> = type_needed.iter().cloned().collect();

    let union = ComputedTable::Union {
        projection: projection,
        type_extraction: type_needed,
        arms: acc,
    };
    let table = self.computed_tables.push_computed(union);
    let alias = self.next_alias_for_table(table);

    // Stitch the computed table into column_bindings, so we get cross-linking.
    for var in var_associations {
        self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(var.clone()), var);
    }
    for var in type_associations {
        self.extracted_types.insert(var.clone(), QualifiedAlias::new(alias.clone(), VariableColumn::VariableTypeTag(var)));
    }
    self.from.push(SourceAlias(table, alias));
    Ok(())
}
} }
#[cfg(test)] #[cfg(test)]
@ -519,13 +710,23 @@ mod testing {
SourceAlias, SourceAlias,
}; };
use algebrize; use {
algebrize,
algebrize_with_counter,
};
fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
let parsed = parse_find_string(input).expect("parse failed"); let parsed = parse_find_string(input).expect("parse failed");
algebrize(schema.into(), parsed).expect("algebrize failed").cc algebrize(schema.into(), parsed).expect("algebrize failed").cc
} }
/// Parse `input` and algebrize it with `counter` passed through to
/// `algebrize_with_counter`, so that an inner query can be compared against the
/// algebrization of a simpler standalone query.
///
/// Panics with "parse failed" or "algebrize failed" if the respective step fails.
fn alg_c(schema: &Schema, counter: usize, input: &str) -> ConjoiningClauses {
    let query = parse_find_string(input).expect("parse failed");
    let algebrized = algebrize_with_counter(schema.into(), query, counter).expect("algebrize failed");
    algebrized.cc
}
fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) { fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) {
assert_eq!(left.wheres, right.wheres); assert_eq!(left.wheres, right.wheres);
assert_eq!(left.from, right.from); assert_eq!(left.from, right.from);
@ -605,9 +806,9 @@ mod testing {
let cc = alg(&schema, query); let cc = alg(&schema, query);
let vx = Variable::from_valid_name("?x"); let vx = Variable::from_valid_name("?x");
let d0 = "datoms00".to_string(); let d0 = "datoms00".to_string();
let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity);
let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute);
let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value);
let knows = QueryValue::Entid(66); let knows = QueryValue::Entid(66);
let parent = QueryValue::Entid(67); let parent = QueryValue::Entid(67);
let john = QueryValue::TypedValue(TypedValue::typed_string("John")); let john = QueryValue::TypedValue(TypedValue::typed_string("John"));
@ -647,11 +848,11 @@ mod testing {
let vx = Variable::from_valid_name("?x"); let vx = Variable::from_valid_name("?x");
let d0 = "datoms00".to_string(); let d0 = "datoms00".to_string();
let d1 = "datoms01".to_string(); let d1 = "datoms01".to_string();
let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity);
let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute);
let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity);
let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute);
let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value);
let name = QueryValue::Entid(65); let name = QueryValue::Entid(65);
let knows = QueryValue::Entid(66); let knows = QueryValue::Entid(66);
let parent = QueryValue::Entid(67); let parent = QueryValue::Entid(67);
@ -697,12 +898,12 @@ mod testing {
let vx = Variable::from_valid_name("?x"); let vx = Variable::from_valid_name("?x");
let d0 = "datoms00".to_string(); let d0 = "datoms00".to_string();
let d1 = "datoms01".to_string(); let d1 = "datoms01".to_string();
let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity);
let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute);
let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value);
let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity);
let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute);
let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value);
let knows = QueryValue::Entid(66); let knows = QueryValue::Entid(66);
let age = QueryValue::Entid(68); let age = QueryValue::Entid(68);
let john = QueryValue::TypedValue(TypedValue::typed_string("John")); let john = QueryValue::TypedValue(TypedValue::typed_string("John"));
@ -737,7 +938,6 @@ mod testing {
// [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])] // [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])]
// [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]]
#[test] #[test]
#[should_panic(expected = "not yet implemented")]
fn test_unit_or_join_doesnt_flatten() { fn test_unit_or_join_doesnt_flatten() {
let schema = prepopulated_schema(); let schema = prepopulated_schema();
let query = r#"[:find ?x let query = r#"[:find ?x
@ -747,29 +947,26 @@ mod testing {
let vx = Variable::from_valid_name("?x"); let vx = Variable::from_valid_name("?x");
let vy = Variable::from_valid_name("?y"); let vy = Variable::from_valid_name("?y");
let d0 = "datoms00".to_string(); let d0 = "datoms00".to_string();
let d1 = "datoms01".to_string(); let c0 = "c00".to_string();
let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); let c0x = QualifiedAlias::new(c0.clone(), VariableColumn::Variable(vx.clone()));
let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity);
let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute);
let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value);
let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute);
let knows = QueryValue::Entid(66); let knows = QueryValue::Entid(66);
let parent = QueryValue::Entid(67);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.wheres, ColumnIntersection(vec![ assert_eq!(cc.wheres, ColumnIntersection(vec![
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())),
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent.clone())),
// The outer pattern joins against the `or` on the entity, but not value -- ?y means // The outer pattern joins against the `or` on the entity, but not value -- ?y means
// different things in each place. // different things in each place.
ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(c0x.clone()))),
])); ]));
assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, c0x]));
// ?y does not have a binding in the `or-join` pattern. // ?y does not have a binding in the `or-join` pattern.
assert_eq!(cc.column_bindings.get(&vy), Some(&vec![d0v])); assert_eq!(cc.column_bindings.get(&vy), Some(&vec![d0v]));
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0),
SourceAlias(DatomsTable::Datoms, d1)]); SourceAlias(DatomsTable::Computed(0), c0)]);
} }
// These two are equivalent: // These two are equivalent:
@ -810,8 +1007,6 @@ mod testing {
/// Strictly speaking this can be implemented with a `NOT EXISTS` clause for the second pattern, /// Strictly speaking this can be implemented with a `NOT EXISTS` clause for the second pattern,
/// but that would be a fair amount of analysis work, I think. /// but that would be a fair amount of analysis work, I think.
#[test] #[test]
#[should_panic(expected = "not yet implemented")]
#[allow(dead_code, unused_variables)]
fn test_alternation_with_and() { fn test_alternation_with_and() {
let schema = prepopulated_schema(); let schema = prepopulated_schema();
let query = r#" let query = r#"
@ -820,6 +1015,34 @@ mod testing {
[?x :foo/parent "Ámbar"]) [?x :foo/parent "Ámbar"])
[?x :foo/knows "Daphne"])]"#; [?x :foo/knows "Daphne"])]"#;
let cc = alg(&schema, query); let cc = alg(&schema, query);
let mut tables = cc.computed_tables.into_iter();
match (tables.next(), tables.next()) {
(Some(ComputedTable::Union { projection, type_extraction, arms }), None) => {
assert_eq!(projection, vec![Variable::from_valid_name("?x")].into_iter().collect());
assert!(type_extraction.is_empty());
let mut arms = arms.into_iter();
match (arms.next(), arms.next(), arms.next()) {
(Some(and), Some(pattern), None) => {
let expected_and = alg_c(&schema,
0, // The first pattern to be processed.
r#"[:find ?x :where [?x :foo/knows "John"] [?x :foo/parent "Ámbar"]]"#);
compare_ccs(and, expected_and);
let expected_pattern = alg_c(&schema,
2, // Two aliases taken by the other arm.
r#"[:find ?x :where [?x :foo/knows "Daphne"]]"#);
compare_ccs(pattern, expected_pattern);
},
_ => {
panic!("Expected two arms");
}
}
},
_ => {
panic!("Didn't get two inner tables.");
},
}
} }
#[test] #[test]

View file

@ -295,6 +295,7 @@ mod testing {
}; };
use types::{ use types::{
Column,
ColumnConstraint, ColumnConstraint,
DatomsTable, DatomsTable,
QualifiedAlias, QualifiedAlias,
@ -365,9 +366,9 @@ mod testing {
// println!("{:#?}", cc); // println!("{:#?}", cc);
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity);
let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute);
let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value);
// After this, we know a lot of things: // After this, we know a lot of things:
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
@ -405,8 +406,8 @@ mod testing {
// println!("{:#?}", cc); // println!("{:#?}", cc);
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity);
let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]);
@ -454,8 +455,8 @@ mod testing {
// println!("{:#?}", cc); // println!("{:#?}", cc);
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity);
let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]);
@ -496,7 +497,7 @@ mod testing {
}); });
assert!(cc.is_known_empty()); assert!(cc.is_known_empty());
assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello)); assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(Column::Fixed(DatomsColumn::Attribute), hello));
} }
@ -519,7 +520,7 @@ mod testing {
// println!("{:#?}", cc); // println!("{:#?}", cc);
let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Entity);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]);
@ -549,8 +550,8 @@ mod testing {
// println!("{:#?}", cc); // println!("{:#?}", cc);
let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Entity);
let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value); let d0_v = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Value);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]);
@ -609,11 +610,11 @@ mod testing {
println!("{:#?}", cc); println!("{:#?}", cc);
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity);
let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute);
let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value);
let d1_e = QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity); let d1_e = QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Entity);
let d1_a = QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute); let d1_a = QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Attribute);
assert!(!cc.is_known_empty()); assert!(!cc.is_known_empty());
assert_eq!(cc.from, vec![ assert_eq!(cc.from, vec![
@ -669,9 +670,9 @@ mod testing {
tx: PatternNonValuePlace::Placeholder, tx: PatternNonValuePlace::Placeholder,
}); });
let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity);
let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute);
let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value);
// ?y has been expanded into `true`. // ?y has been expanded into `true`.
assert_eq!(cc.wheres, vec![ assert_eq!(cc.wheres, vec![

View file

@ -19,11 +19,12 @@ mod types;
mod validate; mod validate;
mod clauses; mod clauses;
use mentat_core::{ use mentat_core::{
Schema, Schema,
}; };
use mentat_core::counter::RcCounter;
use mentat_query::{ use mentat_query::{
FindQuery, FindQuery,
FindSpec, FindSpec,
@ -69,11 +70,20 @@ impl AlgebraicQuery {
} }
} }
/// Algebrize `parsed` against `schema`, starting from a `ConjoiningClauses` whose alias
/// counter is an `RcCounter` initialised to `counter` rather than a default-constructed CC.
///
/// NOTE(review): presumably this lets callers (e.g. tests comparing a union arm against a
/// standalone query) begin table-alias numbering at a chosen value -- confirm against
/// `ConjoiningClauses::with_alias_counter`.
#[allow(dead_code)] pub fn algebrize_with_counter(schema: &Schema, parsed: FindQuery, counter: usize) -> Result<AlgebraicQuery> {
let alias_counter = RcCounter::with_initial(counter);
let cc = clauses::ConjoiningClauses::with_alias_counter(alias_counter);
// The rest of the work is shared with `algebrize`.
algebrize_with_cc(schema, parsed, cc)
}
pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> { pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result<AlgebraicQuery> {
algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default())
}
#[allow(dead_code)]
pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result<AlgebraicQuery> {
// TODO: integrate default source into pattern processing. // TODO: integrate default source into pattern processing.
// TODO: flesh out the rest of find-into-context. // TODO: flesh out the rest of find-into-context.
let mut cc = clauses::ConjoiningClauses::default();
let where_clauses = parsed.where_clauses; let where_clauses = parsed.where_clauses;
for where_clause in where_clauses { for where_clause in where_clauses {
cc.apply_clause(schema, where_clause)?; cc.apply_clause(schema, where_clause)?;
@ -96,15 +106,19 @@ pub use clauses::{
}; };
pub use types::{ pub use types::{
Column,
ColumnAlternation, ColumnAlternation,
ColumnConstraint, ColumnConstraint,
ColumnConstraintOrAlternation, ColumnConstraintOrAlternation,
ColumnIntersection, ColumnIntersection,
ColumnName,
ComputedTable,
DatomsColumn, DatomsColumn,
DatomsTable, DatomsTable,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias, SourceAlias,
TableAlias, TableAlias,
VariableColumn,
}; };

View file

@ -8,6 +8,7 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the // CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License. // specific language governing permissions and limitations under the License.
use std::collections::BTreeSet;
use std::collections::HashSet; use std::collections::HashSet;
use std::fmt::{ use std::fmt::{
@ -28,13 +29,24 @@ use mentat_query::{
}; };
/// This enum models the fixed set of default tables we have -- two /// This enum models the fixed set of default tables we have -- two
/// tables and two views. /// tables and two views -- and computed tables defined in the enclosing CC.
#[derive(PartialEq, Eq, Clone, Copy, Debug)] #[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum DatomsTable { pub enum DatomsTable {
Datoms, // The non-fulltext datoms table. Datoms, // The non-fulltext datoms table.
FulltextValues, // The virtual table mapping IDs to strings. FulltextValues, // The virtual table mapping IDs to strings.
FulltextDatoms, // The fulltext-datoms view. FulltextDatoms, // The fulltext-datoms view.
AllDatoms, // Fulltext and non-fulltext datoms. AllDatoms, // Fulltext and non-fulltext datoms.
Computed(usize), // A computed table, tracked elsewhere in the query.
}
/// A source of rows that isn't a named table -- typically a subquery or union.
pub enum ComputedTable {
// Subquery(BTreeSet<Variable>, ::clauses::ConjoiningClauses),
/// A union of alternatives. Each arm is translated into its own subquery, and the
/// subqueries are combined with SQL `UNION`.
Union {
/// Variables that every arm projects, each under the variable's own name.
projection: BTreeSet<Variable>,
/// Variables for which every arm additionally projects a type-tag column.
type_extraction: BTreeSet<Variable>,
/// One set of conjoining clauses per alternative.
arms: Vec<::clauses::ConjoiningClauses>,
},
} }
impl DatomsTable { impl DatomsTable {
@ -44,12 +56,17 @@ impl DatomsTable {
DatomsTable::FulltextValues => "fulltext_values", DatomsTable::FulltextValues => "fulltext_values",
DatomsTable::FulltextDatoms => "fulltext_datoms", DatomsTable::FulltextDatoms => "fulltext_datoms",
DatomsTable::AllDatoms => "all_datoms", DatomsTable::AllDatoms => "all_datoms",
DatomsTable::Computed(_) => "c",
} }
} }
} }
/// Implemented by column types that can produce a textual name for themselves.
pub trait ColumnName {
/// Returns this column's name as an owned `String`.
fn column_name(&self) -> String;
}
/// One of the named columns of our tables. /// One of the named columns of our tables.
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone)]
pub enum DatomsColumn { pub enum DatomsColumn {
Entity, Entity,
Attribute, Attribute,
@ -58,6 +75,30 @@ pub enum DatomsColumn {
ValueTypeTag, ValueTypeTag,
} }
/// A column that is named after a query variable rather than being one of the fixed
/// datoms columns.
#[derive(PartialEq, Eq, Clone)]
pub enum VariableColumn {
/// The column carrying the variable's value; its name is the variable's own name.
Variable(Variable),
/// The column carrying the variable's type tag; its name is the variable's name
/// followed by `_value_type_tag`.
VariableTypeTag(Variable),
}
/// Any column a `QualifiedAlias` can point at: either one of the fixed datoms columns or a
/// column named after a variable.
#[derive(PartialEq, Eq, Clone)]
pub enum Column {
/// One of the named columns of the datoms tables.
Fixed(DatomsColumn),
/// A variable-named column (value or type tag).
Variable(VariableColumn),
}
/// Lets a fixed datoms column be used wherever a `Column` is expected.
impl From<DatomsColumn> for Column {
    fn from(fixed: DatomsColumn) -> Self {
        Column::Fixed(fixed)
    }
}
/// Lets a variable-named column be used wherever a `Column` is expected.
impl From<VariableColumn> for Column {
    fn from(variable: VariableColumn) -> Self {
        Column::Variable(variable)
    }
}
impl DatomsColumn { impl DatomsColumn {
pub fn as_str(&self) -> &'static str { pub fn as_str(&self) -> &'static str {
use self::DatomsColumn::*; use self::DatomsColumn::*;
@ -71,6 +112,46 @@ impl DatomsColumn {
} }
} }
impl ColumnName for DatomsColumn {
    /// The name of a fixed column is its static string form, copied into a `String`.
    fn column_name(&self) -> String {
        String::from(self.as_str())
    }
}
impl ColumnName for VariableColumn {
    /// A value column is named after its variable; a type-tag column appends
    /// `_value_type_tag` to the variable's name.
    fn column_name(&self) -> String {
        match *self {
            VariableColumn::Variable(ref var) => var.to_string(),
            VariableColumn::VariableTypeTag(ref var) => {
                format!("{}_value_type_tag", var.as_str())
            },
        }
    }
}
impl Debug for VariableColumn {
    fn fmt(&self, f: &mut Formatter) -> Result {
        // Keep these renderings in step with `VariableColumn::column_name`.
        match *self {
            VariableColumn::Variable(ref var) => f.write_str(var.as_str()),
            VariableColumn::VariableTypeTag(ref var) => {
                write!(f, "{}_value_type_tag", var.as_str())
            },
        }
    }
}
impl Debug for DatomsColumn {
    /// Debug output is just the column's static string form.
    fn fmt(&self, f: &mut Formatter) -> Result {
        f.write_str(self.as_str())
    }
}
impl Debug for Column {
fn fmt(&self, f: &mut Formatter) -> Result {
match self {
&Column::Fixed(ref c) => c.fmt(f),
&Column::Variable(ref v) => v.fmt(f),
}
}
}
/// A specific instance of a table within a query. E.g., "datoms123". /// A specific instance of a table within a query. E.g., "datoms123".
pub type TableAlias = String; pub type TableAlias = String;
@ -86,17 +167,22 @@ impl Debug for SourceAlias {
/// A particular column of a particular aliased table. E.g., "datoms123", Attribute. /// A particular column of a particular aliased table. E.g., "datoms123", Attribute.
#[derive(PartialEq, Eq, Clone)] #[derive(PartialEq, Eq, Clone)]
pub struct QualifiedAlias(pub TableAlias, pub DatomsColumn); pub struct QualifiedAlias(pub TableAlias, pub Column);
impl Debug for QualifiedAlias { impl Debug for QualifiedAlias {
fn fmt(&self, f: &mut Formatter) -> Result { fn fmt(&self, f: &mut Formatter) -> Result {
write!(f, "{}.{}", self.0, self.1.as_str()) write!(f, "{}.{:?}", self.0, self.1)
} }
} }
impl QualifiedAlias { impl QualifiedAlias {
pub fn new<C: Into<Column>>(table: TableAlias, column: C) -> Self {
QualifiedAlias(table, column.into())
}
pub fn for_type_tag(&self) -> QualifiedAlias { pub fn for_type_tag(&self) -> QualifiedAlias {
QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag) // TODO: this only makes sense for `DatomsColumn` tables.
QualifiedAlias(self.0.clone(), Column::Fixed(DatomsColumn::ValueTypeTag))
} }
} }
@ -319,7 +405,7 @@ pub enum EmptyBecause {
UnresolvedIdent(NamespacedKeyword), UnresolvedIdent(NamespacedKeyword),
InvalidAttributeIdent(NamespacedKeyword), InvalidAttributeIdent(NamespacedKeyword),
InvalidAttributeEntid(Entid), InvalidAttributeEntid(Entid),
InvalidBinding(DatomsColumn, TypedValue), InvalidBinding(Column, TypedValue),
ValueTypeMismatch(ValueType, TypedValue), ValueTypeMismatch(ValueType, TypedValue),
AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO
} }

View file

@ -37,33 +37,18 @@ use mentat_db::{
use mentat_query::{ use mentat_query::{
Element, Element,
FindSpec, FindSpec,
Variable,
}; };
use mentat_query_algebrizer::{ use mentat_query_algebrizer::{
AlgebraicQuery, AlgebraicQuery,
DatomsColumn, ColumnName,
QualifiedAlias, VariableColumn,
/*
ConjoiningClauses,
DatomsTable,
SourceAlias,
*/
}; };
use mentat_query_sql::{ use mentat_query_sql::{
ColumnOrExpression, ColumnOrExpression,
/*
Constraint,
FromClause,
*/
Name,
Projection, Projection,
ProjectedColumn, ProjectedColumn,
/*
SelectQuery,
TableList,
*/
}; };
error_chain! { error_chain! {
@ -169,14 +154,6 @@ impl TypedIndex {
} }
} }
fn column_name(var: &Variable) -> Name {
var.to_string()
}
fn value_type_tag_name(var: &Variable) -> Name {
format!("{}_value_type_tag", var.as_str())
}
/// Walk an iterator of `Element`s, collecting projector templates and columns. /// Walk an iterator of `Element`s, collecting projector templates and columns.
/// ///
/// Returns a pair: the SQL projection (which should always be a `Projection::Columns`) /// Returns a pair: the SQL projection (which should always be a `Projection::Columns`)
@ -213,7 +190,7 @@ fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
.expect("Every variable has a binding"); .expect("Every variable has a binding");
let qa = columns[0].clone(); let qa = columns[0].clone();
let name = column_name(var); let name = VariableColumn::Variable(var.clone()).column_name();
if let Some(t) = query.cc.known_type(var) { if let Some(t) = query.cc.known_type(var) {
cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name)); cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name));
@ -221,15 +198,17 @@ fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
templates.push(TypedIndex::Known(i, tag)); templates.push(TypedIndex::Known(i, tag));
i += 1; // We used one SQL column. i += 1; // We used one SQL column.
} else { } else {
let table = qa.0.clone();
cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name)); cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name));
templates.push(TypedIndex::Unknown(i, i + 1)); templates.push(TypedIndex::Unknown(i, i + 1));
i += 2; // We used two SQL columns. i += 2; // We used two SQL columns.
// Also project the type from the SQL query. // Also project the type from the SQL query.
let type_name = value_type_tag_name(var); let extracted_alias = query.cc
let type_qa = QualifiedAlias(table, DatomsColumn::ValueTypeTag); .extracted_types
cols.push(ProjectedColumn(ColumnOrExpression::Column(type_qa), type_name)); .get(var)
.expect("Every variable has a known type or an extracted type");
let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name();
cols.push(ProjectedColumn(ColumnOrExpression::Column(extracted_alias.clone()), type_name));
} }
} }
} }

View file

@ -19,10 +19,12 @@ use mentat_core::{
}; };
use mentat_query_algebrizer::{ use mentat_query_algebrizer::{
DatomsColumn, Column,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias, SourceAlias,
TableAlias,
VariableColumn,
}; };
use mentat_sql::{ use mentat_sql::{
@ -117,7 +119,7 @@ enum JoinOp {
} }
// Short-hand for a list of tables all inner-joined. // Short-hand for a list of tables all inner-joined.
pub struct TableList(pub Vec<SourceAlias>); pub struct TableList(pub Vec<TableOrSubquery>);
impl TableList { impl TableList {
fn is_empty(&self) -> bool { fn is_empty(&self) -> bool {
@ -133,8 +135,9 @@ pub struct Join {
} }
#[allow(dead_code)] #[allow(dead_code)]
enum TableOrSubquery { pub enum TableOrSubquery {
Table(SourceAlias), Table(SourceAlias),
Union(Vec<SelectQuery>, TableAlias),
// TODO: Subquery. // TODO: Subquery.
} }
@ -152,9 +155,23 @@ pub struct SelectQuery {
pub limit: Option<u64>, pub limit: Option<u64>,
} }
// We know that DatomsColumns are safe to serialize. fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult {
fn push_column(qb: &mut QueryBuilder, col: &DatomsColumn) { match col {
qb.push_sql(col.as_str()); &Column::Fixed(ref d) => {
qb.push_sql(d.as_str());
Ok(())
},
&Column::Variable(ref vc) => {
match vc {
&VariableColumn::Variable(ref v) => {
qb.push_identifier(v.as_str())
},
&VariableColumn::VariableTypeTag(ref v) => {
qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str())
},
}
},
}
} }
//--------------------------------------------------------- //---------------------------------------------------------
@ -196,8 +213,7 @@ impl QueryFragment for ColumnOrExpression {
&Column(QualifiedAlias(ref table, ref column)) => { &Column(QualifiedAlias(ref table, ref column)) => {
out.push_identifier(table.as_str())?; out.push_identifier(table.as_str())?;
out.push_sql("."); out.push_sql(".");
push_column(out, column); push_column(out, column)
Ok(())
}, },
&Entid(entid) => { &Entid(entid) => {
out.push_sql(entid.to_string().as_str()); out.push_sql(entid.to_string().as_str());
@ -324,8 +340,8 @@ impl QueryFragment for TableList {
return Ok(()); return Ok(());
} }
interpose!(sa, self.0, interpose!(t, self.0,
{ source_alias_push_sql(out, sa)? }, { t.push_sql(out)? },
{ out.push_sql(", ") }); { out.push_sql(", ") });
Ok(()) Ok(())
} }
@ -343,7 +359,15 @@ impl QueryFragment for TableOrSubquery {
fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult {
use self::TableOrSubquery::*; use self::TableOrSubquery::*;
match self { match self {
&Table(ref sa) => source_alias_push_sql(out, sa) &Table(ref sa) => source_alias_push_sql(out, sa),
&Union(ref subqueries, ref table_alias) => {
out.push_sql("(");
interpose!(subquery, subqueries,
{ subquery.push_sql(out)? },
{ out.push_sql(" UNION ") });
out.push_sql(") AS ");
out.push_identifier(table_alias.as_str())
},
} }
} }
} }
@ -406,7 +430,10 @@ impl SelectQuery {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use mentat_query_algebrizer::DatomsTable; use mentat_query_algebrizer::{
DatomsColumn,
DatomsTable,
};
fn build_constraint(c: Constraint) -> String { fn build_constraint(c: Constraint) -> String {
let mut builder = SQLiteQueryBuilder::new(); let mut builder = SQLiteQueryBuilder::new();
@ -418,19 +445,19 @@ mod tests {
#[test] #[test]
fn test_in_constraint() { fn test_in_constraint() {
let none = Constraint::In { let none = Constraint::In {
left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)),
list: vec![], list: vec![],
}; };
let one = Constraint::In { let one = Constraint::In {
left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)),
list: vec![ list: vec![
ColumnOrExpression::Entid(123), ColumnOrExpression::Entid(123),
], ],
}; };
let three = Constraint::In { let three = Constraint::In {
left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)),
list: vec![ list: vec![
ColumnOrExpression::Entid(123), ColumnOrExpression::Entid(123),
ColumnOrExpression::Entid(456), ColumnOrExpression::Entid(456),
@ -476,8 +503,8 @@ mod tests {
let datoms01 = "datoms01".to_string(); let datoms01 = "datoms01".to_string();
let eq = Op("="); let eq = Op("=");
let source_aliases = vec![ let source_aliases = vec![
SourceAlias(DatomsTable::Datoms, datoms00.clone()), TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms00.clone())),
SourceAlias(DatomsTable::Datoms, datoms01.clone()), TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms01.clone())),
]; ];
let mut query = SelectQuery { let mut query = SelectQuery {
@ -485,24 +512,24 @@ mod tests {
projection: Projection::Columns( projection: Projection::Columns(
vec![ vec![
ProjectedColumn( ProjectedColumn(
ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Entity)), ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Entity)),
"x".to_string()), "x".to_string()),
]), ]),
from: FromClause::TableList(TableList(source_aliases)), from: FromClause::TableList(TableList(source_aliases)),
constraints: vec![ constraints: vec![
Constraint::Infix { Constraint::Infix {
op: eq.clone(), op: eq.clone(),
left: ColumnOrExpression::Column(QualifiedAlias(datoms01.clone(), DatomsColumn::Value)), left: ColumnOrExpression::Column(QualifiedAlias::new(datoms01.clone(), DatomsColumn::Value)),
right: ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Value)), right: ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Value)),
}, },
Constraint::Infix { Constraint::Infix {
op: eq.clone(), op: eq.clone(),
left: ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Attribute)), left: ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Attribute)),
right: ColumnOrExpression::Entid(65537), right: ColumnOrExpression::Entid(65537),
}, },
Constraint::Infix { Constraint::Infix {
op: eq.clone(), op: eq.clone(),
left: ColumnOrExpression::Column(QualifiedAlias(datoms01.clone(), DatomsColumn::Attribute)), left: ColumnOrExpression::Column(QualifiedAlias::new(datoms01.clone(), DatomsColumn::Attribute)),
right: ColumnOrExpression::Entid(65536), right: ColumnOrExpression::Entid(65536),
}, },
], ],

View file

@ -20,10 +20,16 @@ use mentat_query_algebrizer::{
ColumnConstraint, ColumnConstraint,
ColumnConstraintOrAlternation, ColumnConstraintOrAlternation,
ColumnIntersection, ColumnIntersection,
ColumnName,
ComputedTable,
ConjoiningClauses, ConjoiningClauses,
DatomsColumn, DatomsColumn,
DatomsTable,
QualifiedAlias, QualifiedAlias,
QueryValue, QueryValue,
SourceAlias,
TableAlias,
VariableColumn,
}; };
use mentat_query_projector::{ use mentat_query_projector::{
@ -37,9 +43,11 @@ use mentat_query_sql::{
Constraint, Constraint,
FromClause, FromClause,
Op, Op,
ProjectedColumn,
Projection, Projection,
SelectQuery, SelectQuery,
TableList, TableList,
TableOrSubquery,
}; };
trait ToConstraint { trait ToConstraint {
@ -136,7 +144,7 @@ impl ToConstraint for ColumnConstraint {
}, },
HasType(table, value_type) => { HasType(table, value_type) => {
let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column(); let column = QualifiedAlias::new(table, DatomsColumn::ValueTypeTag).to_column();
Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag())) Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag()))
}, },
} }
@ -148,6 +156,80 @@ pub struct ProjectedSelect{
pub projector: Box<Projector>, pub projector: Box<Projector>,
} }
// Nasty little hack to let us move out of indexed context.
/// A vector whose elements can each be moved out by index, exactly once.
///
/// A plain `Vec<T>` does not allow moving a value out of an indexed slot. Wrapping every
/// element in an `Option` lets a caller take ownership of one slot at a time, leaving a
/// `None` behind in its place.
struct ConsumableVec<T> {
    inner: Vec<Option<T>>,
}

impl<T> From<Vec<T>> for ConsumableVec<T> {
    /// Wraps every element of `vec` in `Some`, preserving order and indices.
    fn from(vec: Vec<T>) -> ConsumableVec<T> {
        ConsumableVec { inner: vec.into_iter().map(Some).collect() }
    }
}

impl<T> ConsumableVec<T> {
    /// Moves the value at index `i` out of the vector.
    ///
    /// # Panics
    ///
    /// Panics if `i` is out of bounds, or if the value at `i` has already been taken.
    fn take_dangerously(&mut self, i: usize) -> T {
        // `Option::take` leaves `None` in the slot, so a second fetch is detected.
        self.inner[i].take().expect("each value to only be fetched once")
    }
}
fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubquery {
match computed {
ComputedTable::Union {
projection, type_extraction, arms,
} => {
// The projection list for each CC must have the same shape and the same names.
// The values we project might be fixed or they might be columns.
TableOrSubquery::Union(
arms.into_iter()
.map(|cc| {
// We're going to end up with the variables being projected and also some
// type tag columns.
let mut columns: Vec<ProjectedColumn> = Vec::with_capacity(projection.len() + type_extraction.len());
// For each variable, find out which column it maps to within this arm, and
// project it as the variable name.
// E.g., SELECT datoms03.v AS `?x`.
for var in projection.iter() {
let col = cc.column_bindings.get(&var).unwrap()[0].clone();
let proj = ProjectedColumn(ColumnOrExpression::Column(col), var.to_string());
columns.push(proj);
}
// Similarly, project type tags if they're not known conclusively in the
// outer query.
for var in type_extraction.iter() {
let expression =
if let Some(known) = cc.known_type(var) {
// If we know the type for sure, just project the constant.
// SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag`
ColumnOrExpression::Integer(known.value_type_tag())
} else {
// Otherwise, we'll have an established type binding! This'll be
// either a datoms table or, recursively, a subquery. Project
// this:
// SELECT datoms03.v AS `?x`,
// datoms03.value_type_tag AS `?x_value_type_tag`
let extract = cc.extracted_types
.get(var)
.expect("Expected variable to have a known type or an extracted type");
ColumnOrExpression::Column(extract.clone())
};
let type_column = VariableColumn::VariableTypeTag(var.clone());
let proj = ProjectedColumn(expression, type_column.column_name());
columns.push(proj);
}
// Each arm simply turns into a subquery.
// The SQL translation will stuff "UNION" between each arm.
let projection = Projection::Columns(columns);
cc_to_select_query(projection, cc, false, None)
}).collect(),
alias)
},
}
}
/// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a
/// query that runs SQL. The next level up the call stack can check for known-empty queries if /// query that runs SQL. The next level up the call stack can check for known-empty queries if
/// needed. /// needed.
@ -155,7 +237,28 @@ fn cc_to_select_query<T: Into<Option<u64>>>(projection: Projection, cc: Conjoini
let from = if cc.from.is_empty() { let from = if cc.from.is_empty() {
FromClause::Nothing FromClause::Nothing
} else { } else {
FromClause::TableList(TableList(cc.from)) // Move these out of the CC.
let from = cc.from;
let mut computed: ConsumableVec<_> = cc.computed_tables.into();
// Why do we put computed tables directly into the `FROM` clause? The alternative is to use
// a CTE (`WITH`). They're typically equivalent, but some SQL systems (notably Postgres)
// treat CTEs as optimization barriers, so a `WITH` can be significantly slower. Given that
// this is easy enough to change later, we'll opt for using direct inclusion in `FROM`.
let tables =
from.into_iter().map(|source_alias| {
match source_alias {
SourceAlias(DatomsTable::Computed(i), alias) => {
let comp = computed.take_dangerously(i);
table_for_computed(comp, alias)
},
_ => {
TableOrSubquery::Table(source_alias)
}
}
});
FromClause::TableList(TableList(tables.collect()))
}; };
let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() }; let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() };

View file

@ -259,3 +259,97 @@ fn test_simple_or_join() {
assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1"); assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1");
assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]); assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]);
} }
/// Translates an `or-join` whose arms include an `and` clause and checks the generated SQL:
/// the `or-join` appears as a three-arm `UNION` subquery aliased `c00` in the `FROM` list,
/// and the two outer patterns join against that subquery's `?page` column.
#[test]
fn test_complex_or_join() {
let mut schema = Schema::default();
// :page/save (95) is the only ref-typed attribute in this schema.
associate_ident(&mut schema, NamespacedKeyword::new("page", "save"), 95);
add_attribute(&mut schema, 95, Attribute {
value_type: ValueType::Ref,
..Default::default()
});
associate_ident(&mut schema, NamespacedKeyword::new("save", "title"), 96);
associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97);
associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98);
associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99);
// Attributes 96 through 99 are all string-typed.
for x in 96..100 {
add_attribute(&mut schema, x, Attribute {
value_type: ValueType::String,
..Default::default()
});
}
let input = r#"[:find [?url ?description]
:where
(or-join [?page]
[?page :page/url "http://foo.com/"]
[?page :page/title "Foo"]
(and
[?page :page/save ?save]
[?save :save/title "Foo"]))
[?page :page/url ?url]
[?page :page/description ?description]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
// Aliases datoms00..datoms03 are used inside the union arms; the outer patterns
// continue the numbering at datoms04 and datoms05.
assert_eq!(sql, "SELECT `datoms04`.v AS `?url`, \
`datoms05`.v AS `?description` \
FROM (SELECT `datoms00`.e AS `?page` \
FROM `datoms` AS `datoms00` \
WHERE `datoms00`.a = 97 \
AND `datoms00`.v = $v0 \
UNION \
SELECT `datoms01`.e AS `?page` \
FROM `datoms` AS `datoms01` \
WHERE `datoms01`.a = 98 \
AND `datoms01`.v = $v1 \
UNION \
SELECT `datoms02`.e AS `?page` \
FROM `datoms` AS `datoms02`, \
`datoms` AS `datoms03` \
WHERE `datoms02`.a = 95 \
AND `datoms03`.a = 96 \
AND `datoms03`.v = $v2 \
AND `datoms02`.v = `datoms03`.e) AS `c00`, \
`datoms` AS `datoms04`, \
`datoms` AS `datoms05` \
WHERE `datoms04`.a = 97 \
AND `datoms05`.a = 99 \
AND `c00`.`?page` = `datoms04`.e \
AND `c00`.`?page` = `datoms05`.e \
LIMIT 1");
// One bound argument per string constant in the query, in order of appearance.
assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"),
make_arg("$v1", "Foo"),
make_arg("$v2", "Foo")]);
}
/// Translates an `or` in which one arm uses a known attribute and the other a placeholder
/// attribute, and checks that every arm of the resulting `UNION` projects a
/// `?y_value_type_tag` column: a constant in the first arm, and the table's
/// `value_type_tag` column in the second.
#[test]
fn test_complex_or_join_type_projection() {
let mut schema = Schema::default();
associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98);
add_attribute(&mut schema, 98, Attribute {
value_type: ValueType::String,
..Default::default()
});
let input = r#"[:find [?y]
:where
(or
[6 :page/title ?y]
[5 _ ?y])]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
// NOTE(review): the literal `10` in the first arm is presumably the type tag for
// `ValueType::String` (the declared type of :page/title) -- confirm against
// `ValueType::value_type_tag`.
assert_eq!(sql, "SELECT `c00`.`?y` AS `?y`, \
`c00`.`?y_value_type_tag` AS `?y_value_type_tag` \
FROM (SELECT `datoms00`.v AS `?y`, \
10 AS `?y_value_type_tag` \
FROM `datoms` AS `datoms00` \
WHERE `datoms00`.e = 6 \
AND `datoms00`.a = 98 \
UNION \
SELECT `all_datoms01`.v AS `?y`, \
`all_datoms01`.value_type_tag AS `?y_value_type_tag` \
FROM `all_datoms` AS `all_datoms01` \
WHERE `all_datoms01`.e = 5) AS `c00` \
LIMIT 1");
// The query contains no string constants, so nothing is bound.
assert_eq!(args, vec![]);
}

View file

@ -670,12 +670,12 @@ impl ContainsVariables for OrJoin {
} }
impl OrJoin { impl OrJoin {
pub fn dismember(self) -> (Vec<OrWhereClause>, BTreeSet<Variable>) { pub fn dismember(self) -> (Vec<OrWhereClause>, UnifyVars, BTreeSet<Variable>) {
let vars = match self.mentioned_vars { let vars = match self.mentioned_vars {
Some(m) => m, Some(m) => m,
None => self.collect_mentioned_variables(), None => self.collect_mentioned_variables(),
}; };
(self.clauses, vars) (self.clauses, self.unify_vars, vars)
} }
pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet<Variable> { pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet<Variable> {