diff --git a/core/src/counter.rs b/core/src/counter.rs index d51faa5f..9db17bb4 100644 --- a/core/src/counter.rs +++ b/core/src/counter.rs @@ -20,11 +20,28 @@ pub struct RcCounter { c: Rc, } +/// A simple shared counter. impl RcCounter { + pub fn with_initial(value: usize) -> Self { + RcCounter { c: Rc::new(AtomicUsize::new(value)) } + } + pub fn new() -> Self { RcCounter { c: Rc::new(AtomicUsize::new(0)) } } + /// Return the next value in the sequence. + /// + /// ``` + /// use mentat_core::counter::RcCounter; + /// + /// let c = RcCounter::with_initial(3); + /// assert_eq!(c.next(), 3); + /// assert_eq!(c.next(), 4); + /// let d = c.clone(); + /// assert_eq!(d.next(), 5); + /// assert_eq!(c.next(), 6); + /// ``` pub fn next(&self) -> usize { self.c.fetch_add(1, Ordering::SeqCst) } diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index c5b31b1e..4c85f25b 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -48,6 +48,8 @@ use errors::{ use types::{ ColumnConstraint, ColumnIntersection, + ComputedTable, + Column, DatomsColumn, DatomsTable, EmptyBecause, @@ -141,6 +143,10 @@ pub struct ConjoiningClauses { /// A vector of source/alias pairs used to construct a SQL `FROM` list. pub from: Vec, + /// A vector of computed tables (typically subqueries). The index into this vector is used as + /// an identifier in a `DatomsTable::Computed(c)` table reference. + pub computed_tables: Vec, + /// A list of fragments that can be joined by `AND`. pub wheres: ColumnIntersection, @@ -171,7 +177,7 @@ pub struct ConjoiningClauses { /// A mapping, similar to `column_bindings`, but used to pull type tags out of the store at runtime. /// If a var isn't present in `known_types`, it should be present here. - extracted_types: BTreeMap, + pub extracted_types: BTreeMap, } impl Debug for ConjoiningClauses { @@ -196,6 +202,7 @@ impl Default for ConjoiningClauses { empty_because: None, alias_counter: RcCounter::new(), from: vec![], + computed_tables: vec![], wheres: ColumnIntersection::default(), input_variables: BTreeSet::new(), column_bindings: BTreeMap::new(), @@ -206,33 +213,44 @@ impl Default for ConjoiningClauses { } } +impl ConjoiningClauses { + /// Construct a new `ConjoiningClauses` with the provided alias counter. This allows a caller + /// to share a counter with an enclosing scope, and to start counting at a particular offset + /// for testing. + pub fn with_alias_counter(counter: RcCounter) -> ConjoiningClauses { + ConjoiningClauses { + alias_counter: counter, + ..Default::default() + } + } +} + /// Cloning. impl ConjoiningClauses { fn make_receptacle(&self) -> ConjoiningClauses { - let mut concrete = ConjoiningClauses::default(); - concrete.empty_because = self.empty_because.clone(); - - concrete.input_variables = self.input_variables.clone(); - concrete.value_bindings = self.value_bindings.clone(); - concrete.known_types = self.known_types.clone(); - concrete.extracted_types = self.extracted_types.clone(); - - concrete + ConjoiningClauses { + alias_counter: self.alias_counter.clone(), + empty_because: self.empty_because.clone(), + input_variables: self.input_variables.clone(), + value_bindings: self.value_bindings.clone(), + known_types: self.known_types.clone(), + extracted_types: self.extracted_types.clone(), + ..Default::default() + } } /// Make a new CC populated with the relevant variable associations in this CC. /// The CC shares an alias count with all of its copies. fn use_as_template(&self, vars: &BTreeSet) -> ConjoiningClauses { - let mut template = ConjoiningClauses::default(); - template.alias_counter = self.alias_counter.clone(); // Rc ftw. - template.empty_because = self.empty_because.clone(); - - template.input_variables = self.input_variables.intersection(vars).cloned().collect(); - template.value_bindings = self.value_bindings.with_intersected_keys(&vars); - template.known_types = self.known_types.with_intersected_keys(&vars); - template.extracted_types = self.extracted_types.with_intersected_keys(&vars); - - template + ConjoiningClauses { + alias_counter: self.alias_counter.clone(), + empty_because: self.empty_because.clone(), + input_variables: self.input_variables.intersection(vars).cloned().collect(), + value_bindings: self.value_bindings.with_intersected_keys(&vars), + known_types: self.known_types.with_intersected_keys(&vars), + extracted_types: self.extracted_types.with_intersected_keys(&vars), + ..Default::default() + } } } @@ -269,35 +287,62 @@ impl ConjoiningClauses { } } - pub fn bind_column_to_var(&mut self, schema: &Schema, table: TableAlias, column: DatomsColumn, var: Variable) { + pub fn bind_column_to_var>(&mut self, schema: &Schema, table: TableAlias, column: C, var: Variable) { + let column = column.into(); // Do we have an external binding for this? if let Some(bound_val) = self.bound_value(&var) { // Great! Use that instead. // We expect callers to do things like bind keywords here; we need to translate these // before they hit our constraints. - // TODO: recognize when the valueType might be a ref and also translate entids there. - if column == DatomsColumn::Value { - self.constrain_column_to_constant(table, column, bound_val); - } else { - match bound_val { - TypedValue::Keyword(ref kw) => { - if let Some(entid) = self.entid_for_ident(schema, kw) { + match column { + Column::Variable(_) => { + // We don't need to handle expansion of attributes here. The subquery that + // produces the variable projection will do so. + self.constrain_column_to_constant(table, column, bound_val); + }, + + Column::Fixed(DatomsColumn::ValueTypeTag) => { + // I'm pretty sure this is meaningless right now, because we will never bind + // a type tag to a variable -- there's no syntax for doing so. + // In the future we might expose a way to do so, perhaps something like: + // ``` + // [:find ?x + // :where [?x _ ?y] + // [(= (typeof ?y) :db.valueType/double)]] + // ``` + unimplemented!(); + }, + + // TODO: recognize when the valueType might be a ref and also translate entids there. + Column::Fixed(DatomsColumn::Value) => { + self.constrain_column_to_constant(table, column, bound_val); + }, + + // These columns can only be entities, so attempt to translate keywords. If we can't + // get an entity out of the bound value, the pattern cannot produce results. + Column::Fixed(DatomsColumn::Attribute) | + Column::Fixed(DatomsColumn::Entity) | + Column::Fixed(DatomsColumn::Tx) => { + match bound_val { + TypedValue::Keyword(ref kw) => { + if let Some(entid) = self.entid_for_ident(schema, kw) { + self.constrain_column_to_entity(table, column, entid); + } else { + // Impossible. + // For attributes this shouldn't occur, because we check the binding in + // `table_for_places`/`alias_table`, and if it didn't resolve to a valid + // attribute then we should have already marked the pattern as empty. + self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.cloned())); + } + }, + TypedValue::Ref(entid) => { self.constrain_column_to_entity(table, column, entid); - } else { - // Impossible. - // For attributes this shouldn't occur, because we check the binding in - // `table_for_places`/`alias_table`, and if it didn't resolve to a valid - // attribute then we should have already marked the pattern as empty. - self.mark_known_empty(EmptyBecause::UnresolvedIdent(kw.cloned())); - } - }, - TypedValue::Ref(entid) => { - self.constrain_column_to_entity(table, column, entid); - }, - _ => { - // One can't bind an e, a, or tx to something other than an entity. - self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val)); - }, + }, + _ => { + // One can't bind an e, a, or tx to something other than an entity. + self.mark_known_empty(EmptyBecause::InvalidBinding(column, bound_val)); + }, + } } } @@ -311,10 +356,12 @@ impl ConjoiningClauses { // If this is a value, and we don't already know its type or where // to get its type, record that we can get it from this table. let needs_type_extraction = - !late_binding && // Never need to extract for bound vars. - column == DatomsColumn::Value && // Never need to extract types for refs. - self.known_type(&var).is_none() && // Don't need to extract if we know a single type. - !self.extracted_types.contains_key(&var); // We're already extracting the type. + !late_binding && // Never need to extract for bound vars. + // Never need to extract types for refs, and var columns are handled elsewhere: + // a subquery will be projecting a type tag. + column == Column::Fixed(DatomsColumn::Value) && + self.known_type(&var).is_none() && // Don't need to extract if we know a single type. + !self.extracted_types.contains_key(&var); // We're already extracting the type. let alias = QualifiedAlias(table, column); @@ -326,11 +373,13 @@ impl ConjoiningClauses { self.column_bindings.entry(var).or_insert(vec![]).push(alias); } - pub fn constrain_column_to_constant(&mut self, table: TableAlias, column: DatomsColumn, constant: TypedValue) { + pub fn constrain_column_to_constant>(&mut self, table: TableAlias, column: C, constant: TypedValue) { + let column = column.into(); self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) } - pub fn constrain_column_to_entity(&mut self, table: TableAlias, column: DatomsColumn, entity: Entid) { + pub fn constrain_column_to_entity>(&mut self, table: TableAlias, column: C, entity: Entid) { + let column = column.into(); self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::Entid(entity))) } @@ -340,7 +389,7 @@ impl ConjoiningClauses { pub fn constrain_value_to_numeric(&mut self, table: TableAlias, value: i64) { self.wheres.add_intersection(ColumnConstraint::Equals( - QualifiedAlias(table, DatomsColumn::Value), + QualifiedAlias(table, Column::Fixed(DatomsColumn::Value)), QueryValue::PrimitiveLong(value))) } @@ -569,7 +618,7 @@ impl ConjoiningClauses { Some(v) => { // This pattern cannot match: the caller has bound a non-entity value to an // attribute place. - Err(EmptyBecause::InvalidBinding(DatomsColumn::Attribute, v.clone())) + Err(EmptyBecause::InvalidBinding(Column::Fixed(DatomsColumn::Attribute), v.clone())) }, } }, @@ -577,7 +626,12 @@ impl ConjoiningClauses { } pub fn next_alias_for_table(&mut self, table: DatomsTable) -> TableAlias { - format!("{}{:02}", table.name(), self.alias_counter.next()) + match table { + DatomsTable::Computed(u) => + format!("{}{:02}", table.name(), u), + _ => + format!("{}{:02}", table.name(), self.alias_counter.next()), + } } /// Produce a (table, alias) pair to handle the provided pattern. diff --git a/query-algebrizer/src/clauses/or.rs b/query-algebrizer/src/clauses/or.rs index 2ad759d6..d14dd9e1 100644 --- a/query-algebrizer/src/clauses/or.rs +++ b/query-algebrizer/src/clauses/or.rs @@ -36,8 +36,12 @@ use types::{ ColumnConstraintOrAlternation, ColumnAlternation, ColumnIntersection, + ComputedTable, DatomsTable, EmptyBecause, + QualifiedAlias, + SourceAlias, + VariableColumn, }; /// Return true if both left and right are the same variable or both are non-variable. @@ -66,6 +70,18 @@ fn _simply_matches_value_place(left: &PatternValuePlace, right: &PatternValuePla } } +trait PushComputed { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable; +} + +impl PushComputed for Vec { + fn push_computed(&mut self, item: ComputedTable) -> DatomsTable { + let next_index = self.len(); + self.push(item); + DatomsTable::Computed(next_index) + } +} + pub enum DeconstructedOrJoin { KnownSuccess, KnownEmpty(EmptyBecause), @@ -176,6 +192,8 @@ impl ConjoiningClauses { /// to be called _only_ by `deconstruct_or_join`. fn _deconstruct_or_join(&self, schema: &Schema, or_join: OrJoin) -> DeconstructedOrJoin { // Preconditions enforced by `deconstruct_or_join`. + // Note that a fully unified explicit `or-join` can arrive here, and might leave as + // an implicit `or`. assert!(or_join.is_fully_unified()); assert!(or_join.clauses.len() >= 2); @@ -193,7 +211,7 @@ impl ConjoiningClauses { let mut empty_because: Option = None; // Walk each clause in turn, bailing as soon as we know this can't be simple. - let (join_clauses, mentioned_vars) = or_join.dismember(); + let (join_clauses, _unify_vars, mentioned_vars) = or_join.dismember(); let mut clauses = join_clauses.into_iter(); while let Some(clause) = clauses.next() { // If we fail half-way through processing, we want to reconstitute the input. @@ -306,9 +324,9 @@ impl ConjoiningClauses { // using a single table alias. self.apply_simple_or_join(schema, patterns, mentioned_vars) }, - DeconstructedOrJoin::Complex(_) => { - // Do this the hard way. TODO - unimplemented!(); + DeconstructedOrJoin::Complex(or_join) => { + // Do this the hard way. + self.apply_complex_or_join(schema, or_join) }, } } @@ -341,7 +359,11 @@ impl ConjoiningClauses { /// OR (datoms00.a = 98 AND datoms00.v = 'Peter') /// ``` /// - fn apply_simple_or_join(&mut self, schema: &Schema, patterns: Vec, mentioned_vars: BTreeSet) -> Result<()> { + fn apply_simple_or_join(&mut self, + schema: &Schema, + patterns: Vec, + mentioned_vars: BTreeSet) + -> Result<()> { if self.is_known_empty() { return Ok(()) } @@ -481,6 +503,175 @@ impl ConjoiningClauses { self.narrow_types(cc.known_types); Ok(()) } + + /// Apply a provided `or` or `or-join` to this `ConjoiningClauses`. If you're calling this + /// rather than another `or`-applier, it's assumed that the contents of the `or` are relatively + /// complex: perhaps its arms consist of more than just patterns, or perhaps each arm includes + /// different variables in different places. + /// + /// Step one (not yet implemented): any clauses that are standalone patterns might differ only + /// in attribute. In that case, we can treat them as a 'simple or' -- a single pattern with a + /// WHERE clause that alternates on the attribute. Pull those out first. + /// + /// Step two: for each cluster of patterns, and for each `and`, recursively build a CC and + /// simple projection. The projection must be the same for each CC, because we will concatenate + /// these with a `UNION`. This is one reason why we require each pattern in the `or` to unify + /// the same variables! + /// + /// Finally, we alias this entire UNION block as a FROM; it can be stitched into the outer query + /// by looking at the projection. + /// + /// For example, + /// + /// ```edn + /// [:find ?page :in $ ?string :where + /// (or [?page :page/title ?string] + /// [?page :page/excerpt ?string] + /// (and [?save :save/string ?string] + /// [?page :page/save ?save]))] + /// ```edn + /// + /// would expand to something like + /// + /// ```sql + /// SELECT or123.page AS page FROM + /// (SELECT datoms124.e AS page FROM datoms AS datoms124 + /// WHERE datoms124.v = ? AND + /// (datoms124.a = :page/title OR + /// datoms124.a = :page/excerpt) + /// UNION + /// SELECT datoms126.e AS page FROM datoms AS datoms125, datoms AS datoms126 + /// WHERE datoms125.a = :save/string AND + /// datoms125.v = ? AND + /// datoms126.v = datoms125.e AND + /// datoms126.a = :page/save) + /// AS or123 + /// ``` + /// + /// Note that a top-level standalone `or` doesn't really need to be aliased, but + /// it shouldn't do any harm. + fn apply_complex_or_join(&mut self, schema: &Schema, or_join: OrJoin) -> Result<()> { + // N.B., a solitary pattern here *cannot* be simply applied to the enclosing CC. We don't + // want to join all the vars, and indeed if it were safe to do so, we wouldn't have ended up + // in this function! + let (join_clauses, unify_vars, mentioned_vars) = or_join.dismember(); + let projected = match unify_vars { + UnifyVars::Implicit => mentioned_vars.into_iter().collect(), + UnifyVars::Explicit(vs) => vs.into_iter().collect(), + }; + + let template = self.use_as_template(&projected); + + let mut acc = Vec::with_capacity(join_clauses.len()); + let mut empty_because: Option = None; + + for clause in join_clauses.into_iter() { + let mut receptacle = template.make_receptacle(); + match clause { + OrWhereClause::And(clauses) => { + for clause in clauses { + receptacle.apply_clause(&schema, clause)?; + } + }, + OrWhereClause::Clause(clause) => { + receptacle.apply_clause(&schema, clause)?; + }, + } + if receptacle.is_known_empty() { + empty_because = receptacle.empty_because; + } else { + receptacle.expand_column_bindings(); + receptacle.prune_extracted_types(); + acc.push(receptacle); + } + } + + if acc.is_empty() { + self.mark_known_empty(empty_because.expect("empty for a reason")); + return Ok(()); + } + + // TODO: optimize the case of a single element in `acc`? + + // Now `acc` contains a sequence of CCs that were all prepared with the same types, + // each ready to project the same variables. + // At this point we can lift out any common type information (and even constraints) to the + // destination CC. + // We must also contribute type extraction information for any variables that aren't + // concretely typed for all union arms. + // + // We walk the list of variables to unify -- which will become our projection + // list -- to find out its type info in each CC. We might: + // + // 1. Know the type concretely from the enclosing CC. Don't project a type tag from the + // union. Example: + // ``` + // [:find ?x ?y + // :where [?x :foo/int ?y] + // (or [(< ?y 10)] + // [_ :foo/verified ?y])] + // ``` + // 2. Not know the type, but every CC bound it to the same single type. Don't project a type + // tag; we simply contribute the single type to the enclosing CC. Example: + // ``` + // [:find ?x ?y + // :where (or [?x :foo/length ?y] + // [?x :foo/width ?y])] + // ``` + // 3. (a) Have every CC come up with a non-unit type set for the var. Every CC will project + // a type tag column from one of its internal bindings, and the union will project it + // onwards. Example: + // ``` + // [:find ?x ?y ?z + // :where [?x :foo/knows ?y] + // (or [?x _ ?z] + // [?y _ ?z])] + // ``` + // 3. (b) Have some or all CCs come up with a unit type set. Every CC will project a type + // tag column, and those with a unit type set will project a fixed constant value. + // Again, the union will pass this on. + // ``` + // [:find ?x ?y + // :where (or [?x :foo/length ?y] + // [?x _ ?y])] + // ``` + let projection: BTreeSet = projected.into_iter().collect(); + let mut type_needed: BTreeSet = BTreeSet::default(); + + // For any variable which has an imprecise type anywhere in the UNION, add it to the + // set that needs type extraction. All UNION arms must project the same columns. + for var in projection.iter() { + if acc.iter().any(|cc| !cc.known_type(var).is_some()) { + type_needed.insert(var.clone()); + } + } + + // Hang on to these so we can stuff them in our column bindings. + let var_associations: Vec; + let type_associations: Vec; + { + var_associations = projection.iter().cloned().collect(); + type_associations = type_needed.iter().cloned().collect(); + } + + let union = ComputedTable::Union { + projection: projection, + type_extraction: type_needed, + arms: acc, + }; + let table = self.computed_tables.push_computed(union); + let alias = self.next_alias_for_table(table); + + // Stitch the computed table into column_bindings, so we get cross-linking. + for var in var_associations.into_iter() { + self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(var.clone()), var); + } + for var in type_associations.into_iter() { + self.extracted_types.insert(var.clone(), QualifiedAlias::new(alias.clone(), VariableColumn::VariableTypeTag(var))); + } + self.from.push(SourceAlias(table, alias)); + Ok(()) + } } #[cfg(test)] @@ -519,13 +710,23 @@ mod testing { SourceAlias, }; - use algebrize; + use { + algebrize, + algebrize_with_counter, + }; fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { let parsed = parse_find_string(input).expect("parse failed"); algebrize(schema.into(), parsed).expect("algebrize failed").cc } + /// Algebrize with a starting counter, so we can compare inner queries by algebrizing a + /// simpler version. + fn alg_c(schema: &Schema, counter: usize, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("parse failed"); + algebrize_with_counter(schema.into(), parsed, counter).expect("algebrize failed").cc + } + fn compare_ccs(left: ConjoiningClauses, right: ConjoiningClauses) { assert_eq!(left.wheres, right.wheres); assert_eq!(left.from, right.from); @@ -605,9 +806,9 @@ mod testing { let cc = alg(&schema, query); let vx = Variable::from_valid_name("?x"); let d0 = "datoms00".to_string(); - let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); - let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); - let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); let knows = QueryValue::Entid(66); let parent = QueryValue::Entid(67); let john = QueryValue::TypedValue(TypedValue::typed_string("John")); @@ -647,11 +848,11 @@ mod testing { let vx = Variable::from_valid_name("?x"); let d0 = "datoms00".to_string(); let d1 = "datoms01".to_string(); - let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); - let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); - let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); - let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); - let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); let name = QueryValue::Entid(65); let knows = QueryValue::Entid(66); let parent = QueryValue::Entid(67); @@ -697,12 +898,12 @@ mod testing { let vx = Variable::from_valid_name("?x"); let d0 = "datoms00".to_string(); let d1 = "datoms01".to_string(); - let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); - let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); - let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); - let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); - let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); - let d1v = QualifiedAlias(d1.clone(), DatomsColumn::Value); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); + let d1e = QualifiedAlias::new(d1.clone(), DatomsColumn::Entity); + let d1a = QualifiedAlias::new(d1.clone(), DatomsColumn::Attribute); + let d1v = QualifiedAlias::new(d1.clone(), DatomsColumn::Value); let knows = QueryValue::Entid(66); let age = QueryValue::Entid(68); let john = QueryValue::TypedValue(TypedValue::typed_string("John")); @@ -737,7 +938,6 @@ mod testing { // [:find ?x :where [?x :foo/bar ?y] (or-join [?x] [?x :foo/baz ?y])] // [:find ?x :where [?x :foo/bar ?y] [?x :foo/baz ?y]] #[test] - #[should_panic(expected = "not yet implemented")] fn test_unit_or_join_doesnt_flatten() { let schema = prepopulated_schema(); let query = r#"[:find ?x @@ -747,29 +947,26 @@ mod testing { let vx = Variable::from_valid_name("?x"); let vy = Variable::from_valid_name("?y"); let d0 = "datoms00".to_string(); - let d1 = "datoms01".to_string(); - let d0e = QualifiedAlias(d0.clone(), DatomsColumn::Entity); - let d0a = QualifiedAlias(d0.clone(), DatomsColumn::Attribute); - let d0v = QualifiedAlias(d0.clone(), DatomsColumn::Value); - let d1e = QualifiedAlias(d1.clone(), DatomsColumn::Entity); - let d1a = QualifiedAlias(d1.clone(), DatomsColumn::Attribute); + let c0 = "c00".to_string(); + let c0x = QualifiedAlias::new(c0.clone(), VariableColumn::Variable(vx.clone())); + let d0e = QualifiedAlias::new(d0.clone(), DatomsColumn::Entity); + let d0a = QualifiedAlias::new(d0.clone(), DatomsColumn::Attribute); + let d0v = QualifiedAlias::new(d0.clone(), DatomsColumn::Value); let knows = QueryValue::Entid(66); - let parent = QueryValue::Entid(67); assert!(!cc.is_known_empty()); assert_eq!(cc.wheres, ColumnIntersection(vec![ ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0a.clone(), knows.clone())), - ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d1a.clone(), parent.clone())), // The outer pattern joins against the `or` on the entity, but not value -- ?y means // different things in each place. - ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(d1e.clone()))), + ColumnConstraintOrAlternation::Constraint(ColumnConstraint::Equals(d0e.clone(), QueryValue::Column(c0x.clone()))), ])); - assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, d1e])); + assert_eq!(cc.column_bindings.get(&vx), Some(&vec![d0e, c0x])); // ?y does not have a binding in the `or-join` pattern. assert_eq!(cc.column_bindings.get(&vy), Some(&vec![d0v])); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, d0), - SourceAlias(DatomsTable::Datoms, d1)]); + SourceAlias(DatomsTable::Computed(0), c0)]); } // These two are equivalent: @@ -810,8 +1007,6 @@ mod testing { /// Strictly speaking this can be implemented with a `NOT EXISTS` clause for the second pattern, /// but that would be a fair amount of analysis work, I think. #[test] - #[should_panic(expected = "not yet implemented")] - #[allow(dead_code, unused_variables)] fn test_alternation_with_and() { let schema = prepopulated_schema(); let query = r#" @@ -820,6 +1015,34 @@ mod testing { [?x :foo/parent "Ámbar"]) [?x :foo/knows "Daphne"])]"#; let cc = alg(&schema, query); + let mut tables = cc.computed_tables.into_iter(); + match (tables.next(), tables.next()) { + (Some(ComputedTable::Union { projection, type_extraction, arms }), None) => { + assert_eq!(projection, vec![Variable::from_valid_name("?x")].into_iter().collect()); + assert!(type_extraction.is_empty()); + + let mut arms = arms.into_iter(); + match (arms.next(), arms.next(), arms.next()) { + (Some(and), Some(pattern), None) => { + let expected_and = alg_c(&schema, + 0, // The first pattern to be processed. + r#"[:find ?x :where [?x :foo/knows "John"] [?x :foo/parent "Ámbar"]]"#); + compare_ccs(and, expected_and); + + let expected_pattern = alg_c(&schema, + 2, // Two aliases taken by the other arm. + r#"[:find ?x :where [?x :foo/knows "Daphne"]]"#); + compare_ccs(pattern, expected_pattern); + }, + _ => { + panic!("Expected two arms"); + } + } + }, + _ => { + panic!("Didn't get two inner tables."); + }, + } } #[test] diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index 6aadfa52..99afdbf0 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -295,6 +295,7 @@ mod testing { }; use types::{ + Column, ColumnConstraint, DatomsTable, QualifiedAlias, @@ -365,9 +366,9 @@ mod testing { // println!("{:#?}", cc); - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value); // After this, we know a lot of things: assert!(!cc.is_known_empty()); @@ -405,8 +406,8 @@ mod testing { // println!("{:#?}", cc); - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity); + let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value); assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); @@ -454,8 +455,8 @@ mod testing { // println!("{:#?}", cc); - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); + let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute); assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::Datoms, "datoms00".to_string())]); @@ -496,7 +497,7 @@ mod testing { }); assert!(cc.is_known_empty()); - assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(DatomsColumn::Attribute, hello)); + assert_eq!(cc.empty_because.unwrap(), EmptyBecause::InvalidBinding(Column::Fixed(DatomsColumn::Attribute), hello)); } @@ -519,7 +520,7 @@ mod testing { // println!("{:#?}", cc); - let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); + let d0_e = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Entity); assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); @@ -549,8 +550,8 @@ mod testing { // println!("{:#?}", cc); - let d0_e = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Entity); - let d0_v = QualifiedAlias("all_datoms00".to_string(), DatomsColumn::Value); + let d0_e = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Entity); + let d0_v = QualifiedAlias::new("all_datoms00".to_string(), DatomsColumn::Value); assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![SourceAlias(DatomsTable::AllDatoms, "all_datoms00".to_string())]); @@ -609,11 +610,11 @@ mod testing { println!("{:#?}", cc); - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); - let d1_e = QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity); - let d1_a = QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute); + let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value); + let d1_e = QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Entity); + let d1_a = QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Attribute); assert!(!cc.is_known_empty()); assert_eq!(cc.from, vec![ @@ -669,9 +670,9 @@ mod testing { tx: PatternNonValuePlace::Placeholder, }); - let d0_e = QualifiedAlias("datoms00".to_string(), DatomsColumn::Entity); - let d0_a = QualifiedAlias("datoms00".to_string(), DatomsColumn::Attribute); - let d0_v = QualifiedAlias("datoms00".to_string(), DatomsColumn::Value); + let d0_e = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Entity); + let d0_a = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Attribute); + let d0_v = QualifiedAlias::new("datoms00".to_string(), DatomsColumn::Value); // ?y has been expanded into `true`. assert_eq!(cc.wheres, vec![ diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 8017de5e..cac16009 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -19,11 +19,12 @@ mod types; mod validate; mod clauses; - use mentat_core::{ Schema, }; +use mentat_core::counter::RcCounter; + use mentat_query::{ FindQuery, FindSpec, @@ -69,11 +70,20 @@ impl AlgebraicQuery { } } -#[allow(dead_code)] +pub fn algebrize_with_counter(schema: &Schema, parsed: FindQuery, counter: usize) -> Result { + let alias_counter = RcCounter::with_initial(counter); + let cc = clauses::ConjoiningClauses::with_alias_counter(alias_counter); + algebrize_with_cc(schema, parsed, cc) +} + pub fn algebrize(schema: &Schema, parsed: FindQuery) -> Result { + algebrize_with_cc(schema, parsed, clauses::ConjoiningClauses::default()) +} + +#[allow(dead_code)] +pub fn algebrize_with_cc(schema: &Schema, parsed: FindQuery, mut cc: ConjoiningClauses) -> Result { // TODO: integrate default source into pattern processing. // TODO: flesh out the rest of find-into-context. - let mut cc = clauses::ConjoiningClauses::default(); let where_clauses = parsed.where_clauses; for where_clause in where_clauses { cc.apply_clause(schema, where_clause)?; @@ -96,15 +106,19 @@ pub use clauses::{ }; pub use types::{ + Column, ColumnAlternation, ColumnConstraint, ColumnConstraintOrAlternation, ColumnIntersection, + ColumnName, + ComputedTable, DatomsColumn, DatomsTable, QualifiedAlias, QueryValue, SourceAlias, TableAlias, + VariableColumn, }; diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 8d98ddae..27a93faf 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -8,6 +8,7 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +use std::collections::BTreeSet; use std::collections::HashSet; use std::fmt::{ @@ -28,13 +29,24 @@ use mentat_query::{ }; /// This enum models the fixed set of default tables we have -- two -/// tables and two views. +/// tables and two views -- and computed tables defined in the enclosing CC. #[derive(PartialEq, Eq, Clone, Copy, Debug)] pub enum DatomsTable { Datoms, // The non-fulltext datoms table. FulltextValues, // The virtual table mapping IDs to strings. FulltextDatoms, // The fulltext-datoms view. AllDatoms, // Fulltext and non-fulltext datoms. + Computed(usize), // A computed table, tracked elsewhere in the query. +} + +/// A source of rows that isn't a named table -- typically a subquery or union. +pub enum ComputedTable { + // Subquery(BTreeSet, ::clauses::ConjoiningClauses), + Union { + projection: BTreeSet, + type_extraction: BTreeSet, + arms: Vec<::clauses::ConjoiningClauses>, + }, } impl DatomsTable { @@ -44,12 +56,17 @@ impl DatomsTable { DatomsTable::FulltextValues => "fulltext_values", DatomsTable::FulltextDatoms => "fulltext_datoms", DatomsTable::AllDatoms => "all_datoms", + DatomsTable::Computed(_) => "c", } } } +pub trait ColumnName { + fn column_name(&self) -> String; +} + /// One of the named columns of our tables. -#[derive(PartialEq, Eq, Clone, Debug)] +#[derive(PartialEq, Eq, Clone)] pub enum DatomsColumn { Entity, Attribute, @@ -58,6 +75,30 @@ pub enum DatomsColumn { ValueTypeTag, } +#[derive(PartialEq, Eq, Clone)] +pub enum VariableColumn { + Variable(Variable), + VariableTypeTag(Variable), +} + +#[derive(PartialEq, Eq, Clone)] +pub enum Column { + Fixed(DatomsColumn), + Variable(VariableColumn), +} + +impl From for Column { + fn from(from: DatomsColumn) -> Column { + Column::Fixed(from) + } +} + +impl From for Column { + fn from(from: VariableColumn) -> Column { + Column::Variable(from) + } +} + impl DatomsColumn { pub fn as_str(&self) -> &'static str { use self::DatomsColumn::*; @@ -71,6 +112,46 @@ impl DatomsColumn { } } +impl ColumnName for DatomsColumn { + fn column_name(&self) -> String { + self.as_str().to_string() + } +} + +impl ColumnName for VariableColumn { + fn column_name(&self) -> String { + match self { + &VariableColumn::Variable(ref v) => v.to_string(), + &VariableColumn::VariableTypeTag(ref v) => format!("{}_value_type_tag", v.as_str()), + } + } +} + +impl Debug for VariableColumn { + fn fmt(&self, f: &mut Formatter) -> Result { + match self { + // These should agree with VariableColumn::column_name. + &VariableColumn::Variable(ref v) => write!(f, "{}", v.as_str()), + &VariableColumn::VariableTypeTag(ref v) => write!(f, "{}_value_type_tag", v.as_str()), + } + } +} + +impl Debug for DatomsColumn { + fn fmt(&self, f: &mut Formatter) -> Result { + write!(f, "{}", self.as_str()) + } +} + +impl Debug for Column { + fn fmt(&self, f: &mut Formatter) -> Result { + match self { + &Column::Fixed(ref c) => c.fmt(f), + &Column::Variable(ref v) => v.fmt(f), + } + } +} + /// A specific instance of a table within a query. E.g., "datoms123". pub type TableAlias = String; @@ -86,17 +167,22 @@ impl Debug for SourceAlias { /// A particular column of a particular aliased table. E.g., "datoms123", Attribute. #[derive(PartialEq, Eq, Clone)] -pub struct QualifiedAlias(pub TableAlias, pub DatomsColumn); +pub struct QualifiedAlias(pub TableAlias, pub Column); impl Debug for QualifiedAlias { fn fmt(&self, f: &mut Formatter) -> Result { - write!(f, "{}.{}", self.0, self.1.as_str()) + write!(f, "{}.{:?}", self.0, self.1) } } impl QualifiedAlias { + pub fn new>(table: TableAlias, column: C) -> Self { + QualifiedAlias(table, column.into()) + } + pub fn for_type_tag(&self) -> QualifiedAlias { - QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag) + // TODO: this only makes sense for `DatomsColumn` tables. + QualifiedAlias(self.0.clone(), Column::Fixed(DatomsColumn::ValueTypeTag)) } } @@ -319,7 +405,7 @@ pub enum EmptyBecause { UnresolvedIdent(NamespacedKeyword), InvalidAttributeIdent(NamespacedKeyword), InvalidAttributeEntid(Entid), - InvalidBinding(DatomsColumn, TypedValue), + InvalidBinding(Column, TypedValue), ValueTypeMismatch(ValueType, TypedValue), AttributeLookupFailed, // Catch-all, because the table lookup code is lazy. TODO } diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index f1d94596..7966b32b 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -37,33 +37,18 @@ use mentat_db::{ use mentat_query::{ Element, FindSpec, - Variable, }; use mentat_query_algebrizer::{ AlgebraicQuery, - DatomsColumn, - QualifiedAlias, - /* - ConjoiningClauses, - DatomsTable, - SourceAlias, - */ + ColumnName, + VariableColumn, }; use mentat_query_sql::{ ColumnOrExpression, - /* - Constraint, - FromClause, - */ - Name, Projection, ProjectedColumn, - /* - SelectQuery, - TableList, - */ }; error_chain! { @@ -169,14 +154,6 @@ impl TypedIndex { } } -fn column_name(var: &Variable) -> Name { - var.to_string() -} - -fn value_type_tag_name(var: &Variable) -> Name { - format!("{}_value_type_tag", var.as_str()) -} - /// Walk an iterator of `Element`s, collecting projector templates and columns. /// /// Returns a pair: the SQL projection (which should always be a `Projection::Columns`) @@ -213,7 +190,7 @@ fn project_elements<'a, I: IntoIterator>( .expect("Every variable has a binding"); let qa = columns[0].clone(); - let name = column_name(var); + let name = VariableColumn::Variable(var.clone()).column_name(); if let Some(t) = query.cc.known_type(var) { cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name)); @@ -221,15 +198,17 @@ fn project_elements<'a, I: IntoIterator>( templates.push(TypedIndex::Known(i, tag)); i += 1; // We used one SQL column. } else { - let table = qa.0.clone(); cols.push(ProjectedColumn(ColumnOrExpression::Column(qa), name)); templates.push(TypedIndex::Unknown(i, i + 1)); i += 2; // We used two SQL columns. // Also project the type from the SQL query. - let type_name = value_type_tag_name(var); - let type_qa = QualifiedAlias(table, DatomsColumn::ValueTypeTag); - cols.push(ProjectedColumn(ColumnOrExpression::Column(type_qa), type_name)); + let extracted_alias = query.cc + .extracted_types + .get(var) + .expect("Every variable has a known type or an extracted type"); + let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name(); + cols.push(ProjectedColumn(ColumnOrExpression::Column(extracted_alias.clone()), type_name)); } } } diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 1d3d7869..0a44a6f3 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -19,10 +19,12 @@ use mentat_core::{ }; use mentat_query_algebrizer::{ - DatomsColumn, + Column, QualifiedAlias, QueryValue, SourceAlias, + TableAlias, + VariableColumn, }; use mentat_sql::{ @@ -117,7 +119,7 @@ enum JoinOp { } // Short-hand for a list of tables all inner-joined. -pub struct TableList(pub Vec); +pub struct TableList(pub Vec); impl TableList { fn is_empty(&self) -> bool { @@ -133,8 +135,9 @@ pub struct Join { } #[allow(dead_code)] -enum TableOrSubquery { +pub enum TableOrSubquery { Table(SourceAlias), + Union(Vec, TableAlias), // TODO: Subquery. } @@ -152,9 +155,23 @@ pub struct SelectQuery { pub limit: Option, } -// We know that DatomsColumns are safe to serialize. -fn push_column(qb: &mut QueryBuilder, col: &DatomsColumn) { - qb.push_sql(col.as_str()); +fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { + match col { + &Column::Fixed(ref d) => { + qb.push_sql(d.as_str()); + Ok(()) + }, + &Column::Variable(ref vc) => { + match vc { + &VariableColumn::Variable(ref v) => { + qb.push_identifier(v.as_str()) + }, + &VariableColumn::VariableTypeTag(ref v) => { + qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str()) + }, + } + }, + } } //--------------------------------------------------------- @@ -196,8 +213,7 @@ impl QueryFragment for ColumnOrExpression { &Column(QualifiedAlias(ref table, ref column)) => { out.push_identifier(table.as_str())?; out.push_sql("."); - push_column(out, column); - Ok(()) + push_column(out, column) }, &Entid(entid) => { out.push_sql(entid.to_string().as_str()); @@ -324,8 +340,8 @@ impl QueryFragment for TableList { return Ok(()); } - interpose!(sa, self.0, - { source_alias_push_sql(out, sa)? }, + interpose!(t, self.0, + { t.push_sql(out)? }, { out.push_sql(", ") }); Ok(()) } @@ -343,7 +359,15 @@ impl QueryFragment for TableOrSubquery { fn push_sql(&self, out: &mut QueryBuilder) -> BuildQueryResult { use self::TableOrSubquery::*; match self { - &Table(ref sa) => source_alias_push_sql(out, sa) + &Table(ref sa) => source_alias_push_sql(out, sa), + &Union(ref subqueries, ref table_alias) => { + out.push_sql("("); + interpose!(subquery, subqueries, + { subquery.push_sql(out)? }, + { out.push_sql(" UNION ") }); + out.push_sql(") AS "); + out.push_identifier(table_alias.as_str()) + }, } } } @@ -406,7 +430,10 @@ impl SelectQuery { #[cfg(test)] mod tests { use super::*; - use mentat_query_algebrizer::DatomsTable; + use mentat_query_algebrizer::{ + DatomsColumn, + DatomsTable, + }; fn build_constraint(c: Constraint) -> String { let mut builder = SQLiteQueryBuilder::new(); @@ -418,19 +445,19 @@ mod tests { #[test] fn test_in_constraint() { let none = Constraint::In { - left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), + left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)), list: vec![], }; let one = Constraint::In { - left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), + left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)), list: vec![ ColumnOrExpression::Entid(123), ], }; let three = Constraint::In { - left: ColumnOrExpression::Column(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value)), + left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)), list: vec![ ColumnOrExpression::Entid(123), ColumnOrExpression::Entid(456), @@ -476,8 +503,8 @@ mod tests { let datoms01 = "datoms01".to_string(); let eq = Op("="); let source_aliases = vec![ - SourceAlias(DatomsTable::Datoms, datoms00.clone()), - SourceAlias(DatomsTable::Datoms, datoms01.clone()), + TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms00.clone())), + TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms01.clone())), ]; let mut query = SelectQuery { @@ -485,24 +512,24 @@ mod tests { projection: Projection::Columns( vec![ ProjectedColumn( - ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Entity)), + ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Entity)), "x".to_string()), ]), from: FromClause::TableList(TableList(source_aliases)), constraints: vec![ Constraint::Infix { op: eq.clone(), - left: ColumnOrExpression::Column(QualifiedAlias(datoms01.clone(), DatomsColumn::Value)), - right: ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Value)), + left: ColumnOrExpression::Column(QualifiedAlias::new(datoms01.clone(), DatomsColumn::Value)), + right: ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Value)), }, Constraint::Infix { op: eq.clone(), - left: ColumnOrExpression::Column(QualifiedAlias(datoms00.clone(), DatomsColumn::Attribute)), + left: ColumnOrExpression::Column(QualifiedAlias::new(datoms00.clone(), DatomsColumn::Attribute)), right: ColumnOrExpression::Entid(65537), }, Constraint::Infix { op: eq.clone(), - left: ColumnOrExpression::Column(QualifiedAlias(datoms01.clone(), DatomsColumn::Attribute)), + left: ColumnOrExpression::Column(QualifiedAlias::new(datoms01.clone(), DatomsColumn::Attribute)), right: ColumnOrExpression::Entid(65536), }, ], diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 4e72334a..b90d9083 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -20,10 +20,16 @@ use mentat_query_algebrizer::{ ColumnConstraint, ColumnConstraintOrAlternation, ColumnIntersection, + ColumnName, + ComputedTable, ConjoiningClauses, DatomsColumn, + DatomsTable, QualifiedAlias, QueryValue, + SourceAlias, + TableAlias, + VariableColumn, }; use mentat_query_projector::{ @@ -37,9 +43,11 @@ use mentat_query_sql::{ Constraint, FromClause, Op, + ProjectedColumn, Projection, SelectQuery, TableList, + TableOrSubquery, }; trait ToConstraint { @@ -136,7 +144,7 @@ impl ToConstraint for ColumnConstraint { }, HasType(table, value_type) => { - let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column(); + let column = QualifiedAlias::new(table, DatomsColumn::ValueTypeTag).to_column(); Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag())) }, } @@ -148,6 +156,80 @@ pub struct ProjectedSelect{ pub projector: Box, } +// Nasty little hack to let us move out of indexed context. +struct ConsumableVec { + inner: Vec>, +} + +impl From> for ConsumableVec { + fn from(vec: Vec) -> ConsumableVec { + ConsumableVec { inner: vec.into_iter().map(|x| Some(x)).collect() } + } +} + +impl ConsumableVec { + fn take_dangerously(&mut self, i: usize) -> T { + ::std::mem::replace(&mut self.inner[i], None).expect("each value to only be fetched once") + } +} + +fn table_for_computed(computed: ComputedTable, alias: TableAlias) -> TableOrSubquery { + match computed { + ComputedTable::Union { + projection, type_extraction, arms, + } => { + // The projection list for each CC must have the same shape and the same names. + // The values we project might be fixed or they might be columns. + TableOrSubquery::Union( + arms.into_iter() + .map(|cc| { + // We're going to end up with the variables being projected and also some + // type tag columns. + let mut columns: Vec = Vec::with_capacity(projection.len() + type_extraction.len()); + + // For each variable, find out which column it maps to within this arm, and + // project it as the variable name. + // E.g., SELECT datoms03.v AS `?x`. + for var in projection.iter() { + let col = cc.column_bindings.get(&var).unwrap()[0].clone(); + let proj = ProjectedColumn(ColumnOrExpression::Column(col), var.to_string()); + columns.push(proj); + } + + // Similarly, project type tags if they're not known conclusively in the + // outer query. + for var in type_extraction.iter() { + let expression = + if let Some(known) = cc.known_type(var) { + // If we know the type for sure, just project the constant. + // SELECT datoms03.v AS `?x`, 10 AS `?x_value_type_tag` + ColumnOrExpression::Integer(known.value_type_tag()) + } else { + // Otherwise, we'll have an established type binding! This'll be + // either a datoms table or, recursively, a subquery. Project + // this: + // SELECT datoms03.v AS `?x`, + // datoms03.value_type_tag AS `?x_value_type_tag` + let extract = cc.extracted_types + .get(var) + .expect("Expected variable to have a known type or an extracted type"); + ColumnOrExpression::Column(extract.clone()) + }; + let type_column = VariableColumn::VariableTypeTag(var.clone()); + let proj = ProjectedColumn(expression, type_column.column_name()); + columns.push(proj); + } + + // Each arm simply turns into a subquery. + // The SQL translation will stuff "UNION" between each arm. + let projection = Projection::Columns(columns); + cc_to_select_query(projection, cc, false, None) + }).collect(), + alias) + }, + } +} + /// Returns a `SelectQuery` that queries for the provided `cc`. Note that this _always_ returns a /// query that runs SQL. The next level up the call stack can check for known-empty queries if /// needed. @@ -155,7 +237,28 @@ fn cc_to_select_query>>(projection: Projection, cc: Conjoini let from = if cc.from.is_empty() { FromClause::Nothing } else { - FromClause::TableList(TableList(cc.from)) + // Move these out of the CC. + let from = cc.from; + let mut computed: ConsumableVec<_> = cc.computed_tables.into(); + + // Why do we put computed tables directly into the `FROM` clause? The alternative is to use + // a CTE (`WITH`). They're typically equivalent, but some SQL systems (notably Postgres) + // treat CTEs as optimization barriers, so a `WITH` can be significantly slower. Given that + // this is easy enough to change later, we'll opt for using direct inclusion in `FROM`. + let tables = + from.into_iter().map(|source_alias| { + match source_alias { + SourceAlias(DatomsTable::Computed(i), alias) => { + let comp = computed.take_dangerously(i); + table_for_computed(comp, alias) + }, + _ => { + TableOrSubquery::Table(source_alias) + } + } + }); + + FromClause::TableList(TableList(tables.collect())) }; let limit = if cc.empty_because.is_some() { Some(0) } else { limit.into() }; diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index ca967cfa..02445727 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -259,3 +259,97 @@ fn test_simple_or_join() { assert_eq!(sql, "SELECT `datoms01`.v AS `?url`, `datoms02`.v AS `?description` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01`, `datoms` AS `datoms02` WHERE ((`datoms00`.a = 97 AND `datoms00`.v = $v0) OR (`datoms00`.a = 98 AND `datoms00`.v = $v1)) AND `datoms01`.a = 97 AND `datoms02`.a = 99 AND `datoms00`.e = `datoms01`.e AND `datoms00`.e = `datoms02`.e LIMIT 1"); assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), make_arg("$v1", "Foo")]); } + +#[test] +fn test_complex_or_join() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "save"), 95); + add_attribute(&mut schema, 95, Attribute { + value_type: ValueType::Ref, + ..Default::default() + }); + + associate_ident(&mut schema, NamespacedKeyword::new("save", "title"), 96); + associate_ident(&mut schema, NamespacedKeyword::new("page", "url"), 97); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + associate_ident(&mut schema, NamespacedKeyword::new("page", "description"), 99); + for x in 96..100 { + add_attribute(&mut schema, x, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + } + + let input = r#"[:find [?url ?description] + :where + (or-join [?page] + [?page :page/url "http://foo.com/"] + [?page :page/title "Foo"] + (and + [?page :page/save ?save] + [?save :save/title "Foo"])) + [?page :page/url ?url] + [?page :page/description ?description]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms04`.v AS `?url`, \ + `datoms05`.v AS `?description` \ + FROM (SELECT `datoms00`.e AS `?page` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.a = 97 \ + AND `datoms00`.v = $v0 \ + UNION \ + SELECT `datoms01`.e AS `?page` \ + FROM `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 98 \ + AND `datoms01`.v = $v1 \ + UNION \ + SELECT `datoms02`.e AS `?page` \ + FROM `datoms` AS `datoms02`, \ + `datoms` AS `datoms03` \ + WHERE `datoms02`.a = 95 \ + AND `datoms03`.a = 96 \ + AND `datoms03`.v = $v2 \ + AND `datoms02`.v = `datoms03`.e) AS `c00`, \ + `datoms` AS `datoms04`, \ + `datoms` AS `datoms05` \ + WHERE `datoms04`.a = 97 \ + AND `datoms05`.a = 99 \ + AND `c00`.`?page` = `datoms04`.e \ + AND `c00`.`?page` = `datoms05`.e \ + LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), + make_arg("$v1", "Foo"), + make_arg("$v2", "Foo")]); +} + + +#[test] +fn test_complex_or_join_type_projection() { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("page", "title"), 98); + add_attribute(&mut schema, 98, Attribute { + value_type: ValueType::String, + ..Default::default() + }); + + let input = r#"[:find [?y] + :where + (or + [6 :page/title ?y] + [5 _ ?y])]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `c00`.`?y` AS `?y`, \ + `c00`.`?y_value_type_tag` AS `?y_value_type_tag` \ + FROM (SELECT `datoms00`.v AS `?y`, \ + 10 AS `?y_value_type_tag` \ + FROM `datoms` AS `datoms00` \ + WHERE `datoms00`.e = 6 \ + AND `datoms00`.a = 98 \ + UNION \ + SELECT `all_datoms01`.v AS `?y`, \ + `all_datoms01`.value_type_tag AS `?y_value_type_tag` \ + FROM `all_datoms` AS `all_datoms01` \ + WHERE `all_datoms01`.e = 5) AS `c00` \ + LIMIT 1"); + assert_eq!(args, vec![]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index 5c724c0f..e348030b 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -670,12 +670,12 @@ impl ContainsVariables for OrJoin { } impl OrJoin { - pub fn dismember(self) -> (Vec, BTreeSet) { + pub fn dismember(self) -> (Vec, UnifyVars, BTreeSet) { let vars = match self.mentioned_vars { Some(m) => m, None => self.collect_mentioned_variables(), }; - (self.clauses, vars) + (self.clauses, self.unify_vars, vars) } pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet {