Compare commits


4 commits

Author SHA1 Message Date
Richard Newman e94337c683 WIP: pull 2018-04-03 15:21:02 -07:00
Richard Newman c7ea94b4c9 Refactoring: split up the projector crate. No other code changes. 2018-04-03 15:21:02 -07:00
Richard Newman 65e7252b56 Implement vocabulary-driven schema upgrades. 2018-04-03 15:21:02 -07:00
Richard Newman 29ccbee911 Allow retraction of some schema attributes. (#379) 2018-04-03 15:04:25 -07:00
13 changed files with 1275 additions and 679 deletions

View file

@@ -499,7 +499,7 @@ fn read_ident_map(conn: &rusqlite::Connection) -> Result<IdentMap> {
fn read_attribute_map(conn: &rusqlite::Connection) -> Result<AttributeMap> {
let entid_triples = read_materialized_view(conn, "schema")?;
let mut attribute_map = AttributeMap::default();
metadata::update_attribute_map_from_entid_triples(&mut attribute_map, entid_triples)?;
metadata::update_attribute_map_from_entid_triples(&mut attribute_map, entid_triples, ::std::iter::empty())?;
Ok(attribute_map)
}
@@ -1637,7 +1637,7 @@ mod tests {
// Cannot retract a characteristic of an installed attribute.
assert_transact!(conn,
"[[:db/retract 100 :db/cardinality :db.cardinality/many]]",
Err("not yet implemented: Retracting metadata attribute assertions not yet implemented: retracted [e a] pairs [[100 8]]"));
Err("bad schema assertion: Retracting 8 for 100 not permitted."));
// Trying to install an attribute without a :db/ident is allowed.
assert_transact!(conn, "[[:db/add 101 :db/valueType :db.type/long]
@@ -1823,7 +1823,7 @@ mod tests {
assert_transact!(conn,
"[[:db/retract 111 :db/fulltext true]]",
Err("not yet implemented: Retracting metadata attribute assertions not yet implemented: retracted [e a] pairs [[111 12]]"));
Err("bad schema assertion: Retracting 12 for 111 not permitted."));
assert_transact!(conn,
"[[:db/add 222 :db/fulltext true]]",

View file

@@ -27,8 +27,6 @@
use std::collections::{BTreeMap, BTreeSet};
use std::collections::btree_map::Entry;
use itertools::Itertools; // For join().
use add_retract_alter_set::{
AddRetractAlterSet,
};
@@ -104,14 +102,66 @@ impl MetadataReport {
/// contain install and alter markers.
///
/// Returns a report summarizing the mutations that were applied.
pub fn update_attribute_map_from_entid_triples<U>(attribute_map: &mut AttributeMap, assertions: U) -> Result<MetadataReport>
where U: IntoIterator<Item=(Entid, Entid, TypedValue)> {
pub fn update_attribute_map_from_entid_triples<A, R>(attribute_map: &mut AttributeMap, assertions: A, retractions: R) -> Result<MetadataReport>
where A: IntoIterator<Item=(Entid, Entid, TypedValue)>,
R: IntoIterator<Item=(Entid, Entid, TypedValue)> {
fn attribute_builder_to_modify(attribute_id: Entid, existing: &AttributeMap) -> AttributeBuilder {
existing.get(&attribute_id)
.map(AttributeBuilder::to_modify_attribute)
.unwrap_or_else(AttributeBuilder::default)
}
// Group mutations by impacted entid.
let mut builders: BTreeMap<Entid, AttributeBuilder> = BTreeMap::new();
// For retractions, we start with an attribute builder that's pre-populated with the existing
// attribute values. That allows us to check existing values and unset them.
for (entid, attr, ref value) in retractions.into_iter() {
let builder = builders.entry(entid).or_insert_with(|| attribute_builder_to_modify(entid, attribute_map));
match attr {
// Only :db/unique, :db/doc, and :db/isComponent can be retracted; all other
// attribute characteristics must be altered rather than retracted, or cannot be changed at all.
entids::DB_DOC => {
// Nothing to do here; we don't keep docstrings inside `Attribute`s.
},
entids::DB_IS_COMPONENT => {
match value {
&TypedValue::Boolean(v) if builder.component == Some(v) => {
builder.component(false);
},
v => {
bail!(ErrorKind::BadSchemaAssertion(format!("Attempted to retract :db/isComponent with the wrong value {:?}.", v)));
},
}
},
entids::DB_UNIQUE => {
match *value {
TypedValue::Ref(u) => {
match u {
entids::DB_UNIQUE_VALUE if builder.unique == Some(Some(attribute::Unique::Value)) => {
builder.non_unique();
},
entids::DB_UNIQUE_IDENTITY if builder.unique == Some(Some(attribute::Unique::Identity)) => {
builder.non_unique();
},
v => {
bail!(ErrorKind::BadSchemaAssertion(format!("Attempted to retract :db/unique with the wrong value {}.", v)));
},
}
},
_ => bail!(ErrorKind::BadSchemaAssertion(format!("Expected [:db/retract _ :db/unique :db.unique/_] but got [:db/retract {} :db/unique {:?}]", entid, value)))
}
},
_ => {
bail!(ErrorKind::BadSchemaAssertion(format!("Retracting {} for {} not permitted.", attr, entid)));
},
}
}
for (entid, attr, ref value) in assertions.into_iter() {
let builder = builders.entry(entid).or_insert(AttributeBuilder::default());
// For assertions, we can start with an empty attribute builder.
let builder = builders.entry(entid).or_insert_with(Default::default);
// TODO: improve error messages throughout.
match attr {
@@ -146,11 +196,6 @@ pub fn update_attribute_map_from_entid_triples<U>(attribute_map: &mut AttributeM
entids::DB_UNIQUE => {
match *value {
// TODO: accept nil in some form.
// TypedValue::Nil => {
// builder.unique_value(false);
// builder.unique_identity(false);
// },
TypedValue::Ref(entids::DB_UNIQUE_VALUE) => { builder.unique(attribute::Unique::Value); },
TypedValue::Ref(entids::DB_UNIQUE_IDENTITY) => { builder.unique(attribute::Unique::Identity); },
_ => bail!(ErrorKind::BadSchemaAssertion(format!("Expected [... :db/unique :db.unique/value|:db.unique/identity] but got [... :db/unique {:?}]", value)))
@@ -257,17 +302,14 @@ pub fn update_schema_from_entid_quadruples<U>(schema: &mut Schema, assertions: U
attribute_set.witness((e, a), typed_value, added);
}
// Datomic does not allow retracting attributes or idents. For now, Mentat follows suit.
if !attribute_set.retracted.is_empty() {
bail!(ErrorKind::NotYetImplemented(format!("Retracting metadata attribute assertions not yet implemented: retracted [e a] pairs [{}]",
attribute_set.retracted.keys().map(|&(e, a)| format!("[{} {}]", e, a)).join(", "))));
}
// Collect triples.
let retracted_triples = attribute_set.retracted.into_iter().map(|((e, a), typed_value)| (e, a, typed_value));
let asserted_triples = attribute_set.asserted.into_iter().map(|((e, a), typed_value)| (e, a, typed_value));
let altered_triples = attribute_set.altered.into_iter().map(|((e, a), (_old_value, new_value))| (e, a, new_value));
let report = update_attribute_map_from_entid_triples(&mut schema.attribute_map, asserted_triples.chain(altered_triples))?;
let report = update_attribute_map_from_entid_triples(&mut schema.attribute_map,
asserted_triples.chain(altered_triples),
retracted_triples)?;
let mut idents_altered: BTreeMap<Entid, IdentAlteration> = BTreeMap::new();
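
A minimal in-crate sketch of the new signature, mirroring the call sites in db.rs and schema.rs: assertions and retractions now travel as two separate iterators of (e, a, v) triples, and either side may be empty. The entid, the mutable `attribute_map`, and the surrounding function returning the db crate's Result are assumed for illustration.

    // Sketch: drop uniqueness from attribute 65540, asserting nothing else.
    let retractions = vec![
        (65540, entids::DB_UNIQUE, TypedValue::Ref(entids::DB_UNIQUE_VALUE)),
    ];
    let report = update_attribute_map_from_entid_triples(
        &mut attribute_map,
        ::std::iter::empty(),       // no assertions in this transaction
        retractions)?;
    // `report` summarizes the installs and alterations that were applied.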

View file

@@ -73,13 +73,13 @@ fn validate_attribute_map(entid_map: &EntidMap, attribute_map: &AttributeMap) ->
#[derive(Clone,Debug,Default,Eq,Hash,Ord,PartialOrd,PartialEq)]
pub struct AttributeBuilder {
helpful: bool,
value_type: Option<ValueType>,
multival: Option<bool>,
unique: Option<Option<attribute::Unique>>,
index: Option<bool>,
fulltext: Option<bool>,
component: Option<bool>,
no_history: Option<bool>,
pub value_type: Option<ValueType>,
pub multival: Option<bool>,
pub unique: Option<Option<attribute::Unique>>,
pub index: Option<bool>,
pub fulltext: Option<bool>,
pub component: Option<bool>,
pub no_history: Option<bool>,
}
impl AttributeBuilder {
@@ -92,6 +92,16 @@ impl AttributeBuilder {
}
}
/// Make a new AttributeBuilder from an existing Attribute. This is important to allow
/// retraction. Only the attribute characteristics that we allow to change are copied here.
pub fn to_modify_attribute(attribute: &Attribute) -> Self {
let mut ab = AttributeBuilder::default();
ab.multival = Some(attribute.multival);
ab.unique = Some(attribute.unique);
ab.component = Some(attribute.component);
ab
}
pub fn value_type<'a>(&'a mut self, value_type: ValueType) -> &'a mut Self {
self.value_type = Some(value_type);
self
@@ -102,6 +112,11 @@ impl AttributeBuilder {
self
}
pub fn non_unique<'a>(&'a mut self) -> &'a mut Self {
self.unique = Some(None);
self
}
pub fn unique<'a>(&'a mut self, unique: attribute::Unique) -> &'a mut Self {
if self.helpful && unique == attribute::Unique::Identity {
self.index = Some(true);
@@ -185,12 +200,19 @@ impl AttributeBuilder {
mutations.push(AttributeAlteration::Cardinality);
}
}
if let Some(ref unique) = self.unique {
if *unique != attribute.unique {
attribute.unique = unique.clone();
mutations.push(AttributeAlteration::Unique);
}
} else {
if attribute.unique != None {
attribute.unique = None;
mutations.push(AttributeAlteration::Unique);
}
}
if let Some(index) = self.index {
if index != attribute.index {
attribute.index = index;
@@ -255,7 +277,10 @@ impl SchemaBuilding for Schema {
}).collect();
let mut schema = Schema::from_ident_map_and_attribute_map(ident_map, AttributeMap::default())?;
let metadata_report = metadata::update_attribute_map_from_entid_triples(&mut schema.attribute_map, entid_assertions?)?;
let metadata_report = metadata::update_attribute_map_from_entid_triples(&mut schema.attribute_map,
entid_assertions?,
// No retractions.
::std::iter::empty())?;
// Rebuild the component attributes list if necessary.
if metadata_report.attributes_did_change() {
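
A short sketch of how the new builder pieces fit together, assuming `existing` is some &Attribute already present in the schema. Because the builder's fields are now public, the retraction handling in metadata.rs can both seed a builder from the existing attribute and check what it asked for:

    let mut builder = AttributeBuilder::to_modify_attribute(existing);
    builder.non_unique();
    assert_eq!(builder.unique, Some(None));                // "unset uniqueness" is representable
    assert_eq!(builder.multival, Some(existing.multival)); // carried over by to_modify_attribute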

View file

@@ -0,0 +1,216 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use mentat_core::{
ValueType,
ValueTypeSet,
};
use mentat_query::{
Aggregate,
QueryFunction,
Variable,
};
use mentat_query_algebrizer::{
ColumnName,
ConjoiningClauses,
VariableColumn,
};
use mentat_query_sql::{
ColumnOrExpression,
Expression,
Name,
ProjectedColumn,
};
use errors::{
ErrorKind,
Result,
};
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SimpleAggregationOp {
Avg,
Count,
Max,
Min,
Sum,
}
impl SimpleAggregationOp {
pub(crate) fn to_sql(&self) -> &'static str {
use self::SimpleAggregationOp::*;
match self {
&Avg => "avg",
&Count => "count",
&Max => "max",
&Min => "min",
&Sum => "sum",
}
}
fn for_function(function: &QueryFunction) -> Option<SimpleAggregationOp> {
match function.0.plain_name() {
"avg" => Some(SimpleAggregationOp::Avg),
"count" => Some(SimpleAggregationOp::Count),
"max" => Some(SimpleAggregationOp::Max),
"min" => Some(SimpleAggregationOp::Min),
"sum" => Some(SimpleAggregationOp::Sum),
_ => None,
}
}
/// With knowledge of the types to which a variable might be bound,
/// return a `Result` to determine whether this aggregation is suitable.
/// For example, it's valid to take the `Avg` of `{Double, Long}`, invalid
/// to take `Sum` of `{Instant}`, valid to take (lexicographic) `Max` of `{String}`,
/// but invalid to take `Max` of `{Uuid, String}`.
///
/// The returned type is the type of the result of the aggregation.
pub(crate) fn is_applicable_to_types(&self, possibilities: ValueTypeSet) -> Result<ValueType> {
use self::SimpleAggregationOp::*;
if possibilities.is_empty() {
bail!(ErrorKind::CannotProjectImpossibleBinding(*self))
}
match self {
// One can always count results.
&Count => Ok(ValueType::Long),
// Only numeric types can be averaged or summed.
&Avg => {
if possibilities.is_only_numeric() {
// The mean of a set of numeric values will always, for our purposes, be a double.
Ok(ValueType::Double)
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Sum => {
if possibilities.is_only_numeric() {
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Max | &Min => {
if possibilities.is_unit() {
use ValueType::*;
let the_type = possibilities.exemplar().expect("a type");
match the_type {
// These types are numerically ordered.
Double | Long | Instant => Ok(the_type),
// Boolean: false < true.
Boolean => Ok(the_type),
// String: lexicographic order.
String => Ok(the_type),
// These types are unordered.
Keyword | Ref | Uuid => {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
},
}
} else {
// It cannot be empty -- we checked.
// The only types that are valid to compare cross-type are numbers.
if possibilities.is_only_numeric() {
// Note that if the max/min is a Long, it will be returned as a Double!
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
}
},
}
}
}
pub(crate) struct SimpleAggregate {
pub op: SimpleAggregationOp,
pub var: Variable,
}
impl SimpleAggregate {
pub(crate) fn column_name(&self) -> Name {
format!("({} {})", self.op.to_sql(), self.var.name())
}
pub(crate) fn use_static_value(&self) -> bool {
use self::SimpleAggregationOp::*;
match self.op {
Avg | Max | Min => true,
Count | Sum => false,
}
}
}
pub(crate) trait SimpleAggregation {
fn to_simple(&self) -> Option<SimpleAggregate>;
}
impl SimpleAggregation for Aggregate {
fn to_simple(&self) -> Option<SimpleAggregate> {
if self.args.len() != 1 {
return None;
}
self.args[0]
.as_variable()
.and_then(|v| SimpleAggregationOp::for_function(&self.func)
.map(|op| SimpleAggregate { op, var: v.clone(), }))
}
}
/// Returns two values:
/// - The `ColumnOrExpression` to use in the query. This will always refer to other
/// variables by name; never to a datoms column.
/// - The known type of that value.
pub(crate) fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueType)> {
let known_types = cc.known_type_set(&simple.var);
let return_type = simple.op.is_applicable_to_types(known_types)?;
let projected_column_or_expression =
if let Some(value) = cc.bound_value(&simple.var) {
// Oh, we already know the value!
if simple.use_static_value() {
// We can statically compute the aggregate result for some operators -- not count or
// sum, but avg/max/min are OK.
ColumnOrExpression::Value(value)
} else {
let expression = Expression::Unary {
sql_op: simple.op.to_sql(),
arg: ColumnOrExpression::Value(value),
};
ColumnOrExpression::Expression(Box::new(expression), return_type)
}
} else {
// The common case: the values are bound during execution.
let name = VariableColumn::Variable(simple.var.clone()).column_name();
let expression = Expression::Unary {
sql_op: simple.op.to_sql(),
arg: ColumnOrExpression::ExistingColumn(name),
};
ColumnOrExpression::Expression(Box::new(expression), return_type)
};
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
}
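
A minimal sketch of the type rules spelled out in is_applicable_to_types, written as it might appear in a unit test inside this crate (the op and the method are pub(crate)):

    use mentat_core::{ValueType, ValueTypeSet};

    let avg = SimpleAggregationOp::Avg;
    assert_eq!(avg.is_applicable_to_types(ValueTypeSet::of_one(ValueType::Long)).expect("avg long"),
               ValueType::Double);       // the mean of numeric values is always a Double
    let max = SimpleAggregationOp::Max;
    assert_eq!(max.is_applicable_to_types(ValueTypeSet::of_one(ValueType::String)).expect("max string"),
               ValueType::String);       // lexicographic max keeps its type
    assert!(max.is_applicable_to_types(ValueTypeSet::of_one(ValueType::Uuid)).is_err());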

View file

@@ -0,0 +1,73 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use rusqlite;
use mentat_core::{
ValueTypeSet,
};
use mentat_db;
use mentat_query::{
PlainSymbol,
};
use aggregates::{
SimpleAggregationOp,
};
error_chain! {
types {
Error, ErrorKind, ResultExt, Result;
}
errors {
/// We're just not done yet. Message that the feature is recognized but not yet
/// implemented.
NotYetImplemented(t: String) {
description("not yet implemented")
display("not yet implemented: {}", t)
}
CannotProjectImpossibleBinding(op: SimpleAggregationOp) {
description("no possible types for variable in projection list")
display("no possible types for value provided to {:?}", op)
}
CannotApplyAggregateOperationToTypes(op: SimpleAggregationOp, types: ValueTypeSet) {
description("cannot apply projection operation to types")
display("cannot apply projection operation {:?} to types {:?}", op, types)
}
UnboundVariable(var: PlainSymbol) {
description("cannot project unbound variable")
display("cannot project unbound variable {:?}", var)
}
NoTypeAvailableForVariable(var: PlainSymbol) {
description("cannot find type for variable")
display("cannot find type for variable {:?}", var)
}
UnexpectedResultsType(actual: &'static str, expected: &'static str) {
description("unexpected query results type")
display("expected {}, got {}", expected, actual)
}
AmbiguousAggregates(min_max_count: usize, corresponding_count: usize) {
description("ambiguous aggregates")
display("min/max expressions: {} (max 1), corresponding: {}", min_max_count, corresponding_count)
}
}
foreign_links {
Rusqlite(rusqlite::Error);
}
links {
DbError(mentat_db::Error, mentat_db::ErrorKind);
}
}
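
A small sketch of how this plays out for callers: error_chain generates From conversions for the links and foreign_links above, so code in this crate can apply ? to rusqlite and mentat_db results directly, while downstream code can still match on the projector-specific kinds. The describe helper is purely illustrative.

    fn describe(e: &Error) -> String {
        match e.kind() {
            &ErrorKind::AmbiguousAggregates(mins, corresponding) =>
                format!("{} min/max aggregates against {} `the` forms", mins, corresponding),
            &ErrorKind::NotYetImplemented(ref what) =>
                format!("recognized but unimplemented: {}", what),
            _ => format!("{}", e),
        }
    }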

View file

@@ -28,21 +28,14 @@ use std::iter;
use std::rc::Rc;
use indexmap::{
IndexSet,
};
use rusqlite::{
Row,
Rows,
};
use mentat_core::{
SQLValueType,
SQLValueTypeSet,
TypedValue,
ValueType,
ValueTypeSet,
ValueTypeTag,
};
@@ -55,79 +48,44 @@ use mentat_db::{
};
use mentat_query::{
Aggregate,
Element,
FindSpec,
Limit,
PlainSymbol,
QueryFunction,
Variable,
};
use mentat_query_algebrizer::{
AlgebraicQuery,
ColumnName,
ConjoiningClauses,
QualifiedAlias,
VariableBindings,
VariableColumn,
};
use mentat_query_sql::{
ColumnOrExpression,
Expression,
GroupBy,
Name,
Projection,
ProjectedColumn,
};
error_chain! {
types {
Error, ErrorKind, ResultExt, Result;
}
mod aggregates;
mod project;
mod pull;
pub mod errors;
errors {
/// We're just not done yet. Message that the feature is recognized but not yet
/// implemented.
NotYetImplemented(t: String) {
description("not yet implemented")
display("not yet implemented: {}", t)
}
CannotProjectImpossibleBinding(op: SimpleAggregationOp) {
description("no possible types for variable in projection list")
display("no possible types for value provided to {:?}", op)
}
CannotApplyAggregateOperationToTypes(op: SimpleAggregationOp, types: ValueTypeSet) {
description("cannot apply projection operation to types")
display("cannot apply projection operation {:?} to types {:?}", op, types)
}
UnboundVariable(var: PlainSymbol) {
description("cannot project unbound variable")
display("cannot project unbound variable {:?}", var)
}
NoTypeAvailableForVariable(var: PlainSymbol) {
description("cannot find type for variable")
display("cannot find type for variable {:?}", var)
}
UnexpectedResultsType(actual: &'static str, expected: &'static str) {
description("unexpected query results type")
display("expected {}, got {}", expected, actual)
}
AmbiguousAggregates(min_max_count: usize, corresponding_count: usize) {
description("ambiguous aggregates")
display("min/max expressions: {} (max 1), corresponding: {}", min_max_count, corresponding_count)
}
}
pub use aggregates::{
SimpleAggregationOp,
};
foreign_links {
Rusqlite(rusqlite::Error);
}
use project::{
ProjectedElements,
project_elements,
};
links {
DbError(mentat_db::Error, mentat_db::ErrorKind);
}
}
pub use project::{
projected_column_for_var,
};
use errors::{
ErrorKind,
Result,
};
#[derive(Debug, PartialEq, Eq)]
pub struct QueryOutput {
@@ -353,525 +311,6 @@ impl TypedIndex {
}
}
fn cc_column(cc: &ConjoiningClauses, var: &Variable) -> Result<QualifiedAlias> {
cc.column_bindings
.get(var)
.and_then(|cols| cols.get(0).cloned())
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
}
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
// Every variable should be bound by the top-level CC to at least
// one column in the query. If that constraint is violated it's a
// bug in our code, so it's appropriate to panic here.
cc_column(cc, var)
.map(|qa| {
let name = VariableColumn::Variable(var.clone()).column_name();
(ColumnOrExpression::Column(qa), name)
})
}
fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
cc.extracted_types
.get(var)
.cloned()
.map(|alias| {
let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name();
(ColumnOrExpression::Column(alias), type_name)
})
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
}
/// Return the projected column -- that is, a value or SQL column and an associated name -- for a
/// given variable. Also return the type.
/// Callers are expected to determine whether to project a type tag as an additional SQL column.
pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueTypeSet)> {
if let Some(value) = cc.bound_value(&var) {
// If we already know the value, then our lives are easy.
let tag = value.value_type();
let name = VariableColumn::Variable(var.clone()).column_name();
Ok((ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), ValueTypeSet::of_one(tag)))
} else {
// If we don't, then the CC *must* have bound the variable.
let (column, name) = candidate_column(cc, var)?;
Ok((ProjectedColumn(column, name), cc.known_type_set(var)))
}
}
/// Returns two values:
/// - The `ColumnOrExpression` to use in the query. This will always refer to other
/// variables by name; never to a datoms column.
/// - The known type of that value.
fn projected_column_for_simple_aggregate(simple: &SimpleAggregate, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueType)> {
let known_types = cc.known_type_set(&simple.var);
let return_type = simple.op.is_applicable_to_types(known_types)?;
let projected_column_or_expression =
if let Some(value) = cc.bound_value(&simple.var) {
// Oh, we already know the value!
if simple.use_static_value() {
// We can statically compute the aggregate result for some operators -- not count or
// sum, but avg/max/min are OK.
ColumnOrExpression::Value(value)
} else {
let expression = Expression::Unary {
sql_op: simple.op.to_sql(),
arg: ColumnOrExpression::Value(value),
};
ColumnOrExpression::Expression(Box::new(expression), return_type)
}
} else {
// The common case: the values are bound during execution.
let name = VariableColumn::Variable(simple.var.clone()).column_name();
let expression = Expression::Unary {
sql_op: simple.op.to_sql(),
arg: ColumnOrExpression::ExistingColumn(name),
};
ColumnOrExpression::Expression(Box::new(expression), return_type)
};
Ok((ProjectedColumn(projected_column_or_expression, simple.column_name()), return_type))
}
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SimpleAggregationOp {
Avg,
Count,
Max,
Min,
Sum,
}
impl SimpleAggregationOp {
fn to_sql(&self) -> &'static str {
use SimpleAggregationOp::*;
match self {
&Avg => "avg",
&Count => "count",
&Max => "max",
&Min => "min",
&Sum => "sum",
}
}
fn for_function(function: &QueryFunction) -> Option<SimpleAggregationOp> {
match function.0.plain_name() {
"avg" => Some(SimpleAggregationOp::Avg),
"count" => Some(SimpleAggregationOp::Count),
"max" => Some(SimpleAggregationOp::Max),
"min" => Some(SimpleAggregationOp::Min),
"sum" => Some(SimpleAggregationOp::Sum),
_ => None,
}
}
/// With knowledge of the types to which a variable might be bound,
/// return a `Result` to determine whether this aggregation is suitable.
/// For example, it's valid to take the `Avg` of `{Double, Long}`, invalid
/// to take `Sum` of `{Instant}`, valid to take (lexicographic) `Max` of `{String}`,
/// but invalid to take `Max` of `{Uuid, String}`.
///
/// The returned type is the type of the result of the aggregation.
fn is_applicable_to_types(&self, possibilities: ValueTypeSet) -> Result<ValueType> {
use SimpleAggregationOp::*;
if possibilities.is_empty() {
bail!(ErrorKind::CannotProjectImpossibleBinding(*self))
}
match self {
// One can always count results.
&Count => Ok(ValueType::Long),
// Only numeric types can be averaged or summed.
&Avg => {
if possibilities.is_only_numeric() {
// The mean of a set of numeric values will always, for our purposes, be a double.
Ok(ValueType::Double)
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Sum => {
if possibilities.is_only_numeric() {
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
},
&Max | &Min => {
if possibilities.is_unit() {
use ValueType::*;
let the_type = possibilities.exemplar().expect("a type");
match the_type {
// These types are numerically ordered.
Double | Long | Instant => Ok(the_type),
// Boolean: false < true.
Boolean => Ok(the_type),
// String: lexicographic order.
String => Ok(the_type),
// These types are unordered.
Keyword | Ref | Uuid => {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
},
}
} else {
// It cannot be empty -- we checked.
// The only types that are valid to compare cross-type are numbers.
if possibilities.is_only_numeric() {
// Note that if the max/min is a Long, it will be returned as a Double!
if possibilities.contains(ValueType::Double) {
Ok(ValueType::Double)
} else {
// TODO: BigInt.
Ok(ValueType::Long)
}
} else {
bail!(ErrorKind::CannotApplyAggregateOperationToTypes(*self, possibilities))
}
}
},
}
}
}
struct SimpleAggregate {
op: SimpleAggregationOp,
var: Variable,
}
impl SimpleAggregate {
fn column_name(&self) -> Name {
format!("({} {})", self.op.to_sql(), self.var.name())
}
fn use_static_value(&self) -> bool {
use SimpleAggregationOp::*;
match self.op {
Avg | Max | Min => true,
Count | Sum => false,
}
}
}
trait SimpleAggregation {
fn to_simple(&self) -> Option<SimpleAggregate>;
}
impl SimpleAggregation for Aggregate {
fn to_simple(&self) -> Option<SimpleAggregate> {
if self.args.len() != 1 {
return None;
}
self.args[0]
.as_variable()
.and_then(|v| SimpleAggregationOp::for_function(&self.func)
.map(|op| SimpleAggregate { op, var: v.clone(), }))
}
}
/// An internal temporary struct to pass between the projection 'walk' and the
/// resultant projector.
/// Projection accumulates four things:
/// - Two SQL projection lists. We need two because aggregate queries are nested
/// in order to apply DISTINCT to values prior to aggregation.
/// - A collection of templates for the projector to use to extract values.
/// - A list of columns to use for grouping. Grouping is a property of the projection!
struct ProjectedElements {
sql_projection: Projection,
pre_aggregate_projection: Option<Projection>,
templates: Vec<TypedIndex>,
group_by: Vec<GroupBy>,
}
/// Walk an iterator of `Element`s, collecting projector templates and columns.
///
/// Returns a `ProjectedElements`, which combines SQL projections
/// and a `Vec` of `TypedIndex` 'keys' to use when looking up values.
///
/// Callers must ensure that every `Element` is distinct -- a query like
///
/// ```edn
/// [:find ?x ?x :where [?x _ _]]
/// ```
///
/// should fail to parse. See #358.
fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
count: usize,
elements: I,
query: &AlgebraicQuery) -> Result<ProjectedElements> {
// Give a little padding for type tags.
let mut inner_projection = Vec::with_capacity(count + 2);
// Everything in the outer query will _either_ be an aggregate operation
// _or_ a reference to a name projected from the inner.
// We'll expand them later.
let mut outer_projection: Vec<Either<Name, ProjectedColumn>> = Vec::with_capacity(count + 2);
let mut i: i32 = 0;
let mut min_max_count: usize = 0;
let mut corresponding_count: usize = 0;
let mut templates = vec![];
let mut aggregates = false;
// Any variable that appears intact in the :find clause, not inside an aggregate expression.
// "Query variables not in aggregate expressions will group the results and appear intact
// in the result."
// We use an ordered set here so that we group in the correct order.
let mut outer_variables = IndexSet::new();
// Any variable that we are projecting from the inner query.
let mut inner_variables = BTreeSet::new();
for e in elements {
if let &Element::Corresponding(_) = e {
corresponding_count += 1;
}
match e {
// Each time we come across a variable, we push a SQL column
// into the SQL projection, aliased to the name of the variable,
// and we push an annotated index into the projector.
&Element::Variable(ref var) |
&Element::Corresponding(ref var) => {
if outer_variables.contains(var) {
eprintln!("Warning: duplicate variable {} in query.", var);
}
// TODO: it's an error to have `[:find ?x (the ?x) …]`.
outer_variables.insert(var.clone());
inner_variables.insert(var.clone());
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
outer_projection.push(Either::Left(projected_column.1.clone()));
inner_projection.push(projected_column);
if let Some(tag) = type_set.unique_type_tag() {
templates.push(TypedIndex::Known(i, tag));
i += 1; // We used one SQL column.
} else {
templates.push(TypedIndex::Unknown(i, i + 1));
i += 2; // We used two SQL columns.
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
outer_projection.push(Either::Left(type_name));
}
},
&Element::Aggregate(ref a) => {
if let Some(simple) = a.to_simple() {
aggregates = true;
use SimpleAggregationOp::*;
match simple.op {
Max | Min => {
min_max_count += 1;
},
Avg | Count | Sum => (),
}
// When we encounter a simple aggregate -- one in which the aggregation can be
// implemented in SQL, on a single variable -- we just push the SQL aggregation op.
// We must ensure the following:
// - There's a column for the var.
// - The type of the var is known to be restricted to a sensible input set
// (not necessarily a single type, but e.g., all vals must be Double or Long).
// - The type set must be appropriate for the operation. E.g., `Sum` is not a
// meaningful operation on instants.
let (projected_column, return_type) = projected_column_for_simple_aggregate(&simple, &query.cc)?;
outer_projection.push(Either::Right(projected_column));
if !inner_variables.contains(&simple.var) {
inner_variables.insert(simple.var.clone());
let (projected_column, _type_set) = projected_column_for_var(&simple.var, &query.cc)?;
inner_projection.push(projected_column);
if query.cc.known_type_set(&simple.var).unique_type_tag().is_none() {
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &simple.var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
}
// We might regret using the type tag here instead of the `ValueType`.
templates.push(TypedIndex::Known(i, return_type.value_type_tag()));
i += 1;
} else {
// TODO: complex aggregates.
bail!(ErrorKind::NotYetImplemented("complex aggregates".into()));
}
},
}
}
match (min_max_count, corresponding_count) {
(0, 0) | (_, 0) => {},
(0, _) => {
eprintln!("Warning: used `(the ?var)` without `min` or `max`.");
},
(1, _) => {
// This is the success case!
},
(n, c) => {
bail!(ErrorKind::AmbiguousAggregates(n, c));
},
}
// Anything used in ORDER BY (which we're given in `named_projection`)
// needs to be in the SQL column list so we can refer to it by name.
//
// They don't affect projection.
//
// If a variable is of a non-fixed type, also project the type tag column, so we don't
// accidentally unify across types when considering uniqueness!
for var in query.named_projection.iter() {
if outer_variables.contains(var) {
continue;
}
// If it's a fixed value, we need do nothing further.
if query.cc.is_value_bound(&var) {
continue;
}
let already_inner = inner_variables.contains(&var);
let (column, name) = candidate_column(&query.cc, &var)?;
if !already_inner {
inner_projection.push(ProjectedColumn(column, name.clone()));
inner_variables.insert(var.clone());
}
outer_projection.push(Either::Left(name));
outer_variables.insert(var.clone());
// We don't care if a column has a single _type_, we care if it has a single type _tag_,
// because that's what we'll use if we're projecting. E.g., Long and Double.
// Single type implies single type tag, and is cheaper, so we check that first.
let types = query.cc.known_type_set(&var);
if !types.has_unique_type_tag() {
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
if !already_inner {
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
outer_projection.push(Either::Left(type_name));
}
}
if !aggregates {
// We're done -- we never need to group unless we're aggregating.
return Ok(ProjectedElements {
sql_projection: Projection::Columns(inner_projection),
pre_aggregate_projection: None,
templates,
group_by: vec![],
});
}
// OK, on to aggregates.
// We need to produce two SQL projection lists: one for an inner query and one for the outer.
//
// The inner serves these purposes:
// - Projecting variables to avoid duplicates being elided. (:with)
// - Making bindings available to the outermost query for projection, ordering, and grouping.
//
// The outer is consumed by the projector.
//
// We will also be producing:
// - A GROUP BY list to group the output of the inner query by non-aggregate variables
// so that it can be correctly aggregated.
// Turn this collection of vars into a collection of columns from the query.
// We don't allow grouping on anything but a variable bound in the query.
// We group by tag if necessary.
let mut group_by = Vec::with_capacity(outer_variables.len() + 2);
for var in outer_variables.into_iter() {
if query.cc.is_value_bound(&var) {
continue;
}
// The GROUP BY goes outside, but it needs every variable and type tag to be
// projected from inside. Collect in both directions here.
let name = VariableColumn::Variable(var.clone()).column_name();
group_by.push(GroupBy::ProjectedColumn(name));
let needs_type_projection = !query.cc.known_type_set(&var).has_unique_type_tag();
let already_inner = inner_variables.contains(&var);
if !already_inner {
let (column, name) = candidate_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(column, name.clone()));
}
if needs_type_projection {
let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name();
if !already_inner {
let type_col = query.cc
.extracted_types
.get(&var)
.cloned()
.ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?;
inner_projection.push(ProjectedColumn(ColumnOrExpression::Column(type_col), type_name.clone()));
}
group_by.push(GroupBy::ProjectedColumn(type_name));
};
}
for var in query.with.iter() {
// We never need to project a constant.
if query.cc.is_value_bound(&var) {
continue;
}
// We don't need to add inner projections for :with if they are already there.
if !inner_variables.contains(&var) {
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
inner_projection.push(projected_column);
if type_set.unique_type_tag().is_none() {
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
}
}
// At this point we know we have a double-layer projection. Collect the outer.
//
// If we have an inner and outer layer, the inner layer will name its
// variables, and the outer will re-project them.
// If we only have one layer, then the outer will do the naming.
// (We could try to not use names in the inner query, but then what would we do for
// `ground` and known values?)
// Walk the projection, switching the outer columns to use the inner names.
let outer_projection = outer_projection.into_iter().map(|c| {
match c {
Either::Left(name) => {
ProjectedColumn(ColumnOrExpression::ExistingColumn(name.clone()),
name)
},
Either::Right(pc) => pc,
}
}).collect();
Ok(ProjectedElements {
sql_projection: Projection::Columns(outer_projection),
pre_aggregate_projection: Some(Projection::Columns(inner_projection)),
templates,
group_by,
})
}
pub trait Projector {
fn project<'stmt>(&self, rows: Rows<'stmt>) -> Result<QueryOutput>;
fn columns<'s>(&'s self) -> Box<Iterator<Item=&Element> + 's>;
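
A quick sketch of the import paths this split produces for downstream crates, matching the error_chain link changes further down in this compare: the error types now live in a public errors module, while a few items are re-exported from the crate root by the pub use lines above.

    use mentat_query_projector::errors::{Error, ErrorKind, Result};
    use mentat_query_projector::SimpleAggregationOp;       // re-exported from `aggregates`
    use mentat_query_projector::projected_column_for_var;  // re-exported from `project`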

View file

@@ -0,0 +1,403 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::collections::{
BTreeSet,
};
use indexmap::{
IndexSet,
};
use mentat_core::{
SQLValueType,
SQLValueTypeSet,
ValueTypeSet,
};
use mentat_core::util::{
Either,
};
use mentat_query::{
Element,
Variable,
};
use mentat_query_algebrizer::{
AlgebraicQuery,
ColumnName,
ConjoiningClauses,
QualifiedAlias,
VariableColumn,
};
use mentat_query_sql::{
ColumnOrExpression,
GroupBy,
Name,
Projection,
ProjectedColumn,
};
use aggregates::{
SimpleAggregation,
projected_column_for_simple_aggregate,
};
use errors::{
ErrorKind,
Result,
};
use super::{
TypedIndex,
};
/// An internal temporary struct to pass between the projection 'walk' and the
/// resultant projector.
/// Projection accumulates four things:
/// - Two SQL projection lists. We need two because aggregate queries are nested
/// in order to apply DISTINCT to values prior to aggregation.
/// - A collection of templates for the projector to use to extract values.
/// - A list of columns to use for grouping. Grouping is a property of the projection!
pub(crate) struct ProjectedElements {
pub sql_projection: Projection,
pub pre_aggregate_projection: Option<Projection>,
pub templates: Vec<TypedIndex>,
pub group_by: Vec<GroupBy>,
}
fn candidate_type_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
cc.extracted_types
.get(var)
.cloned()
.map(|alias| {
let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name();
(ColumnOrExpression::Column(alias), type_name)
})
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
}
fn cc_column(cc: &ConjoiningClauses, var: &Variable) -> Result<QualifiedAlias> {
cc.column_bindings
.get(var)
.and_then(|cols| cols.get(0).cloned())
.ok_or_else(|| ErrorKind::UnboundVariable(var.name()).into())
}
fn candidate_column(cc: &ConjoiningClauses, var: &Variable) -> Result<(ColumnOrExpression, Name)> {
// Every variable should be bound by the top-level CC to at least
// one column in the query. If that constraint is violated it's a
// bug in our code, so it's appropriate to panic here.
cc_column(cc, var)
.map(|qa| {
let name = VariableColumn::Variable(var.clone()).column_name();
(ColumnOrExpression::Column(qa), name)
})
}
/// Return the projected column -- that is, a value or SQL column and an associated name -- for a
/// given variable. Also return the type.
/// Callers are expected to determine whether to project a type tag as an additional SQL column.
pub fn projected_column_for_var(var: &Variable, cc: &ConjoiningClauses) -> Result<(ProjectedColumn, ValueTypeSet)> {
if let Some(value) = cc.bound_value(&var) {
// If we already know the value, then our lives are easy.
let tag = value.value_type();
let name = VariableColumn::Variable(var.clone()).column_name();
Ok((ProjectedColumn(ColumnOrExpression::Value(value.clone()), name), ValueTypeSet::of_one(tag)))
} else {
// If we don't, then the CC *must* have bound the variable.
let (column, name) = candidate_column(cc, var)?;
Ok((ProjectedColumn(column, name), cc.known_type_set(var)))
}
}
/// Walk an iterator of `Element`s, collecting projector templates and columns.
///
/// Returns a `ProjectedElements`, which combines SQL projections
/// and a `Vec` of `TypedIndex` 'keys' to use when looking up values.
///
/// Callers must ensure that every `Element` is distinct -- a query like
///
/// ```edn
/// [:find ?x ?x :where [?x _ _]]
/// ```
///
/// should fail to parse. See #358.
pub(crate) fn project_elements<'a, I: IntoIterator<Item = &'a Element>>(
count: usize,
elements: I,
query: &AlgebraicQuery) -> Result<ProjectedElements> {
// Give a little padding for type tags.
let mut inner_projection = Vec::with_capacity(count + 2);
// Everything in the outer query will _either_ be an aggregate operation
// _or_ a reference to a name projected from the inner.
// We'll expand them later.
let mut outer_projection: Vec<Either<Name, ProjectedColumn>> = Vec::with_capacity(count + 2);
let mut i: i32 = 0;
let mut min_max_count: usize = 0;
let mut corresponding_count: usize = 0;
let mut templates = vec![];
let mut aggregates = false;
// Any variable that appears intact in the :find clause, not inside an aggregate expression.
// "Query variables not in aggregate expressions will group the results and appear intact
// in the result."
// We use an ordered set here so that we group in the correct order.
let mut outer_variables = IndexSet::new();
// Any variable that we are projecting from the inner query.
let mut inner_variables = BTreeSet::new();
for e in elements {
if let &Element::Corresponding(_) = e {
corresponding_count += 1;
}
match e {
// Each time we come across a variable, we push a SQL column
// into the SQL projection, aliased to the name of the variable,
// and we push an annotated index into the projector.
&Element::Variable(ref var) |
&Element::Corresponding(ref var) => {
if outer_variables.contains(var) {
eprintln!("Warning: duplicate variable {} in query.", var);
}
// TODO: it's an error to have `[:find ?x (the ?x) …]`.
outer_variables.insert(var.clone());
inner_variables.insert(var.clone());
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
outer_projection.push(Either::Left(projected_column.1.clone()));
inner_projection.push(projected_column);
if let Some(tag) = type_set.unique_type_tag() {
templates.push(TypedIndex::Known(i, tag));
i += 1; // We used one SQL column.
} else {
templates.push(TypedIndex::Unknown(i, i + 1));
i += 2; // We used two SQL columns.
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
outer_projection.push(Either::Left(type_name));
}
},
&Element::Aggregate(ref a) => {
if let Some(simple) = a.to_simple() {
aggregates = true;
use aggregates::SimpleAggregationOp::*;
match simple.op {
Max | Min => {
min_max_count += 1;
},
Avg | Count | Sum => (),
}
// When we encounter a simple aggregate -- one in which the aggregation can be
// implemented in SQL, on a single variable -- we just push the SQL aggregation op.
// We must ensure the following:
// - There's a column for the var.
// - The type of the var is known to be restricted to a sensible input set
// (not necessarily a single type, but e.g., all vals must be Double or Long).
// - The type set must be appropriate for the operation. E.g., `Sum` is not a
// meaningful operation on instants.
let (projected_column, return_type) = projected_column_for_simple_aggregate(&simple, &query.cc)?;
outer_projection.push(Either::Right(projected_column));
if !inner_variables.contains(&simple.var) {
inner_variables.insert(simple.var.clone());
let (projected_column, _type_set) = projected_column_for_var(&simple.var, &query.cc)?;
inner_projection.push(projected_column);
if query.cc.known_type_set(&simple.var).unique_type_tag().is_none() {
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &simple.var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
}
// We might regret using the type tag here instead of the `ValueType`.
templates.push(TypedIndex::Known(i, return_type.value_type_tag()));
i += 1;
} else {
// TODO: complex aggregates.
bail!(ErrorKind::NotYetImplemented("complex aggregates".into()));
}
},
}
}
match (min_max_count, corresponding_count) {
(0, 0) | (_, 0) => {},
(0, _) => {
eprintln!("Warning: used `(the ?var)` without `min` or `max`.");
},
(1, _) => {
// This is the success case!
},
(n, c) => {
bail!(ErrorKind::AmbiguousAggregates(n, c));
},
}
// Anything used in ORDER BY (which we're given in `named_projection`)
// needs to be in the SQL column list so we can refer to it by name.
//
// They don't affect projection.
//
// If a variable is of a non-fixed type, also project the type tag column, so we don't
// accidentally unify across types when considering uniqueness!
for var in query.named_projection.iter() {
if outer_variables.contains(var) {
continue;
}
// If it's a fixed value, we need do nothing further.
if query.cc.is_value_bound(&var) {
continue;
}
let already_inner = inner_variables.contains(&var);
let (column, name) = candidate_column(&query.cc, &var)?;
if !already_inner {
inner_projection.push(ProjectedColumn(column, name.clone()));
inner_variables.insert(var.clone());
}
outer_projection.push(Either::Left(name));
outer_variables.insert(var.clone());
// We don't care if a column has a single _type_, we care if it has a single type _tag_,
// because that's what we'll use if we're projecting. E.g., Long and Double.
// Single type implies single type tag, and is cheaper, so we check that first.
let types = query.cc.known_type_set(&var);
if !types.has_unique_type_tag() {
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
if !already_inner {
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
outer_projection.push(Either::Left(type_name));
}
}
if !aggregates {
// We're done -- we never need to group unless we're aggregating.
return Ok(ProjectedElements {
sql_projection: Projection::Columns(inner_projection),
pre_aggregate_projection: None,
templates,
group_by: vec![],
});
}
// OK, on to aggregates.
// We need to produce two SQL projection lists: one for an inner query and one for the outer.
//
// The inner serves these purposes:
// - Projecting variables to avoid duplicates being elided. (:with)
// - Making bindings available to the outermost query for projection, ordering, and grouping.
//
// The outer is consumed by the projector.
//
// We will also be producing:
// - A GROUP BY list to group the output of the inner query by non-aggregate variables
// so that it can be correctly aggregated.
// Turn this collection of vars into a collection of columns from the query.
// We don't allow grouping on anything but a variable bound in the query.
// We group by tag if necessary.
let mut group_by = Vec::with_capacity(outer_variables.len() + 2);
for var in outer_variables.into_iter() {
if query.cc.is_value_bound(&var) {
continue;
}
// The GROUP BY goes outside, but it needs every variable and type tag to be
// projected from inside. Collect in both directions here.
let name = VariableColumn::Variable(var.clone()).column_name();
group_by.push(GroupBy::ProjectedColumn(name));
let needs_type_projection = !query.cc.known_type_set(&var).has_unique_type_tag();
let already_inner = inner_variables.contains(&var);
if !already_inner {
let (column, name) = candidate_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(column, name.clone()));
}
if needs_type_projection {
let type_name = VariableColumn::VariableTypeTag(var.clone()).column_name();
if !already_inner {
let type_col = query.cc
.extracted_types
.get(&var)
.cloned()
.ok_or_else(|| ErrorKind::NoTypeAvailableForVariable(var.name().clone()))?;
inner_projection.push(ProjectedColumn(ColumnOrExpression::Column(type_col), type_name.clone()));
}
group_by.push(GroupBy::ProjectedColumn(type_name));
};
}
for var in query.with.iter() {
// We never need to project a constant.
if query.cc.is_value_bound(&var) {
continue;
}
// We don't need to add inner projections for :with if they are already there.
if !inner_variables.contains(&var) {
let (projected_column, type_set) = projected_column_for_var(&var, &query.cc)?;
inner_projection.push(projected_column);
if type_set.unique_type_tag().is_none() {
// Also project the type from the SQL query.
let (type_column, type_name) = candidate_type_column(&query.cc, &var)?;
inner_projection.push(ProjectedColumn(type_column, type_name.clone()));
}
}
}
// At this point we know we have a double-layer projection. Collect the outer.
//
// If we have an inner and outer layer, the inner layer will name its
// variables, and the outer will re-project them.
// If we only have one layer, then the outer will do the naming.
// (We could try to not use names in the inner query, but then what would we do for
// `ground` and known values?)
// Walk the projection, switching the outer columns to use the inner names.
let outer_projection = outer_projection.into_iter().map(|c| {
match c {
Either::Left(name) => {
ProjectedColumn(ColumnOrExpression::ExistingColumn(name.clone()),
name)
},
Either::Right(pc) => pc,
}
}).collect();
Ok(ProjectedElements {
sql_projection: Projection::Columns(outer_projection),
pre_aggregate_projection: Some(Projection::Columns(inner_projection)),
templates,
group_by,
})
}
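
An in-crate sketch (illustrative values only) of how the collected templates line up with the projected SQL columns, per the Known/Unknown branches above: a variable with a unique type tag consumes one column, while a variable of uncertain type consumes a value column plus a type-tag column.

    use mentat_core::{SQLValueType, ValueType};

    // For `[:find ?x ?y ...]` where ?x is known to be a Long but ?y's type is open:
    let templates = vec![
        TypedIndex::Known(0, ValueType::Long.value_type_tag()), // ?x: column 0
        TypedIndex::Unknown(1, 2),                              // ?y: columns 1 (value) and 2 (type tag)
    ];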

View file

@@ -0,0 +1,87 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::collections::{
BTreeSet,
};
use indexmap::{
IndexMap,
IndexSet,
};
use mentat_core::{
Entid,
SQLValueType,
SQLValueTypeSet,
TypedValue,
ValueType,
ValueTypeSet,
};
use mentat_core::util::{
Either,
};
use mentat_query::{
Element,
Variable,
};
use mentat_query_algebrizer::{
AlgebraicQuery,
ColumnName,
ConjoiningClauses,
QualifiedAlias,
VariableColumn,
};
use mentat_query_sql::{
ColumnOrExpression,
GroupBy,
Name,
Projection,
ProjectedColumn,
};
use aggregates::{
SimpleAggregation,
projected_column_for_simple_aggregate,
};
use errors::{
ErrorKind,
Result,
};
use super::{
TypedIndex,
};
/// A pull expression expands a binding into a structure. The returned structure
/// associates attributes named in the input or retrieved from the store with values.
/// This association is a `StructuredMap`.
struct StructuredMap {
attrs: IndexMap<Entid, StructuredValue>,
}
/// The values stored in a `StructuredMap` can be:
/// * Vecs of structured values, for multi-valued component attributes or nested expressions.
/// * Vecs of typed values, for multi-valued simple attributes. Unlike Datomic, Mentat can express
/// an entity without a `{:db/id 12345678}` map.
/// * Single structured values, for single-valued component attributes or nested expressions.
/// * Single typed values, for simple attributes.
enum StructuredValue {
Value(TypedValue),
Values(Vec<TypedValue>),
Structure(StructuredMap),
Structures(Vec<StructuredMap>),
}
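
Since this file is the WIP groundwork for pull, a hedged in-crate sketch of how these two types are meant to compose; the attribute entids and values are invented for illustration.

    // One pulled entity: a cardinality-one value plus a multi-valued attribute.
    let mut attrs = IndexMap::new();
    attrs.insert(65, StructuredValue::Value(TypedValue::Long(1969)));
    attrs.insert(66, StructuredValue::Values(vec![
        TypedValue::Ref(12345),
        TypedValue::Ref(12346),
    ]));
    let entity = StructuredMap { attrs };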

View file

@@ -38,6 +38,6 @@ error_chain! {
}
links {
ProjectorError(mentat_query_projector::Error, mentat_query_projector::ErrorKind);
ProjectorError(mentat_query_projector::errors::Error, mentat_query_projector::errors::ErrorKind);
}
}

View file

@@ -47,7 +47,7 @@ error_chain! {
DbError(mentat_db::Error, mentat_db::ErrorKind);
QueryError(mentat_query_algebrizer::Error, mentat_query_algebrizer::ErrorKind); // Let's not leak the term 'algebrizer'.
QueryParseError(mentat_query_parser::Error, mentat_query_parser::ErrorKind);
ProjectorError(mentat_query_projector::Error, mentat_query_projector::ErrorKind);
ProjectorError(mentat_query_projector::errors::Error, mentat_query_projector::errors::ErrorKind);
TranslatorError(mentat_query_translator::Error, mentat_query_translator::ErrorKind);
SqlError(mentat_sql::Error, mentat_sql::ErrorKind);
TxParseError(mentat_tx_parser::Error, mentat_tx_parser::ErrorKind);

View file

@@ -85,7 +85,9 @@ use std::collections::BTreeMap;
pub use mentat_core::attribute;
use mentat_core::attribute::Unique;
use mentat_core::KnownEntid;
use mentat_core::{
KnownEntid,
};
use ::{
CORE_SCHEMA_VERSION,
@@ -126,7 +128,7 @@ pub type Datom = (Entid, Entid, TypedValue);
/// its version number, we need to know the attributes that the application cares about -- it's
/// not enough to know the name and version. Indeed, we even care about the details of each attribute,
/// because that's how we'll detect errors.
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct Definition {
pub name: NamespacedKeyword,
pub version: Version,
@@ -243,7 +245,7 @@ impl<T> HasCoreSchema for T where T: HasSchema {
}
impl Definition {
fn description_for_attributes<'s, T, R>(&'s self, attributes: &[R], via: &T) -> Result<Terms>
fn description_for_attributes<'s, T, R>(&'s self, attributes: &[R], via: &T, diff: Option<BTreeMap<NamespacedKeyword, Attribute>>) -> Result<Terms>
where T: HasCoreSchema,
R: ::std::borrow::Borrow<(NamespacedKeyword, Attribute)> {
@@ -279,13 +281,10 @@ impl Definition {
// Describe each of its attributes.
// This is a lot like Schema::to_edn_value; at some point we should tidy this up.
for ref r in attributes.iter() {
let &(ref name, ref attr) = r.borrow();
let &(ref kw, ref attr) = r.borrow();
// Note that we allow tempid resolution to find an existing entity, if it
// exists. We don't yet support upgrades, which will involve producing
// alteration statements.
let tempid = builder.named_tempid(name.to_string());
let name: TypedValue = name.clone().into();
let tempid = builder.named_tempid(kw.to_string());
let name: TypedValue = kw.clone().into();
builder.add(tempid.clone(), a_ident, name)?;
builder.add(schema.clone(), a_attr, tempid.clone())?;
@@ -299,18 +298,12 @@ impl Definition {
};
builder.add(tempid.clone(), a_cardinality, c)?;
if attr.index {
builder.add(tempid.clone(), a_index, TypedValue::Boolean(true))?;
}
if attr.fulltext {
builder.add(tempid.clone(), a_fulltext, TypedValue::Boolean(true))?;
}
if attr.component {
builder.add(tempid.clone(), a_is_component, TypedValue::Boolean(true))?;
}
if attr.no_history {
builder.add(tempid.clone(), a_no_history, TypedValue::Boolean(true))?;
}
// These are all unconditional because we use attribute descriptions to _alter_, not
// just to _add_, and so absence is distinct from negation!
builder.add(tempid.clone(), a_index, TypedValue::Boolean(attr.index))?;
builder.add(tempid.clone(), a_fulltext, TypedValue::Boolean(attr.fulltext))?;
builder.add(tempid.clone(), a_is_component, TypedValue::Boolean(attr.component))?;
builder.add(tempid.clone(), a_no_history, TypedValue::Boolean(attr.no_history))?;
if let Some(u) = attr.unique {
let uu = match u {
@@ -318,15 +311,49 @@ impl Definition {
Unique::Value => v_unique_value,
};
builder.add(tempid.clone(), a_unique, uu)?;
} else {
let existing_unique =
if let Some(ref diff) = diff {
diff.get(kw).and_then(|a| a.unique)
} else {
None
};
match existing_unique {
None => {
// Nothing to do.
},
Some(Unique::Identity) => {
builder.retract(tempid.clone(), a_unique, v_unique_identity.clone())?;
},
Some(Unique::Value) => {
builder.retract(tempid.clone(), a_unique, v_unique_value.clone())?;
},
}
}
}
builder.build()
}
/// Return a sequence of terms that describes this vocabulary definition and its attributes,
/// including any alterations or retractions needed relative to the existing `from` vocabulary.
fn description_diff<T>(&self, via: &T, from: &Vocabulary) -> Result<Terms> where T: HasSchema {
let relevant = self.attributes.iter()
.filter_map(|(ref keyword, _)|
// Look up the keyword to see if it's currently in use.
via.get_entid(keyword)
// If so, map it to the existing attribute.
.and_then(|e| from.find(e).cloned())
// Collect enough that we can do lookups.
.map(|e| (keyword.clone(), e)))
.collect();
self.description_for_attributes(self.attributes.as_slice(), via, Some(relevant))
}
/// Return a sequence of terms that describes this vocabulary definition and its attributes.
fn description<T>(&self, via: &T) -> Result<Terms> where T: HasSchema {
self.description_for_attributes(self.attributes.as_slice(), via)
self.description_for_attributes(self.attributes.as_slice(), via, None)
}
}
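
A hedged sketch (names, version, and the open `in_progress` transaction are illustrative) of the upgrade path these changes enable: an application bumps its Definition, and because attribute descriptions now assert every boolean characteristic unconditionally and retract :db/unique when the definition omits it, ensure_vocabulary plus description_diff can move an existing store from version 1 to version 2 without hand-written alteration statements.

    let people_v2 = Definition {
        name: NamespacedKeyword::new("org.example", "people"),
        version: 2,
        attributes: vec![
            // (NamespacedKeyword, Attribute) pairs; e.g. :person/nickname as a
            // cardinality-one string with no :db/unique characteristic, so an
            // existing uniqueness constraint gets retracted on upgrade.
        ],
    };
    in_progress.ensure_vocabulary(&people_v2)?;
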
@@ -361,46 +388,8 @@ pub trait HasVocabularies {
fn read_vocabulary_named(&self, name: &NamespacedKeyword) -> Result<Option<Vocabulary>>;
}
pub trait VersionedStore {
pub trait VersionedStore: HasVocabularies + HasSchema {
/// Check whether the vocabulary described by the provided metadata is present in the store.
fn check_vocabulary<'definition>(&self, definition: &'definition Definition) -> Result<VocabularyCheck<'definition>>;
/// Check whether the provided vocabulary is present in the store. If it isn't, make it so.
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
/// Make sure that our expectations of the core vocabulary -- basic types and attributes -- are met.
fn verify_core_schema(&self) -> Result<()>;
}
trait VocabularyMechanics {
fn install_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome>;
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome>;
}
impl Vocabulary {
// TODO: don't do linear search!
fn find<T>(&self, entid: T) -> Option<&Attribute> where T: Into<Entid> {
let to_find = entid.into();
self.attributes.iter().find(|&&(e, _)| e == to_find).map(|&(_, ref a)| a)
}
}
impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
fn verify_core_schema(&self) -> Result<()> {
if let Some(core) = self.read_vocabulary_named(&DB_SCHEMA_CORE)? {
if core.version != CORE_SCHEMA_VERSION {
bail!(ErrorKind::UnexpectedCoreSchema(Some(core.version)));
}
// TODO: check things other than the version.
} else {
// This would be seriously messed up.
bail!(ErrorKind::UnexpectedCoreSchema(None));
}
Ok(())
}
fn check_vocabulary<'definition>(&self, definition: &'definition Definition) -> Result<VocabularyCheck<'definition>> {
if let Some(vocabulary) = self.read_vocabulary_named(&definition.name)? {
// The name is present.
@ -449,6 +438,49 @@ impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
}
}
/// Check whether the provided vocabulary is present in the store. If it isn't, make it so.
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
/// Check whether the provided vocabularies are present in the store at the correct
/// version and with all defined attributes. If any are not, invoke the `pre`
/// function on the provided `VocabularyProvider`, install or upgrade the necessary vocabularies,
/// then invoke `post`. Returns `Ok` if all of these steps succeed.
///
/// Use this function instead of calling `ensure_vocabulary` if you need to have pre/post
/// functions invoked when vocabulary changes are necessary.
fn ensure_vocabularies(&mut self, vocabularies: &VocabularyProvider) -> Result<BTreeMap<NamespacedKeyword, VocabularyOutcome>>;
/// Make sure that our expectations of the core vocabulary -- basic types and attributes -- are met.
fn verify_core_schema(&self) -> Result<()> {
if let Some(core) = self.read_vocabulary_named(&DB_SCHEMA_CORE)? {
if core.version != CORE_SCHEMA_VERSION {
bail!(ErrorKind::UnexpectedCoreSchema(Some(core.version)));
}
// TODO: check things other than the version.
} else {
// This would be seriously messed up.
bail!(ErrorKind::UnexpectedCoreSchema(None));
}
Ok(())
}
}
trait VocabularyMechanics {
fn install_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome>;
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome>;
}
impl Vocabulary {
// TODO: don't do linear search!
fn find<T>(&self, entid: T) -> Option<&Attribute> where T: Into<Entid> {
let to_find = entid.into();
self.attributes.iter().find(|&&(e, _)| e == to_find).map(|&(_, ref a)| a)
}
}
impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome> {
match self.check_vocabulary(definition)? {
VocabularyCheck::Present => Ok(VocabularyOutcome::Existed),
@ -458,6 +490,59 @@ impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
VocabularyCheck::PresentButTooNew { newer_version } => Err(ErrorKind::ExistingVocabularyTooNew(definition.name.to_string(), newer_version.version, definition.version).into()),
}
}
fn ensure_vocabularies(&mut self, vocabularies: &VocabularyProvider) -> Result<BTreeMap<NamespacedKeyword, VocabularyOutcome>> {
let mut install = Vec::new();
let mut update = Vec::new();
let mut missing = Vec::new();
let mut out = BTreeMap::new();
for definition in vocabularies.definitions.iter() {
match self.check_vocabulary(definition)? {
VocabularyCheck::Present => {
out.insert(definition.name.clone(), VocabularyOutcome::Existed);
},
VocabularyCheck::NotPresent => {
install.push(definition);
},
VocabularyCheck::PresentButNeedsUpdate { older_version } => {
update.push((definition, older_version));
},
VocabularyCheck::PresentButMissingAttributes { attributes } => {
missing.push((definition, attributes));
},
VocabularyCheck::PresentButTooNew { newer_version } => {
bail!(ErrorKind::ExistingVocabularyTooNew(definition.name.to_string(), newer_version.version, definition.version));
},
}
}
if install.is_empty() && update.is_empty() && missing.is_empty() {
return Ok(out);
}
// If any work needs to be done, run pre/post.
(vocabularies.pre)(self)?;
for d in install {
out.insert(d.name.clone(), self.install_vocabulary(d)?);
}
for (d, v) in update {
out.insert(d.name.clone(), self.upgrade_vocabulary(d, v)?);
}
for (d, a) in missing {
out.insert(d.name.clone(), self.install_attributes_for(d, a)?);
}
(vocabularies.post)(self)?;
Ok(out)
}
}
pub struct VocabularyProvider {
pub pre: fn(&mut InProgress) -> Result<()>,
pub post: fn(&mut InProgress) -> Result<()>,
pub definitions: Vec<Definition>,
}
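// A minimal usage sketch (assumptions: a `Store` named `store` and a `Definition`
// named `my_vocab` are in scope; the no-op closures stand in for real data migrations):
//
//     let provider = VocabularyProvider {
//         pre: |_ip| Ok(()),
//         definitions: vec![my_vocab],
//         post: |_ip| Ok(()),
//     };
//     let mut in_progress = store.begin_transaction()?;
//     in_progress.ensure_vocabularies(&provider)?;
//     in_progress.commit()?;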
impl<'a, 'c> VocabularyMechanics for InProgress<'a, 'c> {
@ -469,17 +554,23 @@ impl<'a, 'c> VocabularyMechanics for InProgress<'a, 'c> {
}
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome> {
let (terms, tempids) = definition.description_for_attributes(&attributes, self)?;
let (terms, tempids) = definition.description_for_attributes(&attributes, self, None)?;
self.transact_terms(terms, tempids)?;
Ok(VocabularyOutcome::InstalledMissingAttributes)
}
/// Turn the declarative parts of the vocabulary into alterations and transact them.
/// Any 'pre'/'post' data fixes are run by `ensure_vocabularies`, not here.
fn upgrade_vocabulary(&mut self, _definition: &Definition, _from_version: Vocabulary) -> Result<VocabularyOutcome> {
unimplemented!();
// TODO
// Ok(VocabularyOutcome::Installed)
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome> {
// It's sufficient for us to generate the datom form of each attribute and transact that.
// We trust that the vocabulary will implement a 'pre' function that cleans up data for any
// failable conversion (e.g., cardinality-many to cardinality-one).
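// For instance (hypothetical), downgrading :person/likes from cardinality-many to
// cardinality-one would need a 'pre' step that retracts all but one value per entity
// before this transact; the VocabularyProvider pre/post hooks exist for exactly that
// kind of cleanup.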
// TODO: don't do work for attributes that are unchanged. Here we rely on the transactor
// to elide duplicate datoms.
let (terms, tempids) = definition.description_diff(self, &from_version)?;
self.transact_terms(terms, tempids)?;
Ok(VocabularyOutcome::Upgraded)
}
}
View file
@ -584,7 +584,7 @@ fn test_aggregates_type_handling() {
Error(
ErrorKind::TranslatorError(
::mentat_query_translator::ErrorKind::ProjectorError(
::mentat_query_projector::ErrorKind::CannotApplyAggregateOperationToTypes(
::mentat_query_projector::errors::ErrorKind::CannotApplyAggregateOperationToTypes(
SimpleAggregationOp::Sum,
types
),
@ -605,7 +605,7 @@ fn test_aggregates_type_handling() {
Error(
ErrorKind::TranslatorError(
::mentat_query_translator::ErrorKind::ProjectorError(
::mentat_query_projector::ErrorKind::CannotApplyAggregateOperationToTypes(
::mentat_query_projector::errors::ErrorKind::CannotApplyAggregateOperationToTypes(
SimpleAggregationOp::Sum,
types
),
@ -1173,7 +1173,7 @@ fn test_aggregation_implicit_grouping() {
Error(
ErrorKind::TranslatorError(
::mentat_query_translator::ErrorKind::ProjectorError(
::mentat_query_projector::ErrorKind::AmbiguousAggregates(mmc, cc)
::mentat_query_projector::errors::ErrorKind::AmbiguousAggregates(mmc, cc)
)
), _)) => {
assert_eq!(mmc, 2);
View file
@ -23,6 +23,7 @@ use mentat::vocabulary::{
VersionedStore,
VocabularyCheck,
VocabularyOutcome,
VocabularyProvider,
};
use mentat::query::IntoResult;
@ -38,6 +39,7 @@ use mentat::{
Conn,
NamespacedKeyword,
Queryable,
Store,
TypedValue,
ValueType,
};
@ -291,4 +293,222 @@ fn test_add_vocab() {
_ => panic!(),
}
}
// Some alterations -- cardinality/one to cardinality/many, unique to weaker unique or
// no unique, unindexed to indexed -- can be applied automatically, so long as you
// bump the version number.
let multival_bar = vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Instant)
.multival(true)
.index(true)
.build();
let multival_bar_and_baz = vec![
(kw!(:foo/bar), multival_bar),
(kw!(:foo/baz), baz.clone()),
];
let altered_vocabulary = vocabulary::Definition {
name: kw!(:org.mozilla/foo),
version: 2,
attributes: multival_bar_and_baz,
};
// foo/bar starts single-valued.
assert_eq!(false, conn.current_schema().attribute_for_ident(&kw!(:foo/bar)).expect("attribute").0.multival);
// Scoped borrow of `conn`.
{
let mut in_progress = conn.begin_transaction(&mut sqlite).expect("begun successfully");
assert_eq!(in_progress.ensure_vocabulary(&altered_vocabulary).expect("success"),
VocabularyOutcome::Upgraded);
in_progress.commit().expect("commit succeeded");
}
// Now it's multi-valued.
assert_eq!(true, conn.current_schema().attribute_for_ident(&kw!(:foo/bar)).expect("attribute").0.multival);
}
// This is a real-world-style test that evolves a schema with data changes.
// We start with a basic vocabulary in three parts:
//
// Part 1 describes foods by name.
// Part 2 describes movies by title.
// Part 3 describes people: their names and heights, and their likes.
//
// We simulate five common migrations:
// - We made a trivial modeling error: movie names should not be unique.
// - We made a less trivial modeling error, one that can fail: food names should be unique so that
// we can more easily refer to them during writes.
// In order for this migration to succeed, we need to merge duplicates, then alter the schema --
// which we will do by introducing a new property in the same vocabulary, deprecating the old one
// -- then transact the transformed data.
// - We need to normalize some non-unique data: we recorded heights in inches when they should be
// in centimeters.
// - We need to normalize some unique data: food names should all be lowercase. Again, that can fail
// because of a uniqueness constraint. (We might know that it can't fail thanks to application
// restrictions, in which case we can treat this as we did the height alteration.)
// - We made a more significant modeling error: we used 'like' to identify both movies and foods,
// and we have decided that food preferences and movie preferences should be different attributes.
// We wish to split these up and deprecate the old attribute. In order to do so we need to retract
// all of the datoms that use the old attribute, transact new attributes _in both movies and foods_,
// then re-assert the data.
#[test]
fn test_upgrade_with_functions() {
let mut store = Store::open("").expect("open");
let food_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/food),
version: 1,
attributes: vec![
(kw!(:food/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.build()),
],
};
let movies_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/movies),
version: 1,
attributes: vec![
(kw!(:movie/year),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Long) // No need for Instant here.
.multival(false)
.build()),
(kw!(:movie/title),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.index(true)
.build()),
],
};
let people_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/people),
version: 1,
attributes: vec![
(kw!(:person/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.index(true)
.build()),
(kw!(:person/height),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Long)
.multival(false)
.build()),
(kw!(:person/likes),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Ref)
.multival(true)
.build()),
],
};
// Apply v1 of each.
let v1_provider = VocabularyProvider {
pre: |_ip| Ok(()),
definitions: vec![
food_v1.clone(),
movies_v1.clone(),
people_v1.clone(),
],
post: |_ip| Ok(()),
};
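// Nothing is installed yet, so these are fresh installs; the no-op pre/post hooks have
// nothing to clean up.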
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
in_progress.ensure_vocabularies(&v1_provider).expect("success");
// Also add some data. We do this in one transaction 'cos -- thanks to the modeling errors
// we are about to fix! -- it's a little awkward to make references to entities without
// unique attributes.
in_progress.transact(r#"[
{:movie/title "John Wick"
:movie/year 2014
:db/id "mjw"}
{:movie/title "Terminator 2: Judgment Day"
:movie/year 1991
:db/id "mt2"}
{:movie/title "Dune"
:db/id "md"
:movie/year 1984}
{:movie/title "Upstream Color"
:movie/year 2013
:db/id "muc"}
{:movie/title "Primer"
:db/id "mp"
:movie/year 2004}
;; No year: not yet released.
{:movie/title "The Modern Ocean"
:db/id "mtmo"}
{:food/name "Carrots" :db/id "fc"}
{:food/name "Weird blue worms" :db/id "fwbw"}
{:food/name "Spice" :db/id "fS"}
{:food/name "spice" :db/id "fs"}
;; Sam likes action movies, carrots, and lowercase spice.
{:person/name "Sam"
:person/height 64
:person/likes ["mjw", "mt2", "fc", "fs"]}
;; Beth likes thoughtful and weird movies, weird blue worms, and Spice.
{:person/name "Beth"
:person/height 68
:person/likes ["muc", "mp", "md", "fwbw", "fS"]}
]"#).expect("transacted");
in_progress.commit().expect("commit succeeded");
}
// Mutable borrow of store.
{
// Crap, there are several movies named Dune. We need to de-uniqify that attribute.
let movies_v2 = vocabulary::Definition {
name: kw!(:org.mozilla/movies),
version: 2,
attributes: vec![
(kw!(:movie/title),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.non_unique()
.index(true)
.build()),
],
};
let mut in_progress = store.begin_transaction().expect("began");
in_progress.ensure_vocabulary(&movies_v2).expect("success");
// We can now add another Dune movie: Denis Villeneuve's 2019 version.
// (Let's just pretend that it's been released, here in 2018!)
in_progress.transact(r#"[
{:movie/title "Dune"
:movie/year 2019}
]"#).expect("transact succeeded");
// And we can query both.
let years =
in_progress.q_once(r#"[:find [?year ...]
:where [?movie :movie/title "Dune"]
[?movie :movie/year ?year]
:order (asc ?year)]"#, None)
.into_coll_result()
.expect("coll");
assert_eq!(years, vec![TypedValue::Long(1984), TypedValue::Long(2019)]);
in_progress.commit().expect("commit succeeded");
}
}
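// The remaining food and people migrations described in the long comment above are not
// implemented in this WIP commit. As a hedged sketch of the next step, the food-vocabulary
// bump would follow the pattern already used here: clean up any clashing names in a `pre`
// function, then hand `ensure_vocabularies` a v2 Definition. (`:food/id` below is a
// hypothetical replacement attribute, not something this changeset defines.)
//
//     let food_v2 = vocabulary::Definition {
//         name: kw!(:org.mozilla/food),
//         version: 2,
//         attributes: vec![
//             // Deprecated but still present so existing data keeps its meaning.
//             (kw!(:food/name),
//              vocabulary::AttributeBuilder::helpful()
//                  .value_type(ValueType::String)
//                  .multival(false)
//                  .build()),
//             // Hypothetical unique identifier that writes can upsert against.
//             (kw!(:food/id),
//              vocabulary::AttributeBuilder::helpful()
//                  .value_type(ValueType::String)
//                  .multival(false)
//                  .unique(vocabulary::attribute::Unique::Identity)
//                  .index(true)
//                  .build()),
//         ],
//     };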