Implement vocabulary-driven schema upgrades. (#595) r=emily

This commit is contained in:
Richard Newman 2018-04-03 15:04:04 -07:00
parent a5cda7c3e9
commit 3f8464e8ed
4 changed files with 1222 additions and 101 deletions

View file

@ -330,6 +330,13 @@ impl From<f64> for TypedValue {
}
impl TypedValue {
/// Consume this value and, if it is a `TypedValue::Ref`, return the wrapped
/// entid as a `KnownEntid`. Every other variant yields `None`.
pub fn into_known_entid(self) -> Option<KnownEntid> {
    if let TypedValue::Ref(entid) = self {
        Some(KnownEntid(entid))
    } else {
        None
    }
}
pub fn into_entid(self) -> Option<Entid> {
match self {
TypedValue::Ref(v) => Some(v),

View file

@ -1541,10 +1541,10 @@ mod tests {
fn add_schema(conn: &mut Store) {
// transact some schema
let mut in_progress = conn.begin_transaction().expect("expected in progress");
in_progress.ensure_vocabulary(&Definition {
name: kw!(:todo/items),
version: 1,
attributes: vec![
in_progress.ensure_vocabulary(&Definition::new(
kw!(:todo/items),
1,
vec![
(kw!(:todo/uuid),
AttributeBuilder::helpful()
.value_type(ValueType::Uuid)
@ -1577,7 +1577,7 @@ mod tests {
.multival(false)
.build()),
],
}).expect("expected vocubulary");
)).expect("expected vocubulary");
in_progress.commit().expect("Expected vocabulary committed");
}

View file

@ -9,16 +9,20 @@
// specific language governing permissions and limitations under the License.
//! This module exposes an interface for programmatic management of vocabularies. A vocabulary
//! is defined as a name, a version number, and a collection of attribute definitions. In the
//! future, this input will be augmented with specifications of migrations between versions.
//! This module exposes an interface for programmatic management of vocabularies.
//!
//! A Mentat store exposes, via the `HasSchema` trait, operations to read vocabularies by name
//! or in bulk.
//! A vocabulary is defined by a name, a version number, and a collection of attribute definitions.
//!
//! An in-progress transaction (`InProgress`) further exposes a trait, `VersionedStore`, which
//! allows for a vocabulary definition to be checked for existence in the store, and transacted
//! if needed.
//! Operations on vocabularies can include migrations between versions. These are defined
//! programmatically as a pair of functions, `pre` and `post`, that are invoked respectively
//! before and after an upgrade is transacted.
//!
//! A Mentat store exposes, via the `HasSchema` trait, operations to read
//! vocabularies by name or in bulk.
//!
//! An in-progress transaction (`InProgress`) further exposes a trait,
//! `VersionedStore`, which allows for a vocabulary definition to be
//! checked for existence in the store, and transacted if needed.
//!
//! Typical use is the following:
//!
@ -72,6 +76,8 @@
//! .fulltext(true)
//! .build()),
//! ],
//! pre: Definition::no_op,
//! post: Definition::no_op,
//! }).expect("ensured");
//!
//! // Now we can do stuff.
@ -80,12 +86,22 @@
//! }
//! }
//! ```
//!
//! A similar approach is taken using the
//! [VocabularySource](mentat::vocabulary::VocabularySource) trait to handle migrations across
//! multiple vocabularies.
use std::collections::BTreeMap;
pub use mentat_core::attribute;
use mentat_core::attribute::Unique;
use mentat_core::KnownEntid;
use mentat_core::attribute::{
Unique,
};
use mentat_core::{
KnownEntid,
};
use ::{
CORE_SCHEMA_VERSION,
@ -114,25 +130,154 @@ use ::entity_builder::{
Terms,
};
/// AttributeBuilder is how you build vocabulary definitions to apply to a store.
pub use mentat_db::AttributeBuilder;
pub type Version = u32;
pub type Datom = (Entid, Entid, TypedValue);
/// A definition of an attribute that is independent of a particular store.
///
/// `Attribute` instances not only aren't named, but don't even have entids.
/// We need two kinds of structure here: an abstract definition of a vocabulary in terms of names,
///
/// We need two kinds of structure: an abstract definition of a vocabulary in terms of names,
/// and a concrete instance of a vocabulary in a particular store.
///
/// `Definition` is the former, and `Vocabulary` is the latter.
///
/// Note that, because it's possible to 'flesh out' a vocabulary with attributes without bumping
/// its version number, we need to know the attributes that the application cares about -- it's
/// its version number, we need to track the attributes that the application cares about — it's
/// not enough to know the name and version. Indeed, we even care about the details of each attribute,
/// because that's how we'll detect errors.
#[derive(Debug)]
///
/// `Definition` includes two additional fields: functions to run if this vocabulary is being
/// upgraded. `pre` and `post` are run before and after the definition is transacted against the
/// store. Each is called with the existing `Vocabulary` instance so that they can do version
/// checks or employ more fine-grained logic.
#[derive(Clone)]
pub struct Definition {
/// The keyword that names this vocabulary.
pub name: NamespacedKeyword,
/// The version of the vocabulary that this definition describes.
pub version: Version,
/// The attributes that make up this vocabulary, each paired with its keyword.
pub attributes: Vec<(NamespacedKeyword, Attribute)>,
/// Run when this vocabulary is being upgraded, before the definition is transacted.
/// Receives the existing `Vocabulary` that is being upgraded from.
pub pre: fn(&mut InProgress, &Vocabulary) -> Result<()>,
/// Run when this vocabulary is being upgraded, after the definition is transacted.
/// Receives the existing `Vocabulary` that was upgraded from.
pub post: fn(&mut InProgress, &Vocabulary) -> Result<()>,
}
/// ```
/// #[macro_use(kw)]
/// extern crate mentat;
///
/// use mentat::{
/// HasSchema,
/// IntoResult,
/// Queryable,
/// Store,
/// ValueType,
/// };
///
/// use mentat::entity_builder::{
/// BuildTerms,
/// TermBuilder,
/// };
///
/// use mentat::vocabulary;
/// use mentat::vocabulary::{
/// AttributeBuilder,
/// Definition,
/// HasVocabularies,
/// VersionedStore,
/// };
///
/// fn main() {
/// let mut store = Store::open("").expect("connected");
/// let mut in_progress = store.begin_transaction().expect("began transaction");
///
/// // Make sure the core vocabulary exists.
/// in_progress.verify_core_schema().expect("verified");
///
/// // Make sure our vocabulary is installed, and install if necessary.
/// in_progress.ensure_vocabulary(&Definition {
/// name: kw!(:example/links),
/// version: 2,
/// attributes: vec![
/// (kw!(:link/title),
/// AttributeBuilder::helpful()
/// .value_type(ValueType::String)
/// .multival(false)
/// .fulltext(true)
/// .build()),
/// ],
/// pre: |ip, from| {
/// // Version one allowed multiple titles; version two
/// // doesn't. Retract any duplicates we find.
/// if from.version < 2 {
/// let link_title = ip.get_entid(&kw!(:link/title)).unwrap();
///
/// let results = ip.q_once(r#"
/// [:find ?e ?t2
/// :where [?e :link/title ?t1]
/// [?e :link/title ?t2]
/// [(unpermute ?t1 ?t2)]]
/// "#, None).into_rel_result()?;
///
/// if !results.is_empty() {
/// let mut builder = TermBuilder::new();
/// for row in results.into_iter() {
/// let mut r = row.into_iter();
/// let e = r.next().and_then(|e| e.into_known_entid()).expect("entity");
/// let obsolete = r.next().expect("value");
/// builder.retract(e, link_title, obsolete)?;
/// }
/// ip.transact_builder(builder)?;
/// }
/// }
/// Ok(())
/// },
/// post: |_ip, from| {
/// println!("We migrated :example/links from version {}", from.version);
/// Ok(())
/// },
/// }).expect("ensured");
///
/// // Now we can do stuff.
/// in_progress.transact("[{:link/title \"Title\"}]").expect("transacts");
/// in_progress.commit().expect("commits");
/// }
/// ```
impl Definition {
    /// A migration function that does nothing and always succeeds. Use it for `pre`
    /// or `post` when no migration work is needed.
    pub fn no_op(_ip: &mut InProgress, _from: &Vocabulary) -> Result<()> {
        Ok(())
    }

    /// Build a `Definition` from a name, a version and a collection of attributes.
    /// Both `pre` and `post` are set to `Definition::no_op`.
    pub fn new<N, A>(name: N, version: Version, attributes: A) -> Definition
    where N: Into<NamespacedKeyword>,
          A: Into<Vec<(NamespacedKeyword, Attribute)>> {
        let name = name.into();
        let attributes = attributes.into();
        Definition {
            name: name,
            version: version,
            attributes: attributes,
            pre: Self::no_op,
            post: Self::no_op,
        }
    }

    /// Invoke this definition's `pre` function with an in-progress transaction and
    /// the vocabulary being upgraded from. `upgrade_vocabulary` calls this before
    /// it transacts the new attribute descriptions.
    fn pre(&self, ip: &mut InProgress, from: &Vocabulary) -> Result<()> {
        let hook = self.pre;
        hook(ip, from)
    }

    /// Invoke this definition's `post` function with an in-progress transaction and
    /// the vocabulary that was upgraded from. `upgrade_vocabulary` calls this after
    /// it has transacted the new attribute descriptions.
    fn post(&self, ip: &mut InProgress, from: &Vocabulary) -> Result<()> {
        let hook = self.post;
        hook(ip, from)
    }
}
/// A definition of a vocabulary as retrieved from a particular store.
///
/// A `Vocabulary` is just like `Definition`, but concrete: its name and attributes are identified
/// by `Entid`, not `NamespacedKeyword`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Vocabulary {
pub entity: Entid,
@ -146,6 +291,7 @@ impl Vocabulary {
}
}
/// A collection of named `Vocabulary` instances, as retrieved from the store.
#[derive(Debug, Default, Clone)]
pub struct Vocabularies(pub BTreeMap<NamespacedKeyword, Vocabulary>); // N.B., this has a copy of the attributes in Schema!
@ -243,7 +389,7 @@ impl<T> HasCoreSchema for T where T: HasSchema {
}
impl Definition {
fn description_for_attributes<'s, T, R>(&'s self, attributes: &[R], via: &T) -> Result<Terms>
fn description_for_attributes<'s, T, R>(&'s self, attributes: &[R], via: &T, diff: Option<BTreeMap<NamespacedKeyword, Attribute>>) -> Result<Terms>
where T: HasCoreSchema,
R: ::std::borrow::Borrow<(NamespacedKeyword, Attribute)> {
@ -279,13 +425,10 @@ impl Definition {
// Describe each of its attributes.
// This is a lot like Schema::to_edn_value; at some point we should tidy this up.
for ref r in attributes.iter() {
let &(ref name, ref attr) = r.borrow();
let &(ref kw, ref attr) = r.borrow();
// Note that we allow tempid resolution to find an existing entity, if it
// exists. We don't yet support upgrades, which will involve producing
// alteration statements.
let tempid = builder.named_tempid(name.to_string());
let name: TypedValue = name.clone().into();
let tempid = builder.named_tempid(kw.to_string());
let name: TypedValue = kw.clone().into();
builder.add(tempid.clone(), a_ident, name)?;
builder.add(schema.clone(), a_attr, tempid.clone())?;
@ -299,18 +442,12 @@ impl Definition {
};
builder.add(tempid.clone(), a_cardinality, c)?;
if attr.index {
builder.add(tempid.clone(), a_index, TypedValue::Boolean(true))?;
}
if attr.fulltext {
builder.add(tempid.clone(), a_fulltext, TypedValue::Boolean(true))?;
}
if attr.component {
builder.add(tempid.clone(), a_is_component, TypedValue::Boolean(true))?;
}
if attr.no_history {
builder.add(tempid.clone(), a_no_history, TypedValue::Boolean(true))?;
}
// These are all unconditional because we use attribute descriptions to _alter_, not
// just to _add_, and so absence is distinct from negation!
builder.add(tempid.clone(), a_index, TypedValue::Boolean(attr.index))?;
builder.add(tempid.clone(), a_fulltext, TypedValue::Boolean(attr.fulltext))?;
builder.add(tempid.clone(), a_is_component, TypedValue::Boolean(attr.component))?;
builder.add(tempid.clone(), a_no_history, TypedValue::Boolean(attr.no_history))?;
if let Some(u) = attr.unique {
let uu = match u {
@ -318,27 +455,74 @@ impl Definition {
Unique::Value => v_unique_value,
};
builder.add(tempid.clone(), a_unique, uu)?;
} else {
let existing_unique =
if let Some(ref diff) = diff {
diff.get(kw).and_then(|a| a.unique)
} else {
None
};
match existing_unique {
None => {
// Nothing to do.
},
Some(Unique::Identity) => {
builder.retract(tempid.clone(), a_unique, v_unique_identity.clone())?;
},
Some(Unique::Value) => {
builder.retract(tempid.clone(), a_unique, v_unique_value.clone())?;
},
}
}
}
builder.build()
}
/// Return a sequence of terms that describes this vocabulary definition and its attributes.
fn description_diff<T>(&self, via: &T, from: &Vocabulary) -> Result<Terms> where T: HasSchema {
let relevant = self.attributes.iter()
.filter_map(|&(ref keyword, _)|
// Look up the keyword to see if it's currently in use.
via.get_entid(keyword)
// If so, map it to the existing attribute.
.and_then(|e| from.find(e).cloned())
// Collect enough that we can do lookups.
.map(|e| (keyword.clone(), e)))
.collect();
self.description_for_attributes(self.attributes.as_slice(), via, Some(relevant))
}
/// Return a sequence of terms that describes this vocabulary definition and its attributes.
fn description<T>(&self, via: &T) -> Result<Terms> where T: HasSchema {
self.description_for_attributes(self.attributes.as_slice(), via)
self.description_for_attributes(self.attributes.as_slice(), via, None)
}
}
/// This enum captures the various relationships between a particular vocabulary pair — one
/// `Definition` and one `Vocabulary`, if present.
///
/// The `'definition` lifetime ties borrowed attribute entries back to the `Definition`
/// that was checked.
#[derive(Debug, Eq, PartialEq)]
pub enum VocabularyCheck<'definition> {
/// The provided definition is not already present in the store.
NotPresent,
/// The provided definition is present in the store, and all of its attributes exist.
Present,
/// The provided definition is present in the store with an earlier version number.
/// `older_version` is the vocabulary as found in the store.
PresentButNeedsUpdate { older_version: Vocabulary },
/// The provided definition is present in the store with a more recent version number.
/// `newer_version` is the vocabulary as found in the store.
PresentButTooNew { newer_version: Vocabulary },
/// The provided definition is present in the store, but some of its attributes are not.
/// `attributes` borrows the affected entries from the definition.
PresentButMissingAttributes { attributes: Vec<&'definition (NamespacedKeyword, Attribute)> },
}
/// This enum captures the outcome of attempting to ensure that a vocabulary definition is present
/// and up-to-date in the store.
#[derive(Debug, Eq, PartialEq)]
pub enum VocabularyOutcome {
/// The vocabulary was absent and has been installed.
@ -356,51 +540,15 @@ pub enum VocabularyOutcome {
Upgraded,
}
/// This trait captures the ability to retrieve and describe stored vocabularies.
pub trait HasVocabularies {
/// Read every vocabulary, returning them as a `Vocabularies` collection.
fn read_vocabularies(&self) -> Result<Vocabularies>;
/// Read the vocabulary with the given name. Callers in this module treat `Ok(None)`
/// as "no such vocabulary in the store".
fn read_vocabulary_named(&self, name: &NamespacedKeyword) -> Result<Option<Vocabulary>>;
}
pub trait VersionedStore {
/// This trait captures the ability of a store to check and install/upgrade vocabularies.
pub trait VersionedStore: HasVocabularies + HasSchema {
/// Check whether the vocabulary described by the provided metadata is present in the store.
fn check_vocabulary<'definition>(&self, definition: &'definition Definition) -> Result<VocabularyCheck<'definition>>;
/// Check whether the provided vocabulary is present in the store. If it isn't, make it so.
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
/// Make sure that our expectations of the core vocabulary -- basic types and attributes -- are met.
fn verify_core_schema(&self) -> Result<()>;
}
trait VocabularyMechanics {
fn install_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome>;
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome>;
}
impl Vocabulary {
// TODO: don't do linear search!
fn find<T>(&self, entid: T) -> Option<&Attribute> where T: Into<Entid> {
let to_find = entid.into();
self.attributes.iter().find(|&&(e, _)| e == to_find).map(|&(_, ref a)| a)
}
}
impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
fn verify_core_schema(&self) -> Result<()> {
if let Some(core) = self.read_vocabulary_named(&DB_SCHEMA_CORE)? {
if core.version != CORE_SCHEMA_VERSION {
bail!(ErrorKind::UnexpectedCoreSchema(Some(core.version)));
}
// TODO: check things other than the version.
} else {
// This would be seriously messed up.
bail!(ErrorKind::UnexpectedCoreSchema(None));
}
Ok(())
}
fn check_vocabulary<'definition>(&self, definition: &'definition Definition) -> Result<VocabularyCheck<'definition>> {
if let Some(vocabulary) = self.read_vocabulary_named(&definition.name)? {
// The name is present.
@ -449,6 +597,83 @@ impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
}
}
/// Check whether the provided vocabulary is present in the store. If it isn't, make it so.
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
/// Check whether the provided vocabularies are present in the store at the correct
/// version and with all defined attributes. If any are not, invoke the `pre`
/// function on the provided `VocabularySource`, install or upgrade the necessary vocabularies,
/// then invoke `post`. Returns `Ok` if all of these steps succeed.
///
/// Use this function instead of calling `ensure_vocabulary` if you need to have pre/post
/// functions invoked when vocabulary changes are necessary.
fn ensure_vocabularies(&mut self, vocabularies: &mut VocabularySource) -> Result<BTreeMap<NamespacedKeyword, VocabularyOutcome>>;
/// Make sure that our expectations of the core vocabulary — basic types and attributes — are met.
fn verify_core_schema(&self) -> Result<()> {
if let Some(core) = self.read_vocabulary_named(&DB_SCHEMA_CORE)? {
if core.version != CORE_SCHEMA_VERSION {
bail!(ErrorKind::UnexpectedCoreSchema(Some(core.version)));
}
// TODO: check things other than the version.
} else {
// This would be seriously messed up.
bail!(ErrorKind::UnexpectedCoreSchema(None));
}
Ok(())
}
}
/// `VocabularyStatus` is passed to the `pre` function when attempting to add or upgrade
/// vocabularies via `ensure_vocabularies`. This is how you can find the status and versions of
/// existing vocabularies — you can retrieve the requested definition and the resulting
/// `VocabularyCheck` by name.
pub trait VocabularyStatus {
/// Look up, by vocabulary name, the requested `Definition` and the `VocabularyCheck`
/// that was computed for it. Returns `None` if the name is not known to this status.
fn get(&self, name: &NamespacedKeyword) -> Option<(&Definition, &VocabularyCheck)>;
/// Look up a version number by vocabulary name.
/// NOTE(review): the implementation for `CheckedVocabularies` returns the version of the
/// requested `Definition`, not the version found in the store — confirm this is intended.
fn version(&self, name: &NamespacedKeyword) -> Option<Version>;
}
/// The definitions that need work (install, upgrade, or missing attributes), keyed by
/// vocabulary name, together with the `VocabularyCheck` computed for each.
#[derive(Default)]
struct CheckedVocabularies<'a> {
// Each entry borrows its `Definition`; the check may also borrow attributes from it.
items: BTreeMap<NamespacedKeyword, (&'a Definition, VocabularyCheck<'a>)>,
}
impl<'a> CheckedVocabularies<'a> {
    /// Record `check` as the outcome for `definition`, keyed by the definition's name.
    /// A later call with the same name replaces the earlier entry.
    fn add(&mut self, definition: &'a Definition, check: VocabularyCheck<'a>) {
        let key = definition.name.clone();
        let entry = (definition, check);
        self.items.insert(key, entry);
    }

    /// True when no definition has been recorded.
    fn is_empty(&self) -> bool {
        let recorded = &self.items;
        recorded.is_empty()
    }
}
impl<'a> VocabularyStatus for CheckedVocabularies<'a> {
    /// Return the recorded definition and check for `name`, if any.
    fn get(&self, name: &NamespacedKeyword) -> Option<(&Definition, &VocabularyCheck)> {
        match self.items.get(name) {
            Some(&(definition, ref check)) => Some((definition, check)),
            None => None,
        }
    }

    /// Return the version of the recorded definition for `name`, if any.
    fn version(&self, name: &NamespacedKeyword) -> Option<Version> {
        match self.items.get(name) {
            Some(&(definition, _)) => Some(definition.version),
            None => None,
        }
    }
}
/// Private operations used to carry out the install/upgrade work decided on by
/// `ensure_vocabulary` and `ensure_vocabularies`.
trait VocabularyMechanics {
/// Install a vocabulary that is not yet present in the store.
// NOTE(review): the implementation is not visible in this excerpt — confirm details there.
fn install_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome>;
/// Transact descriptions for the given subset of `definition`'s attributes, for a
/// vocabulary that is present but lacks them.
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome>;
/// Upgrade the stored vocabulary `from_version` to `definition`, running the
/// definition's `pre` and `post` functions around the transacted changes.
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome>;
}
impl Vocabulary {
    /// Return the attribute stored under `entid` in this vocabulary, if there is one.
    // TODO: don't do linear search!
    fn find<T>(&self, entid: T) -> Option<&Attribute> where T: Into<Entid> {
        let wanted = entid.into();
        for &(candidate, ref attribute) in self.attributes.iter() {
            if candidate == wanted {
                return Some(attribute);
            }
        }
        None
    }
}
impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
fn ensure_vocabulary(&mut self, definition: &Definition) -> Result<VocabularyOutcome> {
match self.check_vocabulary(definition)? {
VocabularyCheck::Present => Ok(VocabularyOutcome::Existed),
@ -458,6 +683,128 @@ impl<'a, 'c> VersionedStore for InProgress<'a, 'c> {
VocabularyCheck::PresentButTooNew { newer_version } => Err(ErrorKind::ExistingVocabularyTooNew(definition.name.to_string(), newer_version.version, definition.version).into()),
}
}
// Check every definition supplied by `vocabularies`, then install or upgrade whatever
// needs work. The source's `pre` and `post` are only invoked when at least one
// definition needs work. Returns the outcome for each definition, keyed by name.
fn ensure_vocabularies(&mut self, vocabularies: &mut VocabularySource) -> Result<BTreeMap<NamespacedKeyword, VocabularyOutcome>> {
let definitions = vocabularies.definitions();
// Upgrades and missing-attribute installs are deferred until after fresh installs.
let mut update = Vec::new();
let mut missing = Vec::new();
let mut out = BTreeMap::new();
let mut work = CheckedVocabularies::default();
// Phase 1: classify each definition. Nothing is written to the store here.
for definition in definitions.iter() {
match self.check_vocabulary(definition)? {
VocabularyCheck::Present => {
out.insert(definition.name.clone(), VocabularyOutcome::Existed);
},
VocabularyCheck::PresentButTooNew { newer_version } => {
// A store that is ahead of the definition fails the whole operation.
bail!(ErrorKind::ExistingVocabularyTooNew(definition.name.to_string(), newer_version.version, definition.version));
},
c @ VocabularyCheck::NotPresent |
c @ VocabularyCheck::PresentButNeedsUpdate { older_version: _ } |
c @ VocabularyCheck::PresentButMissingAttributes { attributes: _ } => {
work.add(definition, c);
},
}
}
if work.is_empty() {
return Ok(out);
}
// If any work needs to be done, run pre/post.
vocabularies.pre(self, &work)?;
// Phase 2: install absent vocabularies now; queue everything else.
for (name, (definition, check)) in work.items.into_iter() {
match check {
VocabularyCheck::NotPresent => {
// Install it directly.
out.insert(name, self.install_vocabulary(definition)?);
},
VocabularyCheck::PresentButNeedsUpdate { older_version } => {
// Save this: we'll do it later.
update.push((definition, older_version));
},
VocabularyCheck::PresentButMissingAttributes { attributes } => {
// Save this: we'll do it later.
missing.push((definition, attributes));
},
VocabularyCheck::Present |
VocabularyCheck::PresentButTooNew { newer_version: _ } => {
// Neither of these is ever added to `work` in phase 1.
unreachable!();
}
}
}
// Phase 3: run the queued upgrades, then the queued attribute installs.
for (d, v) in update {
out.insert(d.name.clone(), self.upgrade_vocabulary(d, v)?);
}
for (d, a) in missing {
out.insert(d.name.clone(), self.install_attributes_for(d, a)?);
}
vocabularies.post(self)?;
Ok(out)
}
}
/// Implement `VocabularySource` to have full programmatic control over how a set of `Definition`s
/// are checked against and transacted into a store.
///
/// `pre` and `post` have default implementations that do nothing and return `Ok(())`.
pub trait VocabularySource {
/// Called to obtain the list of `Definition`s to install. This will be called before `pre`.
fn definitions(&mut self) -> Vec<Definition>;
/// Called before the supplied `Definition`s are transacted. Do not commit the `InProgress`.
/// If this function returns `Err`, the entire vocabulary operation will fail.
///
/// `_checks` exposes the requested definitions and their check results by name.
fn pre(&mut self, _in_progress: &mut InProgress, _checks: &VocabularyStatus) -> Result<()> {
Ok(())
}
/// Called after the supplied `Definition`s are transacted. Do not commit the `InProgress`.
/// If this function returns `Err`, the entire vocabulary operation will fail.
fn post(&mut self, _in_progress: &mut InProgress) -> Result<()> {
Ok(())
}
}
/// A convenience struct to package simple `pre` and `post` functions with a collection of
/// vocabulary `Definition`s.
pub struct SimpleVocabularySource {
/// The definitions handed out by `VocabularySource::definitions`.
pub definitions: Vec<Definition>,
/// Optional function to run as `VocabularySource::pre`; `None` means do nothing.
pub pre: Option<fn(&mut InProgress) -> Result<()>>,
/// Optional function to run as `VocabularySource::post`; `None` means do nothing.
pub post: Option<fn(&mut InProgress) -> Result<()>>,
}
impl SimpleVocabularySource {
pub fn new(definitions: Vec<Definition>,
pre: Option<fn(&mut InProgress) -> Result<()>>,
post: Option<fn(&mut InProgress) -> Result<()>>) -> SimpleVocabularySource {
SimpleVocabularySource {
pre: pre,
post: post,
definitions: definitions,
}
}
pub fn with_definitions(definitions: Vec<Definition>) -> SimpleVocabularySource {
Self::new(definitions, None, None)
}
}
impl VocabularySource for SimpleVocabularySource {
    /// Run the stored `pre` function, if any. The check results are not passed on.
    fn pre(&mut self, in_progress: &mut InProgress, _checks: &VocabularyStatus) -> Result<()> {
        match self.pre {
            Some(hook) => hook(in_progress),
            None => Ok(()),
        }
    }

    /// Run the stored `post` function, if any.
    fn post(&mut self, in_progress: &mut InProgress) -> Result<()> {
        match self.post {
            Some(hook) => hook(in_progress),
            None => Ok(()),
        }
    }

    /// Hand out a copy of the stored definitions.
    fn definitions(&mut self) -> Vec<Definition> {
        self.definitions.clone()
    }
}
impl<'a, 'c> VocabularyMechanics for InProgress<'a, 'c> {
@ -469,17 +816,27 @@ impl<'a, 'c> VocabularyMechanics for InProgress<'a, 'c> {
}
fn install_attributes_for<'definition>(&mut self, definition: &'definition Definition, attributes: Vec<&'definition (NamespacedKeyword, Attribute)>) -> Result<VocabularyOutcome> {
let (terms, tempids) = definition.description_for_attributes(&attributes, self)?;
let (terms, tempids) = definition.description_for_attributes(&attributes, self, None)?;
self.transact_terms(terms, tempids)?;
Ok(VocabularyOutcome::InstalledMissingAttributes)
}
/// Turn the declarative parts of the vocabulary into alterations. Run the 'pre' steps.
/// Transact the changes. Run the 'post' steps. Return the result and the new `InProgress`!
fn upgrade_vocabulary(&mut self, _definition: &Definition, _from_version: Vocabulary) -> Result<VocabularyOutcome> {
unimplemented!();
// TODO
// Ok(VocabularyOutcome::Installed)
fn upgrade_vocabulary(&mut self, definition: &Definition, from_version: Vocabulary) -> Result<VocabularyOutcome> {
// It's sufficient for us to generate the datom form of each attribute and transact that.
// We trust that the vocabulary will implement a 'pre' function that cleans up data for any
// failable conversion (e.g., cardinality-many to cardinality-one).
definition.pre(self, &from_version)?;
// TODO: don't do work for attributes that are unchanged. Here we rely on the transactor
// to elide duplicate datoms.
let (terms, tempids) = definition.description_diff(self, &from_version)?;
self.transact_terms(terms, tempids)?;
definition.post(self, &from_version)?;
Ok(VocabularyOutcome::Upgraded)
}
}

View file

@ -1,4 +1,4 @@
// Copyright 2016 Mozilla
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
@ -20,9 +20,15 @@ extern crate rusqlite;
use mentat::vocabulary;
use mentat::vocabulary::{
Definition,
SimpleVocabularySource,
Version,
VersionedStore,
Vocabulary,
VocabularyCheck,
VocabularyOutcome,
VocabularySource,
VocabularyStatus,
};
use mentat::query::IntoResult;
@ -36,13 +42,20 @@ use mentat_db::AttributeValidation;
use mentat::{
Conn,
InProgress,
KnownEntid,
NamespacedKeyword,
QueryInputs,
Queryable,
Store,
TypedValue,
ValueType,
};
use mentat::entity_builder::BuildTerms;
use mentat::entity_builder::{
BuildTerms,
TermBuilder,
};
use mentat::errors::{
Error,
@ -75,7 +88,9 @@ lazy_static! {
.multival(false)
.index(true)
.build()),
]
],
pre: Definition::no_op,
post: Definition::no_op,
}
};
}
@ -163,17 +178,8 @@ fn test_add_vocab() {
(kw!(:foo/baz), baz.clone()),
];
let foo_v1_a = vocabulary::Definition {
name: kw!(:org.mozilla/foo),
version: 1,
attributes: bar_only.clone(),
};
let foo_v1_b = vocabulary::Definition {
name: kw!(:org.mozilla/foo),
version: 1,
attributes: bar_and_baz.clone(),
};
let foo_v1_a = vocabulary::Definition::new(kw!(:org.mozilla/foo), 1, bar_only.clone());
let foo_v1_b = vocabulary::Definition::new(kw!(:org.mozilla/foo), 1, bar_and_baz.clone());
let mut sqlite = mentat_db::db::new_connection("").unwrap();
let mut conn = Conn::connect(&mut sqlite).unwrap();
@ -271,11 +277,12 @@ fn test_add_vocab() {
(kw!(:foo/bar), bar),
(kw!(:foo/baz), malformed_baz.clone()),
];
let foo_v1_malformed = vocabulary::Definition {
name: kw!(:org.mozilla/foo),
version: 1,
attributes: bar_and_malformed_baz.clone(),
};
let foo_v1_malformed = vocabulary::Definition::new(
kw!(:org.mozilla/foo),
1,
bar_and_malformed_baz.clone()
);
// Scoped borrow of `conn`.
{
@ -291,4 +298,754 @@ fn test_add_vocab() {
_ => panic!(),
}
}
// Some alterations -- cardinality/one to cardinality/many, unique to weaker unique or
// no unique, unindexed to indexed -- can be applied automatically, so long as you
// bump the version number.
let multival_bar = vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Instant)
.multival(true)
.index(true)
.build();
let multival_bar_and_baz = vec![
(kw!(:foo/bar), multival_bar),
(kw!(:foo/baz), baz.clone()),
];
let altered_vocabulary = vocabulary::Definition::new(
kw!(:org.mozilla/foo),
2,
multival_bar_and_baz
);
// foo/bar starts single-valued.
assert_eq!(false, conn.current_schema().attribute_for_ident(&kw!(:foo/bar)).expect("attribute").0.multival);
// Scoped borrow of `conn`.
{
let mut in_progress = conn.begin_transaction(&mut sqlite).expect("begun successfully");
assert_eq!(in_progress.ensure_vocabulary(&altered_vocabulary).expect("success"),
VocabularyOutcome::Upgraded);
in_progress.commit().expect("commit succeeded");
}
// Now it's multi-valued.
assert_eq!(true, conn.current_schema().attribute_for_ident(&kw!(:foo/bar)).expect("attribute").0.multival);
}
/// Convert a row from `[:find ?e ?a :where [?e ?a ?v]]` into an (entity, attribute) pair.
///
/// Panics unless the first two columns are both `TypedValue::Ref`.
fn ea(row: Vec<TypedValue>) -> (KnownEntid, KnownEntid) {
    let mut columns = row.into_iter();
    let first = columns.next();
    let second = columns.next();
    if let (Some(TypedValue::Ref(e)), Some(TypedValue::Ref(a))) = (first, second) {
        (KnownEntid(e), KnownEntid(a))
    } else {
        panic!("Incorrect query shape for 'ea' helper.")
    }
}
/// Convert a row from `[:find ?a ?v :where [?e ?a ?v]]` into an (attribute, value) pair.
///
/// Panics unless the first column is a `TypedValue::Ref` and a second column exists.
fn av(row: Vec<TypedValue>) -> (KnownEntid, TypedValue) {
    let mut columns = row.into_iter();
    let first = columns.next();
    let second = columns.next();
    if let (Some(TypedValue::Ref(a)), Some(v)) = (first, second) {
        (KnownEntid(a), v)
    } else {
        panic!("Incorrect query shape for 'av' helper.")
    }
}
/// Convert a row from `[:find ?e ?v :where [?e ?a ?v]]` into an (entity, value) pair.
fn ev(row: Vec<TypedValue>) -> (KnownEntid, TypedValue) {
    // The row shape is the same as for `av` (a ref followed by a value), so reuse it.
    let (entity, value) = av(row);
    (entity, value)
}
type Inches = i64;
type Centimeters = i64;

/// Convert a whole number of inches to whole centimeters, discarding any
/// fractional part of the result.
///
/// ```
/// assert_eq!(inches_to_cm(100), 254);
/// ```
fn inches_to_cm(inches: Inches) -> Centimeters {
    const CM_PER_INCH: f64 = 2.54f64;
    let centimeters = inches as f64 * CM_PER_INCH;
    // The cast drops the fraction rather than rounding to nearest.
    centimeters as Centimeters
}
/// Query the `:person/height` of the person whose `:person/name` is `name`.
///
/// Returns `Some(height)` when the scalar result is a `TypedValue::Long`, and `None`
/// when there is no result or it is of any other type. Panics if the query itself fails.
fn height_of_person(in_progress: &InProgress, name: &str) -> Option<i64> {
// `?name` is bound from the `name` argument via the query inputs.
let h = in_progress.q_once(r#"[:find ?h .
:in ?name
:where [?p :person/name ?name]
[?p :person/height ?h]]"#,
QueryInputs::with_value_sequence(vec![(var!(?name), TypedValue::typed_string(name))]))
.into_scalar_result()
.expect("result");
match h {
Some(TypedValue::Long(v)) => Some(v),
_ => None,
}
}
// This is a real-world-style test that evolves a schema with data changes.
// We start with a basic vocabulary in three parts:
//
// Part 1 describes foods by name.
// Part 2 describes movies by title.
// Part 3 describes people: their names and heights, and their likes.
//
// We simulate four common migrations:
//
// 1. We made a trivial modeling error: movie names should not be unique.
// We simply fix this -- removing a uniqueness constraint cannot fail.
//
// 2. We need to normalize some non-unique data: we recorded heights in inches when they should be
// in centimeters. We fix this with a migration function and a version bump on the people schema.
//
// 3. We need to normalize some data: food names should all be lowercase, and they should be unique
// so that we can more easily refer to them during writes.
//
// That combination of changes can fail in either order if there are currently foods whose names
// differ only by case.
//
// (We might know that it can't fail thanks to application restrictions, in which case we can
// treat this as we did the height alteration.)
//
// In order for this migration to succeed, we need to merge duplicates, then alter the schema.
//
// 4. We made a more significant modeling error: we used 'like' to identify both movies and foods,
// and we have decided that food preferences and movie preferences should be different attributes.
// We wish to split these up and deprecate the old attribute. In order to do so we need to retract
// all of the datoms that use the old attribute, transact new attributes _in both movies and foods_,
// then re-assert the data.
//
// This one's a little contrived, because it can also be solved without cross-vocabulary work,
// but it's close enough to reality to be illustrative.
#[test]
fn test_upgrade_with_functions() {
let mut store = Store::open("").expect("open");
let food_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/food),
version: 1,
attributes: vec![
(kw!(:food/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.build()),
],
pre: Definition::no_op,
post: Definition::no_op,
};
let movies_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/movies),
version: 1,
attributes: vec![
(kw!(:movie/year),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Long) // No need for Instant here.
.multival(false)
.build()),
(kw!(:movie/title),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.index(true)
.build()),
],
pre: Definition::no_op,
post: Definition::no_op,
};
let people_v1 = vocabulary::Definition {
name: kw!(:org.mozilla/people),
version: 1,
attributes: vec![
(kw!(:person/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.index(true)
.build()),
(kw!(:person/height),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Long)
.multival(false)
.build()),
(kw!(:person/likes),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Ref)
.multival(true)
.build()),
],
pre: Definition::no_op,
post: Definition::no_op,
};
// Apply v1 of each.
let mut v1_provider = SimpleVocabularySource::with_definitions(
vec![
food_v1.clone(),
movies_v1.clone(),
people_v1.clone(),
]);
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
in_progress.ensure_vocabularies(&mut v1_provider).expect("success");
// Also add some data. We do this in one transaction 'cos -- thanks to the modeling errors
// we are about to fix! -- it's a little awkward to make references to entities without
// unique attributes.
in_progress.transact(r#"[
{:movie/title "John Wick"
:movie/year 2014
:db/id "mjw"}
{:movie/title "Terminator 2: Judgment Day"
:movie/year 1991
:db/id "mt2"}
{:movie/title "Dune"
:db/id "md"
:movie/year 1984}
{:movie/title "Upstream Color"
:movie/year 2013
:db/id "muc"}
{:movie/title "Primer"
:db/id "mp"
:movie/year 2004}
;; No year: not yet released.
{:movie/title "The Modern Ocean"
:db/id "mtmo"}
{:food/name "Carrots" :db/id "fc"}
{:food/name "Weird blue worms" :db/id "fwbw"}
{:food/name "Spice" :db/id "fS"}
{:food/name "spice" :db/id "fs"}
;; Sam likes action movies, carrots, and lowercase spice.
{:person/name "Sam"
:person/height 64
:person/likes ["mjw", "mt2", "fc", "fs"]}
;; Beth likes thoughtful and weird movies, weird blue worms, and Spice.
{:person/name "Beth"
:person/height 68
:person/likes ["muc", "mp", "md", "fwbw", "fS"]}
]"#).expect("transacted");
in_progress.commit().expect("commit succeeded");
}
//
// Migration 1: removal of a uniqueness constraint.
//
// Crap, there are several movies named Dune. We need to de-uniqify that attribute.
let movies_v2 = vocabulary::Definition {
name: kw!(:org.mozilla/movies),
version: 2,
attributes: vec![
(kw!(:movie/title),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.non_unique()
.index(true)
.build()),
],
pre: Definition::no_op,
post: Definition::no_op,
};
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
in_progress.ensure_vocabulary(&movies_v2).expect("success");
// We can now add another Dune movie: Denis Villeneuve's 2019 version.
// (Let's just pretend that it's been released, here in 2018!)
in_progress.transact(r#"[
{:movie/title "Dune"
:movie/year 2019}
]"#).expect("transact succeeded");
// And we can query both.
let years =
in_progress.q_once(r#"[:find [?year ...]
:where [?movie :movie/title "Dune"]
[?movie :movie/year ?year]
:order (asc ?year)]"#, None)
.into_coll_result()
.expect("coll");
assert_eq!(years, vec![TypedValue::Long(1984), TypedValue::Long(2019)]);
in_progress.commit().expect("commit succeeded");
}
//
// Migration 2: let's fix those heights!
//
/// Replaces every stored `:person/height` value with the result of passing it
/// through `inches_to_cm`.
///
/// Nothing is done when `from` is already at version 2 or later: that version
/// check is a (redundant) safety net against converting the same data twice.
fn convert_heights_to_centimeters(ip: &mut InProgress, from: &Vocabulary) -> mentat::errors::Result<()> {
    // We keep a redundant safety check here to avoid running twice!
    if from.version >= 2 {
        return Ok(());
    }

    let height_attr = ip.get_entid(&kw!(:person/height)).unwrap();
    let rows = ip.q_once("[:find ?p ?h :where [?p :person/height ?h]]", None)
                 .into_rel_result()?;

    // For each (person, height) row, retract the old value and assert the converted one.
    let mut terms = TermBuilder::new();
    for row in rows.into_iter() {
        let mut columns = row.into_iter();
        if let (Some(TypedValue::Ref(p)), Some(TypedValue::Long(inches))) =
            (columns.next(), columns.next())
        {
            let who = KnownEntid(p);
            terms.retract(who, height_attr, TypedValue::Long(inches))?;
            terms.add(who, height_attr, TypedValue::Long(inches_to_cm(inches)))?;
        }
    }

    // No heights were found, so there is nothing to transact.
    if terms.is_empty() {
        return Ok(());
    }
    ip.transact_builder(terms).map(|_| ())
}
/// Upgrade step for the people vocabulary: converts the stored heights, then
/// rewrites the `:person/height` docstring to state that the unit is centimeters.
fn people_v1_to_v2(ip: &mut InProgress, from: &Vocabulary) -> mentat::errors::Result<()> {
    convert_heights_to_centimeters(ip, from)?;

    // Let's update our documentation, too -- but only when coming from before v2.
    if from.version >= 2 {
        return Ok(());
    }
    ip.transact(r#"[
[:db/add :person/height :db/doc "A person's height in centimeters."]
]"#)?;
    Ok(())
}
// Note that this definition is exactly the same as v1, but the version number is different,
// and we add a `post` function to do cleanup.
let people_v2 = vocabulary::Definition {
name: kw!(:org.mozilla/people),
version: 2,
attributes: people_v1.attributes.clone(),
pre: Definition::no_op,
post: people_v1_to_v2,
};
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
// Before, Sam's height is 64 (inches).
assert_eq!(Some(64), height_of_person(&in_progress, "Sam"));
in_progress.ensure_vocabulary(&people_v2).expect("expected success");
// Now, Sam's height is 162 (centimeters).
assert_eq!(Some(162), height_of_person(&in_progress, "Sam"));
in_progress.commit().expect("commit succeeded");
}
//
// Migration 3: food names should be unique and lowercase.
// Unfortunately, we have "spice" and "Spice"!
//
/// This is a straightforward migration -- retract the old string and add the new one.
/// This would be everything we need if we _knew_ there were no collisions (e.g., we were
/// cleaning up UUIDs).
///
/// Every `:food/name` value is compared with its lowercased form; only names that
/// actually change are retracted and re-asserted. If nothing needs renaming, no
/// transaction is issued.
fn lowercase_names(ip: &mut InProgress) -> mentat::errors::Result<()> {
    let food_name = ip.get_entid(&kw!(:food/name)).unwrap();
    let mut builder = TermBuilder::new();
    for row in ip.q_once("[:find ?f ?name :where [?f :food/name ?name]]", None)
                 .into_rel_result()?
                 .into_iter() {
        let mut row = row.into_iter();
        if let (Some(TypedValue::Ref(food)), Some(TypedValue::String(name))) =
            (row.next(), row.next())
        {
            let lowercased = name.to_lowercase();
            // Compare against the lowercased string instead of testing each char with
            // `is_lowercase`: spaces, digits and punctuation are not "lowercase", so a
            // per-char test would flag names that are already normalized and emit a
            // retract/add pair for the very same value.
            if lowercased.as_str() != name.as_str() {
                println!("Need to rename {} from '{}' to '{}'", food, name, lowercased);
                let food = KnownEntid(food);
                let new_name: TypedValue = lowercased.into();
                builder.retract(food, food_name, TypedValue::String(name))?;
                builder.add(food, food_name, new_name)?;
            }
        }
    }
    // Nothing to rename: skip the transaction entirely.
    if builder.is_empty() {
        return Ok(());
    }
    ip.transact_builder(builder).and(Ok(()))
}
/// This is the function we write to dedupe. This logic is very suitable for sharing:
/// indeed, "make this attribute unique by merging entities" is something we should
/// lift out for reuse.
///
/// For each pair of entities returned by the same-name query, the first is kept and
/// every datom that mentions the second -- as entity or as value -- is retracted and
/// re-asserted against the first.
fn merge_foods_with_same_name(ip: &mut InProgress) -> mentat::errors::Result<()> {
    // Each row is a pair of entities sharing a `:food/name`, filtered by `unpermute`.
    let duplicates = ip.q_once("[:find ?a ?b
:where [?a :food/name ?name]
[?b :food/name ?name]
[(unpermute ?a ?b)]]", None)
        .into_rel_result()?;

    let mut terms = TermBuilder::new();
    for pair in duplicates.into_iter() {
        let mut pair = pair.into_iter();
        if let (Some(TypedValue::Ref(left)), Some(TypedValue::Ref(right))) =
            (pair.next(), pair.next())
        {
            let keep = KnownEntid(left);
            let replace = KnownEntid(right);

            // For each use of the second entity, retract it and re-assert with the first.
            // We should offer some support for doing this, 'cos this is long-winded and has
            // the unexpected side-effect of also trying to retract metadata about the entity…
            println!("Replacing uses of {} to {}.", replace.0, keep.0);

            // Datoms in which `replace` is the entity.
            let outgoing = ip.q_once("[:find ?a ?v
:in ?old
:where [?old ?a ?v]]",
                                     QueryInputs::with_value_sequence(vec![(var!(?old), replace.into())]))
                .into_rel_result()?;
            for (a, v) in outgoing.into_iter().map(av) {
                terms.retract(replace, a, v.clone())?;
                terms.add(keep, a, v)?;
            }

            // Datoms in which `replace` is the value.
            let incoming = ip.q_once("[:find ?e ?a
:in ?old
:where [?e ?a ?old]]",
                                     QueryInputs::with_value_sequence(vec![(var!(?old), replace.into())]))
                .into_rel_result()?;
            for (e, a) in incoming.into_iter().map(ea) {
                terms.retract(e, a, replace)?;
                terms.add(e, a, keep)?;
            }
            // TODO: `retractEntity` on `replace` (when we support that).
        }
    }

    // No duplicates were found, so there is nothing to transact.
    if terms.is_empty() {
        return Ok(());
    }
    ip.transact_builder(terms).map(|_| ())
}
// This migration is bad: it can't impose the uniqueness constraint because we end up with
// two entities both with `:food/name "spice"`. We expect it to fail.
let food_v2_bad = vocabulary::Definition {
name: kw!(:org.mozilla/food),
version: 2,
attributes: vec![
(kw!(:food/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.build()),
],
pre: |ip, from| {
if from.version < 2 {
lowercase_names(ip) // <- no merging!
} else {
Ok(())
}
},
post: Definition::no_op,
};
// This migration is better: once we rewrite the names, we merge the entities.
let food_v2_good = vocabulary::Definition {
name: kw!(:org.mozilla/food),
version: 2,
attributes: vec![
(kw!(:food/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.unique(vocabulary::attribute::Unique::Identity)
.build()),
],
pre: |ip, from| {
if from.version < 2 {
lowercase_names(ip).and_then(|_| merge_foods_with_same_name(ip))
} else {
Ok(())
}
},
post: Definition::no_op,
};
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
// Yep, the bad one fails!
let _err = in_progress.ensure_vocabulary(&food_v2_bad).expect_err("expected error");
}
// Before the good migration, Sam and Beth don't like any of the same foods.
assert!(store.q_once(r#"[:find [?food ...]
:where [?sam :person/name "Sam"]
[?beth :person/name "Beth"]
[?sam :person/likes ?f]
[?beth :person/likes ?f]
[?f :food/name ?food]]"#, None)
.into_coll_result()
.expect("success")
.is_empty());
// Mutable borrow of store.
{
let mut in_progress = store.begin_transaction().expect("began");
// The good one succeeds!
in_progress.ensure_vocabulary(&food_v2_good).expect("expected success");
in_progress.commit().expect("commit succeeded");
}
// After, Sam and Beth both like "spice" — the same entity.
assert_eq!(store.q_once(r#"[:find [?food ...]
:where [?sam :person/name "Sam"]
[?beth :person/name "Beth"]
[?sam :person/likes ?f]
[?beth :person/likes ?f]
[?f :food/name ?food]]"#, None)
.into_coll_result()
.expect("success"),
vec![TypedValue::typed_string("spice")]);
//
// Migration 4: multi-definition migration.
//
// Here we apply a function to a collection of definitions, not just the definitions
// themselves.
//
// In this example we _could_ split up the work -- have :org.mozilla/movies port the movie
// likes, and :org.mozilla/food port the food likes -- but for the sake of illustration we'll
// write an enclosing function to do it.
//
// Further, the v3 versions of all vocabularies _can be applied to earlier versions_
// and our migrations will still work. This vocabulary definition set will work for empty, v1, and
// v2 of each definition.
//
let movies_v3 = vocabulary::Definition {
name: kw!(:org.mozilla/movies),
version: 3,
attributes: vec![
(kw!(:movie/title),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)
.non_unique()
.index(true)
.build()),
// This phrasing is backward, but this is just a test.
(kw!(:movie/likes),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::Ref)
.multival(true)
.build()),
],
pre: Definition::no_op,
post: Definition::no_op,
};
let food_v3 = vocabulary::Definition {
name: kw!(:org.mozilla/food),
version: 3,
attributes: vec![
(kw!(:food/name),
vocabulary::AttributeBuilder::helpful()
.value_type(ValueType::String)
.multival(false)