Collect tempids after upsert resolution. Fixes #299. (#365) r=rnewman

* Test collecting tempids after upsert resolution. Fixes #299.

I just didn't finish and expose the tempid collection when I
implemented upsert resolution.  Here it is!

* Review comment: Take ownership of temp_id_map; avoid contains_key().
This commit is contained in:
Nick Alexander 2017-03-20 11:34:38 -07:00 committed by GitHub
parent 1801db2a77
commit 8beea55e39
3 changed files with 85 additions and 22 deletions

View file

@ -860,6 +860,7 @@ mod tests {
use edn; use edn;
use mentat_tx_parser; use mentat_tx_parser;
use rusqlite; use rusqlite;
use std::collections::BTreeMap;
use types::TxReport; use types::TxReport;
use tx; use tx;
@ -934,6 +935,14 @@ mod tests {
} }
} }
fn tempids(report: &TxReport) -> edn::Value {
let mut map: BTreeMap<edn::Value, edn::Value> = BTreeMap::default();
for (tempid, &entid) in report.tempids.iter() {
map.insert(edn::Value::Text(tempid.clone()), edn::Value::Integer(entid));
}
edn::Value::Map(map)
}
#[test] #[test]
fn test_open_current_version() { fn test_open_current_version() {
// TODO: figure out how to reference the fixtures directory for real. For now, assume we're // TODO: figure out how to reference the fixtures directory for real. For now, assume we're
@ -1086,7 +1095,6 @@ mod tests {
#[test] #[test]
fn test_upsert_vector() { fn test_upsert_vector() {
// TODO: assert the tempids allocated throughout.
let mut conn = TestConn::default(); let mut conn = TestConn::default();
// Insert some :db.unique/identity elements. // Insert some :db.unique/identity elements.
@ -1101,7 +1109,7 @@ mod tests {
[101 :db/ident :name/Petr]]"); [101 :db/ident :name/Petr]]");
// Upserting two tempids to the same entid works. // Upserting two tempids to the same entid works.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/add \"t1\" :db.schema/attribute 100] [:db/add \"t1\" :db.schema/attribute 100]
[:db/add \"t2\" :db/ident :name/Petr] [:db/add \"t2\" :db/ident :name/Petr]
[:db/add \"t2\" :db.schema/attribute 101]]").unwrap(); [:db/add \"t2\" :db.schema/attribute 101]]").unwrap();
@ -1114,10 +1122,13 @@ mod tests {
[100 :db.schema/attribute 100] [100 :db.schema/attribute 100]
[101 :db/ident :name/Petr] [101 :db/ident :name/Petr]
[101 :db.schema/attribute 101]]"); [101 :db.schema/attribute 101]]");
assert_matches!(tempids(&report),
"{\"t1\" 100
\"t2\" 101}");
// Upserting a tempid works. The ref doesn't have to exist (at this time), but we can't // Upserting a tempid works. The ref doesn't have to exist (at this time), but we can't
// reuse an existing ref due to :db/unique :db.unique/value. // reuse an existing ref due to :db/unique :db.unique/value.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/add \"t1\" :db.schema/attribute 102]]").unwrap(); [:db/add \"t1\" :db.schema/attribute 102]]").unwrap();
assert_matches!(conn.last_transaction(), assert_matches!(conn.last_transaction(),
"[[100 :db.schema/attribute 102 ?tx true] "[[100 :db.schema/attribute 102 ?tx true]
@ -1128,12 +1139,18 @@ mod tests {
[100 :db.schema/attribute 102] [100 :db.schema/attribute 102]
[101 :db/ident :name/Petr] [101 :db/ident :name/Petr]
[101 :db.schema/attribute 101]]"); [101 :db.schema/attribute 101]]");
assert_matches!(tempids(&report),
"{\"t1\" 100}");
// A single complex upsert allocates a new entid. // A single complex upsert allocates a new entid.
conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap(); let report = conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap();
assert_matches!(conn.last_transaction(), assert_matches!(conn.last_transaction(),
"[[65536 :db.schema/attribute 65537 ?tx true] "[[65536 :db.schema/attribute 65537 ?tx true]
[?tx :db/txInstant ?ms ?tx true]]"); [?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 65536
\"t2\" 65537}");
// Conflicting upserts fail. // Conflicting upserts fail.
let err = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] let err = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
@ -1146,19 +1163,24 @@ mod tests {
// tempids in :db/retract that do upsert are retracted. The ref given doesn't exist, so the // tempids in :db/retract that do upsert are retracted. The ref given doesn't exist, so the
// assertion will be ignored. // assertion will be ignored.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/retract \"t1\" :db.schema/attribute 103]]").unwrap(); [:db/retract \"t1\" :db.schema/attribute 103]]").unwrap();
assert_matches!(conn.last_transaction(), assert_matches!(conn.last_transaction(),
"[[?tx :db/txInstant ?ms ?tx true]]"); "[[?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 100}");
// A multistep upsert. The upsert algorithm will first try to resolve "t1", fail, and then // A multistep upsert. The upsert algorithm will first try to resolve "t1", fail, and then
// allocate both "t1" and "t2". // allocate both "t1" and "t2".
conn.transact("[[:db/add \"t1\" :db/ident :name/Josef] let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Josef]
[:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap(); [:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap();
assert_matches!(conn.last_transaction(), assert_matches!(conn.last_transaction(),
"[[65538 :db/ident :name/Josef ?tx true] "[[65538 :db/ident :name/Josef ?tx true]
[65539 :db.schema/attribute 65538 ?tx true] [65539 :db.schema/attribute 65538 ?tx true]
[?tx :db/txInstant ?ms ?tx true]]"); [?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 65538
\"t2\" 65539}");
// A multistep insert. This time, we can resolve both, but we have to try "t1", succeed, // A multistep insert. This time, we can resolve both, but we have to try "t1", succeed,
// and then resolve "t2". // and then resolve "t2".

View file

@ -47,7 +47,10 @@
use std; use std;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::BTreeSet; use std::collections::{
BTreeMap,
BTreeSet,
};
use ::{to_namespaced_keyword}; use ::{to_namespaced_keyword};
use db; use db;
@ -164,9 +167,12 @@ impl<'conn, 'a> Tx<'conn, 'a> {
/// Pipeline stage 1: convert `Entity` instances into `Term` instances, ready for term /// Pipeline stage 1: convert `Entity` instances into `Term` instances, ready for term
/// rewriting. /// rewriting.
/// ///
/// The `Term` instances produce share interned TempId and LookupRef handles. /// The `Term` instances produce share interned TempId and LookupRef handles, and we return the
fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<Vec<TermWithTempIdsAndLookupRefs>> where I: IntoIterator<Item=Entity> { /// interned handle sets so that consumers can ensure all handles are used appropriately.
fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<(Vec<TermWithTempIdsAndLookupRefs>, intern_set::InternSet<String>, intern_set::InternSet<AVPair>)> where I: IntoIterator<Item=Entity> {
let mut temp_ids = intern_set::InternSet::new(); let mut temp_ids = intern_set::InternSet::new();
// #183: We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable.
let lookup_refs = intern_set::InternSet::new();
entities.into_iter() entities.into_iter()
.map(|entity: Entity| -> Result<TermWithTempIdsAndLookupRefs> { .map(|entity: Entity| -> Result<TermWithTempIdsAndLookupRefs> {
@ -218,6 +224,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
} }
}) })
.collect::<Result<Vec<_>>>() .collect::<Result<Vec<_>>>()
.map(|terms| (terms, temp_ids, lookup_refs))
} }
/// Pipeline stage 2: rewrite `Term` instances with lookup refs into `Term` instances without /// Pipeline stage 2: rewrite `Term` instances with lookup refs into `Term` instances without
@ -242,16 +249,13 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// TODO: move this to the transactor layer. // TODO: move this to the transactor layer.
pub fn transact_entities<I>(&mut self, entities: I) -> Result<TxReport> where I: IntoIterator<Item=Entity> { pub fn transact_entities<I>(&mut self, entities: I) -> Result<TxReport> where I: IntoIterator<Item=Entity> {
// TODO: push these into an internal transaction report? // TODO: push these into an internal transaction report?
let mut tempids: BTreeMap<String, Entid> = BTreeMap::default();
// We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable.
let lookup_refs: intern_set::InternSet<AVPair> = intern_set::InternSet::new();
// TODO: extract the tempids set as well.
// Pipeline stage 1: entities -> terms with tempids and lookup refs. // Pipeline stage 1: entities -> terms with tempids and lookup refs.
let terms_with_temp_ids_and_lookup_refs = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?; let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;
// Pipeline stage 2: resolve lookup refs -> terms with tempids. // Pipeline stage 2: resolve lookup refs -> terms with tempids.
let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_refs.inner.iter().map(|rc| &**rc).collect(); let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_ref_set.inner.iter().map(|rc| &**rc).collect();
let lookup_ref_map: AVMap = self.store.resolve_avs(&lookup_ref_avs[..])?; let lookup_ref_map: AVMap = self.store.resolve_avs(&lookup_ref_avs[..])?;
let terms_with_temp_ids = self.resolve_lookup_refs(&lookup_ref_map, terms_with_temp_ids_and_lookup_refs)?; let terms_with_temp_ids = self.resolve_lookup_refs(&lookup_ref_map, terms_with_temp_ids_and_lookup_refs)?;
@ -263,8 +267,24 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// And evolve them forward. // And evolve them forward.
while generation.can_evolve() { while generation.can_evolve() {
// Evolve further. // Evolve further.
let temp_id_map = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?; let temp_id_map: TempIdMap = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?;
generation = generation.evolve_one_step(&temp_id_map); generation = generation.evolve_one_step(&temp_id_map);
// Report each tempid that resolves via upsert.
for (tempid, entid) in temp_id_map {
// Every tempid should be resolved at most once. Prima facie, we might expect a
// tempid to be resolved in two different generations. However, that is not so: the
// keys of temp_id_map are unique between generations.Suppose that id->e and id->e*
// are two such mappings, resolved on subsequent evolutionary steps, and that `id`
// is a key in the intersection of the two key sets. This can't happen: if `id` maps
// to `e` via id->e, all instances of `id` have been evolved forward (replaced with
// `e`) before we try to resolve the next set of `UpsertsE`. That is, we'll never
// successfully upsert the same tempid in more than one generation step. (We might
// upsert the same tempid to multiple entids via distinct `[a v]` pairs in a single
// generation step; in this case, the transaction will fail.)
let previous = tempids.insert((*tempid).clone(), entid);
assert!(previous.is_none());
}
} }
// Allocate entids for tempids that didn't upsert. BTreeSet rather than HashSet so this is deterministic. // Allocate entids for tempids that didn't upsert. BTreeSet rather than HashSet so this is deterministic.
@ -277,6 +297,19 @@ impl<'conn, 'a> Tx<'conn, 'a> {
let final_populations = generation.into_final_populations(&temp_id_allocations)?; let final_populations = generation.into_final_populations(&temp_id_allocations)?;
// Report each tempid that is allocated.
for (tempid, &entid) in &temp_id_allocations {
// Every tempid should be allocated at most once.
assert!(!tempids.contains_key(&**tempid));
tempids.insert((**tempid).clone(), entid);
}
// Verify that every tempid we interned either resolved or has been allocated.
assert_eq!(tempids.len(), tempid_set.inner.len());
for tempid in &tempid_set.inner {
assert!(tempids.contains_key(&**tempid));
}
/// Assertions that are :db.cardinality/one and not :db.fulltext. /// Assertions that are :db.cardinality/one and not :db.fulltext.
let mut non_fts_one: Vec<db::ReducedEntity> = vec![]; let mut non_fts_one: Vec<db::ReducedEntity> = vec![];
@ -333,6 +366,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
Ok(TxReport { Ok(TxReport {
tx_id: self.tx_id, tx_id: self.tx_id,
tx_instant: self.tx_instant, tx_instant: self.tx_instant,
tempids: tempids,
}) })
} }
} }

View file

@ -88,4 +88,11 @@ pub struct TxReport {
/// This is milliseconds after the Unix epoch according to the transactor's local clock. /// This is milliseconds after the Unix epoch according to the transactor's local clock.
// TODO: :db.type/instant. // TODO: :db.type/instant.
pub tx_instant: i64, pub tx_instant: i64,
/// A map from string literal tempid to resolved or allocated entid.
///
/// Every string literal tempid presented to the transactor either resolves via upsert to an
/// existing entid, or is allocated a new entid. (It is possible for multiple distinct string
/// literal tempids to all unify to a single freshly allocated entid.)
pub tempids: BTreeMap<String, Entid>,
} }