From 8beea55e398d68c69c20a24af7bf2c3b77bf1180 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 20 Mar 2017 11:34:38 -0700 Subject: [PATCH] Collect tempids after upsert resolution. Fixes #299. (#365) r=rnewman * Test collecting tempids after upsert resolution. Fixes #299. I just didn't finish and expose the tempid collection when I implemented upsert resolution. Here it is! * Review comment: Take ownership of temp_id_map; avoid contains_key(). --- db/src/db.rs | 46 ++++++++++++++++++++++++++++++----------- db/src/tx.rs | 54 ++++++++++++++++++++++++++++++++++++++++--------- db/src/types.rs | 7 +++++++ 3 files changed, 85 insertions(+), 22 deletions(-) diff --git a/db/src/db.rs b/db/src/db.rs index ac672067..f791bae9 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -860,6 +860,7 @@ mod tests { use edn; use mentat_tx_parser; use rusqlite; + use std::collections::BTreeMap; use types::TxReport; use tx; @@ -934,6 +935,14 @@ mod tests { } } + fn tempids(report: &TxReport) -> edn::Value { + let mut map: BTreeMap = BTreeMap::default(); + for (tempid, &entid) in report.tempids.iter() { + map.insert(edn::Value::Text(tempid.clone()), edn::Value::Integer(entid)); + } + edn::Value::Map(map) + } + #[test] fn test_open_current_version() { // TODO: figure out how to reference the fixtures directory for real. For now, assume we're @@ -1086,7 +1095,6 @@ mod tests { #[test] fn test_upsert_vector() { - // TODO: assert the tempids allocated throughout. let mut conn = TestConn::default(); // Insert some :db.unique/identity elements. @@ -1101,10 +1109,10 @@ mod tests { [101 :db/ident :name/Petr]]"); // Upserting two tempids to the same entid works. - conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] - [:db/add \"t1\" :db.schema/attribute 100] - [:db/add \"t2\" :db/ident :name/Petr] - [:db/add \"t2\" :db.schema/attribute 101]]").unwrap(); + let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] + [:db/add \"t1\" :db.schema/attribute 100] + [:db/add \"t2\" :db/ident :name/Petr] + [:db/add \"t2\" :db.schema/attribute 101]]").unwrap(); assert_matches!(conn.last_transaction(), "[[100 :db.schema/attribute 100 ?tx true] [101 :db.schema/attribute 101 ?tx true] @@ -1114,11 +1122,14 @@ mod tests { [100 :db.schema/attribute 100] [101 :db/ident :name/Petr] [101 :db.schema/attribute 101]]"); + assert_matches!(tempids(&report), + "{\"t1\" 100 + \"t2\" 101}"); // Upserting a tempid works. The ref doesn't have to exist (at this time), but we can't // reuse an existing ref due to :db/unique :db.unique/value. - conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] - [:db/add \"t1\" :db.schema/attribute 102]]").unwrap(); + let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] + [:db/add \"t1\" :db.schema/attribute 102]]").unwrap(); assert_matches!(conn.last_transaction(), "[[100 :db.schema/attribute 102 ?tx true] [?tx :db/txInstant ?ms ?tx true]]"); @@ -1128,12 +1139,18 @@ mod tests { [100 :db.schema/attribute 102] [101 :db/ident :name/Petr] [101 :db.schema/attribute 101]]"); + assert_matches!(tempids(&report), + "{\"t1\" 100}"); + // A single complex upsert allocates a new entid. - conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap(); + let report = conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap(); assert_matches!(conn.last_transaction(), "[[65536 :db.schema/attribute 65537 ?tx true] [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(tempids(&report), + "{\"t1\" 65536 + \"t2\" 65537}"); // Conflicting upserts fail. let err = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] @@ -1146,19 +1163,24 @@ mod tests { // tempids in :db/retract that do upsert are retracted. The ref given doesn't exist, so the // assertion will be ignored. - conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] - [:db/retract \"t1\" :db.schema/attribute 103]]").unwrap(); + let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan] + [:db/retract \"t1\" :db.schema/attribute 103]]").unwrap(); assert_matches!(conn.last_transaction(), "[[?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(tempids(&report), + "{\"t1\" 100}"); // A multistep upsert. The upsert algorithm will first try to resolve "t1", fail, and then // allocate both "t1" and "t2". - conn.transact("[[:db/add \"t1\" :db/ident :name/Josef] - [:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap(); + let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Josef] + [:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap(); assert_matches!(conn.last_transaction(), "[[65538 :db/ident :name/Josef ?tx true] [65539 :db.schema/attribute 65538 ?tx true] [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(tempids(&report), + "{\"t1\" 65538 + \"t2\" 65539}"); // A multistep insert. This time, we can resolve both, but we have to try "t1", succeed, // and then resolve "t2". diff --git a/db/src/tx.rs b/db/src/tx.rs index 066a911f..e086f9dd 100644 --- a/db/src/tx.rs +++ b/db/src/tx.rs @@ -47,7 +47,10 @@ use std; use std::borrow::Cow; -use std::collections::BTreeSet; +use std::collections::{ + BTreeMap, + BTreeSet, +}; use ::{to_namespaced_keyword}; use db; @@ -164,9 +167,12 @@ impl<'conn, 'a> Tx<'conn, 'a> { /// Pipeline stage 1: convert `Entity` instances into `Term` instances, ready for term /// rewriting. /// - /// The `Term` instances produce share interned TempId and LookupRef handles. - fn entities_into_terms_with_temp_ids_and_lookup_refs(&self, entities: I) -> Result> where I: IntoIterator { + /// The `Term` instances produce share interned TempId and LookupRef handles, and we return the + /// interned handle sets so that consumers can ensure all handles are used appropriately. + fn entities_into_terms_with_temp_ids_and_lookup_refs(&self, entities: I) -> Result<(Vec, intern_set::InternSet, intern_set::InternSet)> where I: IntoIterator { let mut temp_ids = intern_set::InternSet::new(); + // #183: We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable. + let lookup_refs = intern_set::InternSet::new(); entities.into_iter() .map(|entity: Entity| -> Result { @@ -218,6 +224,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { } }) .collect::>>() + .map(|terms| (terms, temp_ids, lookup_refs)) } /// Pipeline stage 2: rewrite `Term` instances with lookup refs into `Term` instances without @@ -242,16 +249,13 @@ impl<'conn, 'a> Tx<'conn, 'a> { // TODO: move this to the transactor layer. pub fn transact_entities(&mut self, entities: I) -> Result where I: IntoIterator { // TODO: push these into an internal transaction report? + let mut tempids: BTreeMap = BTreeMap::default(); - // We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable. - let lookup_refs: intern_set::InternSet = intern_set::InternSet::new(); - - // TODO: extract the tempids set as well. // Pipeline stage 1: entities -> terms with tempids and lookup refs. - let terms_with_temp_ids_and_lookup_refs = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?; + let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?; // Pipeline stage 2: resolve lookup refs -> terms with tempids. - let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_refs.inner.iter().map(|rc| &**rc).collect(); + let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_ref_set.inner.iter().map(|rc| &**rc).collect(); let lookup_ref_map: AVMap = self.store.resolve_avs(&lookup_ref_avs[..])?; let terms_with_temp_ids = self.resolve_lookup_refs(&lookup_ref_map, terms_with_temp_ids_and_lookup_refs)?; @@ -263,8 +267,24 @@ impl<'conn, 'a> Tx<'conn, 'a> { // And evolve them forward. while generation.can_evolve() { // Evolve further. - let temp_id_map = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?; + let temp_id_map: TempIdMap = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?; generation = generation.evolve_one_step(&temp_id_map); + + // Report each tempid that resolves via upsert. + for (tempid, entid) in temp_id_map { + // Every tempid should be resolved at most once. Prima facie, we might expect a + // tempid to be resolved in two different generations. However, that is not so: the + // keys of temp_id_map are unique between generations.Suppose that id->e and id->e* + // are two such mappings, resolved on subsequent evolutionary steps, and that `id` + // is a key in the intersection of the two key sets. This can't happen: if `id` maps + // to `e` via id->e, all instances of `id` have been evolved forward (replaced with + // `e`) before we try to resolve the next set of `UpsertsE`. That is, we'll never + // successfully upsert the same tempid in more than one generation step. (We might + // upsert the same tempid to multiple entids via distinct `[a v]` pairs in a single + // generation step; in this case, the transaction will fail.) + let previous = tempids.insert((*tempid).clone(), entid); + assert!(previous.is_none()); + } } // Allocate entids for tempids that didn't upsert. BTreeSet rather than HashSet so this is deterministic. @@ -277,6 +297,19 @@ impl<'conn, 'a> Tx<'conn, 'a> { let final_populations = generation.into_final_populations(&temp_id_allocations)?; + // Report each tempid that is allocated. + for (tempid, &entid) in &temp_id_allocations { + // Every tempid should be allocated at most once. + assert!(!tempids.contains_key(&**tempid)); + tempids.insert((**tempid).clone(), entid); + } + + // Verify that every tempid we interned either resolved or has been allocated. + assert_eq!(tempids.len(), tempid_set.inner.len()); + for tempid in &tempid_set.inner { + assert!(tempids.contains_key(&**tempid)); + } + /// Assertions that are :db.cardinality/one and not :db.fulltext. let mut non_fts_one: Vec = vec![]; @@ -333,6 +366,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { Ok(TxReport { tx_id: self.tx_id, tx_instant: self.tx_instant, + tempids: tempids, }) } } diff --git a/db/src/types.rs b/db/src/types.rs index 0403f47a..ab73da52 100644 --- a/db/src/types.rs +++ b/db/src/types.rs @@ -88,4 +88,11 @@ pub struct TxReport { /// This is milliseconds after the Unix epoch according to the transactor's local clock. // TODO: :db.type/instant. pub tx_instant: i64, + + /// A map from string literal tempid to resolved or allocated entid. + /// + /// Every string literal tempid presented to the transactor either resolves via upsert to an + /// existing entid, or is allocated a new entid. (It is possible for multiple distinct string + /// literal tempids to all unify to a single freshly allocated entid.) + pub tempids: BTreeMap, }