Collect tempids after upsert resolution. Fixes #299. (#365) r=rnewman

* Test collecting tempids after upsert resolution. Fixes #299.

I just never finished exposing the tempid collection when I implemented
upsert resolution.  Here it is!

* Review comment: Take ownership of temp_id_map; avoid contains_key().
Nick Alexander 2017-03-20 11:34:38 -07:00 committed by GitHub
parent 1801db2a77
commit 8beea55e39
3 changed files with 85 additions and 22 deletions
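In short, the `TxReport` returned from a transaction now carries a `tempids` map from every string tempid in the transaction to the entid it resolved to via upsert or was freshly allocated. A minimal sketch of what a caller can now do, assuming the `TestConn` test helper used in the diff below; the data and entids are illustrative, not a stable public API surface:

    // Sketch only: TestConn and conn.transact come from the test module below.
    let mut conn = TestConn::default();
    let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]]").unwrap();
    // "t1" either upserted to an existing entid or was freshly allocated;
    // either way it now appears in report.tempids.
    let resolved = report.tempids["t1"];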


@@ -860,6 +860,7 @@ mod tests {
use edn;
use mentat_tx_parser;
use rusqlite;
use std::collections::BTreeMap;
use types::TxReport;
use tx;
@@ -934,6 +935,14 @@ mod tests {
}
}
fn tempids(report: &TxReport) -> edn::Value {
let mut map: BTreeMap<edn::Value, edn::Value> = BTreeMap::default();
for (tempid, &entid) in report.tempids.iter() {
map.insert(edn::Value::Text(tempid.clone()), edn::Value::Integer(entid));
}
edn::Value::Map(map)
}
#[test]
fn test_open_current_version() {
// TODO: figure out how to reference the fixtures directory for real. For now, assume we're
@@ -1086,7 +1095,6 @@ mod tests {
#[test]
fn test_upsert_vector() {
// TODO: assert the tempids allocated throughout.
let mut conn = TestConn::default();
// Insert some :db.unique/identity elements.
@@ -1101,7 +1109,7 @@
[101 :db/ident :name/Petr]]");
// Upserting two tempids to the same entid works.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/add \"t1\" :db.schema/attribute 100]
[:db/add \"t2\" :db/ident :name/Petr]
[:db/add \"t2\" :db.schema/attribute 101]]").unwrap();
@@ -1114,10 +1122,13 @@
[100 :db.schema/attribute 100]
[101 :db/ident :name/Petr]
[101 :db.schema/attribute 101]]");
assert_matches!(tempids(&report),
"{\"t1\" 100
\"t2\" 101}");
// Upserting a tempid works. The ref doesn't have to exist (at this time), but we can't
// reuse an existing ref due to :db/unique :db.unique/value.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/add \"t1\" :db.schema/attribute 102]]").unwrap();
assert_matches!(conn.last_transaction(),
"[[100 :db.schema/attribute 102 ?tx true]
@@ -1128,12 +1139,18 @@
[100 :db.schema/attribute 102]
[101 :db/ident :name/Petr]
[101 :db.schema/attribute 101]]");
assert_matches!(tempids(&report),
"{\"t1\" 100}");
// A single complex upsert allocates a new entid.
conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap();
let report = conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap();
assert_matches!(conn.last_transaction(),
"[[65536 :db.schema/attribute 65537 ?tx true]
[?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 65536
\"t2\" 65537}");
// Conflicting upserts fail.
let err = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
@@ -1146,19 +1163,24 @@
// tempids in :db/retract that do upsert are retracted. The ref given doesn't exist, so the
// assertion will be ignored.
conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
[:db/retract \"t1\" :db.schema/attribute 103]]").unwrap();
assert_matches!(conn.last_transaction(),
"[[?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 100}");
// A multistep upsert. The upsert algorithm will first try to resolve "t1", fail, and then
// allocate both "t1" and "t2".
conn.transact("[[:db/add \"t1\" :db/ident :name/Josef]
let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Josef]
[:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap();
assert_matches!(conn.last_transaction(),
"[[65538 :db/ident :name/Josef ?tx true]
[65539 :db.schema/attribute 65538 ?tx true]
[?tx :db/txInstant ?ms ?tx true]]");
assert_matches!(tempids(&report),
"{\"t1\" 65538
\"t2\" 65539}");
// A multistep insert. This time, we can resolve both, but we have to try "t1", succeed,
// and then resolve "t2".
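The tests above exercise both outcomes: tempids that resolve via upsert to existing entids (100, 101) and tempids that are freshly allocated (65536 and up). They compare the collected tempids by round-tripping them through an `edn::Value` map so `assert_matches!` can match an EDN literal; a consumer without that harness could write the same check as a plain map comparison. A hedged sketch, assuming the `TestConn` helper and the schema transacted in `test_upsert_vector` above (where "t1" upserts to 100 and "t2" to 101), and that `Entid` is the usual `i64` alias:

    use std::collections::BTreeMap;

    // Std-only equivalent of assert_matches!(tempids(&report), "{\"t1\" 100 \"t2\" 101}").
    let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
                                 [:db/add \"t2\" :db/ident :name/Petr]]").unwrap();
    let expected: BTreeMap<String, i64> = [("t1", 100), ("t2", 101)]
        .iter()
        .map(|&(t, e)| (t.to_string(), e))
        .collect();
    assert_eq!(report.tempids, expected);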


@@ -47,7 +47,10 @@
use std;
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::collections::{
BTreeMap,
BTreeSet,
};
use ::{to_namespaced_keyword};
use db;
@@ -164,9 +167,12 @@ impl<'conn, 'a> Tx<'conn, 'a> {
/// Pipeline stage 1: convert `Entity` instances into `Term` instances, ready for term
/// rewriting.
///
/// The `Term` instances produce share interned TempId and LookupRef handles.
fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<Vec<TermWithTempIdsAndLookupRefs>> where I: IntoIterator<Item=Entity> {
/// The `Term` instances produced share interned TempId and LookupRef handles, and we return the
/// interned handle sets so that consumers can ensure all handles are used appropriately.
fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<(Vec<TermWithTempIdsAndLookupRefs>, intern_set::InternSet<String>, intern_set::InternSet<AVPair>)> where I: IntoIterator<Item=Entity> {
let mut temp_ids = intern_set::InternSet::new();
// #183: We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable.
let lookup_refs = intern_set::InternSet::new();
entities.into_iter()
.map(|entity: Entity| -> Result<TermWithTempIdsAndLookupRefs> {
@@ -218,6 +224,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
}
})
.collect::<Result<Vec<_>>>()
.map(|terms| (terms, temp_ids, lookup_refs))
}
/// Pipeline stage 2: rewrite `Term` instances with lookup refs into `Term` instances without
@@ -242,16 +249,13 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// TODO: move this to the transactor layer.
pub fn transact_entities<I>(&mut self, entities: I) -> Result<TxReport> where I: IntoIterator<Item=Entity> {
// TODO: push these into an internal transaction report?
let mut tempids: BTreeMap<String, Entid> = BTreeMap::default();
// We don't yet support lookup refs, so this isn't mutable. Later, it'll be mutable.
let lookup_refs: intern_set::InternSet<AVPair> = intern_set::InternSet::new();
// TODO: extract the tempids set as well.
// Pipeline stage 1: entities -> terms with tempids and lookup refs.
let terms_with_temp_ids_and_lookup_refs = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;
let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;
// Pipeline stage 2: resolve lookup refs -> terms with tempids.
let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_refs.inner.iter().map(|rc| &**rc).collect();
let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_ref_set.inner.iter().map(|rc| &**rc).collect();
let lookup_ref_map: AVMap = self.store.resolve_avs(&lookup_ref_avs[..])?;
let terms_with_temp_ids = self.resolve_lookup_refs(&lookup_ref_map, terms_with_temp_ids_and_lookup_refs)?;
@@ -263,8 +267,24 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// And evolve them forward.
while generation.can_evolve() {
// Evolve further.
let temp_id_map = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?;
let temp_id_map: TempIdMap = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?;
generation = generation.evolve_one_step(&temp_id_map);
// Report each tempid that resolves via upsert.
for (tempid, entid) in temp_id_map {
// Every tempid should be resolved at most once. Prima facie, we might expect a
// tempid to be resolved in two different generations. However, that is not so: the
// keys of temp_id_map are unique between generations. Suppose that id->e and id->e*
// are two such mappings, resolved on subsequent evolutionary steps, and that `id`
// is a key in the intersection of the two key sets. This can't happen: if `id` maps
// to `e` via id->e, all instances of `id` have been evolved forward (replaced with
// `e`) before we try to resolve the next set of `UpsertsE`. That is, we'll never
// successfully upsert the same tempid in more than one generation step. (We might
// upsert the same tempid to multiple entids via distinct `[a v]` pairs in a single
// generation step; in this case, the transaction will fail.)
let previous = tempids.insert((*tempid).clone(), entid);
assert!(previous.is_none());
}
}
// Allocate entids for tempids that didn't upsert. BTreeSet rather than HashSet so this is deterministic.
@@ -277,6 +297,19 @@ impl<'conn, 'a> Tx<'conn, 'a> {
let final_populations = generation.into_final_populations(&temp_id_allocations)?;
// Report each tempid that is allocated.
for (tempid, &entid) in &temp_id_allocations {
// Every tempid should be allocated at most once.
assert!(!tempids.contains_key(&**tempid));
tempids.insert((**tempid).clone(), entid);
}
// Verify that every tempid we interned either resolved or has been allocated.
assert_eq!(tempids.len(), tempid_set.inner.len());
for tempid in &tempid_set.inner {
assert!(tempids.contains_key(&**tempid));
}
/// Assertions that are :db.cardinality/one and not :db.fulltext.
let mut non_fts_one: Vec<db::ReducedEntity> = vec![];
@@ -333,6 +366,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
Ok(TxReport {
tx_id: self.tx_id,
tx_instant: self.tx_instant,
tempids: tempids,
})
}
}
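One detail worth noting from the allocation step above: tempids that fail to upsert are collected into a `BTreeSet` rather than a `HashSet` precisely so that entid allocation is deterministic. A standalone illustration of why iteration order matters here (standard library only; this is not Mentat's actual allocation code):

    use std::collections::BTreeSet;

    fn main() {
        // BTreeSet iterates in sorted order, so pairing tempids with a run of
        // fresh entids always yields the same assignment across runs.
        let unallocated: BTreeSet<&str> = ["t2", "t1", "t3"].iter().cloned().collect();
        let allocations: Vec<(&str, i64)> = unallocated.iter().cloned().zip(65536i64..).collect();
        assert_eq!(allocations, vec![("t1", 65536), ("t2", 65537), ("t3", 65538)]);
        // With a HashSet, the pairing would depend on unspecified hash iteration order.
    }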


@@ -88,4 +88,11 @@ pub struct TxReport {
/// This is milliseconds after the Unix epoch according to the transactor's local clock.
// TODO: :db.type/instant.
pub tx_instant: i64,
/// A map from string literal tempid to resolved or allocated entid.
///
/// Every string literal tempid presented to the transactor either resolves via upsert to an
/// existing entid, or is allocated a new entid. (It is possible for multiple distinct string
/// literal tempids to all unify to a single freshly allocated entid.)
pub tempids: BTreeMap<String, Entid>,
}
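Because multiple distinct tempids can unify to a single freshly allocated entid, the values of `tempids` are not necessarily distinct even though the keys are. A hedged sketch of a consumer-side helper that checks for that case (`TxReport` and `Entid` are the types from this diff; the helper itself is illustrative, not part of the API):

    use std::collections::BTreeSet;

    // True if two or more tempids in the report resolved or were allocated to
    // the same entid, i.e. if any unification happened in this transaction.
    fn any_unified(report: &TxReport) -> bool {
        let distinct: BTreeSet<Entid> = report.tempids.values().cloned().collect();
        distinct.len() < report.tempids.len()
    }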