Collect tempids after upsert resolution. Fixes #299. (#365) r=rnewman

* Test collecting tempids after upsert resolution. Fixes #299. I just didn't finish and expose the tempid collection when I implemented upsert resolution. Here it is! * Review comment: Take ownership of temp_id_map; avoid contains_key().
2017-03-20 11:34:38 -07:00 · 2017-03-20 11:34:38 -07:00 · 8beea55e39
commit 8beea55e39
parent 1801db2a77
3 changed files with 85 additions and 22 deletions
--- a/db/src/db.rs
+++ b/db/src/db.rs
@ -860,6 +860,7 @@ mod tests {
    use edn;
    use mentat_tx_parser;
    use rusqlite;
+    use std::collections::BTreeMap;
    use types::TxReport;
    use tx;

@ -934,6 +935,14 @@ mod tests {
        }
    }

+    fn tempids(report: &TxReport) -> edn::Value {
+        let mut map: BTreeMap<edn::Value, edn::Value> = BTreeMap::default();
+        for (tempid, &entid) in report.tempids.iter() {
+            map.insert(edn::Value::Text(tempid.clone()), edn::Value::Integer(entid));
+        }
+        edn::Value::Map(map)
+    }
+
    #[test]
    fn test_open_current_version() {
        // TODO: figure out how to reference the fixtures directory for real.  For now, assume we're
@ -1086,7 +1095,6 @@ mod tests {

    #[test]
    fn test_upsert_vector() {
-        // TODO: assert the tempids allocated throughout.
        let mut conn = TestConn::default();

        // Insert some :db.unique/identity elements.
@ -1101,10 +1109,10 @@ mod tests {
                          [101 :db/ident :name/Petr]]");

        // Upserting two tempids to the same entid works.
-        conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
-                        [:db/add \"t1\" :db.schema/attribute 100]
-                        [:db/add \"t2\" :db/ident :name/Petr]
-                        [:db/add \"t2\" :db.schema/attribute 101]]").unwrap();
+        let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
+                                     [:db/add \"t1\" :db.schema/attribute 100]
+                                     [:db/add \"t2\" :db/ident :name/Petr]
+                                     [:db/add \"t2\" :db.schema/attribute 101]]").unwrap();
        assert_matches!(conn.last_transaction(),
                        "[[100 :db.schema/attribute 100 ?tx true]
                          [101 :db.schema/attribute 101 ?tx true]
@ -1114,11 +1122,14 @@ mod tests {
                          [100 :db.schema/attribute 100]
                          [101 :db/ident :name/Petr]
                          [101 :db.schema/attribute 101]]");
+        assert_matches!(tempids(&report),
+                        "{\"t1\" 100
+                          \"t2\" 101}");

        // Upserting a tempid works.  The ref doesn't have to exist (at this time), but we can't
        // reuse an existing ref due to :db/unique :db.unique/value.
-        conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
-                        [:db/add \"t1\" :db.schema/attribute 102]]").unwrap();
+        let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
+                                     [:db/add \"t1\" :db.schema/attribute 102]]").unwrap();
        assert_matches!(conn.last_transaction(),
                        "[[100 :db.schema/attribute 102 ?tx true]
                          [?tx :db/txInstant ?ms ?tx true]]");
@ -1128,12 +1139,18 @@ mod tests {
                          [100 :db.schema/attribute 102]
                          [101 :db/ident :name/Petr]
                          [101 :db.schema/attribute 101]]");
+        assert_matches!(tempids(&report),
+                        "{\"t1\" 100}");
+

        // A single complex upsert allocates a new entid.
-        conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap();
+        let report = conn.transact("[[:db/add \"t1\" :db.schema/attribute \"t2\"]]").unwrap();
        assert_matches!(conn.last_transaction(),
                        "[[65536 :db.schema/attribute 65537 ?tx true]
                          [?tx :db/txInstant ?ms ?tx true]]");
+        assert_matches!(tempids(&report),
+                        "{\"t1\" 65536
+                          \"t2\" 65537}");

        // Conflicting upserts fail.
        let err = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
@ -1146,19 +1163,24 @@ mod tests {

        // tempids in :db/retract that do upsert are retracted.  The ref given doesn't exist, so the
        // assertion will be ignored.
-        conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
-                        [:db/retract \"t1\" :db.schema/attribute 103]]").unwrap();
+        let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Ivan]
+                                     [:db/retract \"t1\" :db.schema/attribute 103]]").unwrap();
        assert_matches!(conn.last_transaction(),
                        "[[?tx :db/txInstant ?ms ?tx true]]");
+        assert_matches!(tempids(&report),
+                        "{\"t1\" 100}");

        // A multistep upsert.  The upsert algorithm will first try to resolve "t1", fail, and then
        // allocate both "t1" and "t2".
-        conn.transact("[[:db/add \"t1\" :db/ident :name/Josef]
-                        [:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap();
+        let report = conn.transact("[[:db/add \"t1\" :db/ident :name/Josef]
+                                     [:db/add \"t2\" :db.schema/attribute \"t1\"]]").unwrap();
        assert_matches!(conn.last_transaction(),
                        "[[65538 :db/ident :name/Josef ?tx true]
                          [65539 :db.schema/attribute 65538 ?tx true]
                          [?tx :db/txInstant ?ms ?tx true]]");
+        assert_matches!(tempids(&report),
+                        "{\"t1\" 65538
+                          \"t2\" 65539}");

        // A multistep insert.  This time, we can resolve both, but we have to try "t1", succeed,
        // and then resolve "t2".
--- a/db/src/tx.rs
+++ b/db/src/tx.rs
@ -47,7 +47,10 @@

 use std;
 use std::borrow::Cow;
-use std::collections::BTreeSet;
+use std::collections::{
+    BTreeMap,
+    BTreeSet,
+};

 use ::{to_namespaced_keyword};
 use db;
@ -164,9 +167,12 @@ impl<'conn, 'a> Tx<'conn, 'a> {
    /// Pipeline stage 1: convert `Entity` instances into `Term` instances, ready for term
    /// rewriting.
    ///
-    /// The `Term` instances produce share interned TempId and LookupRef handles.
-    fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<Vec<TermWithTempIdsAndLookupRefs>> where I: IntoIterator<Item=Entity> {
+    /// The `Term` instances produce share interned TempId and LookupRef handles, and we return the
+    /// interned handle sets so that consumers can ensure all handles are used appropriately.
+    fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<(Vec<TermWithTempIdsAndLookupRefs>, intern_set::InternSet<String>, intern_set::InternSet<AVPair>)> where I: IntoIterator<Item=Entity> {
        let mut temp_ids = intern_set::InternSet::new();
+        // #183: We don't yet support lookup refs, so this isn't mutable.  Later, it'll be mutable.
+        let lookup_refs = intern_set::InternSet::new();

        entities.into_iter()
            .map(|entity: Entity| -> Result<TermWithTempIdsAndLookupRefs> {
@ -218,6 +224,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
                }
            })
            .collect::<Result<Vec<_>>>()
+            .map(|terms| (terms, temp_ids, lookup_refs))
    }

    /// Pipeline stage 2: rewrite `Term` instances with lookup refs into `Term` instances without
@ -242,16 +249,13 @@ impl<'conn, 'a> Tx<'conn, 'a> {
    // TODO: move this to the transactor layer.
    pub fn transact_entities<I>(&mut self, entities: I) -> Result<TxReport> where I: IntoIterator<Item=Entity> {
        // TODO: push these into an internal transaction report?
+        let mut tempids: BTreeMap<String, Entid> = BTreeMap::default();

-        // We don't yet support lookup refs, so this isn't mutable.  Later, it'll be mutable.
-        let lookup_refs: intern_set::InternSet<AVPair> = intern_set::InternSet::new();
-
-        // TODO: extract the tempids set as well.
        // Pipeline stage 1: entities -> terms with tempids and lookup refs.
-        let terms_with_temp_ids_and_lookup_refs = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;
+        let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;

        // Pipeline stage 2: resolve lookup refs -> terms with tempids.
-        let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_refs.inner.iter().map(|rc| &**rc).collect();
+        let lookup_ref_avs: Vec<&(i64, TypedValue)> = lookup_ref_set.inner.iter().map(|rc| &**rc).collect();
        let lookup_ref_map: AVMap = self.store.resolve_avs(&lookup_ref_avs[..])?;

        let terms_with_temp_ids = self.resolve_lookup_refs(&lookup_ref_map, terms_with_temp_ids_and_lookup_refs)?;
@ -263,8 +267,24 @@ impl<'conn, 'a> Tx<'conn, 'a> {
        // And evolve them forward.
        while generation.can_evolve() {
            // Evolve further.
-            let temp_id_map = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?;
+            let temp_id_map: TempIdMap = self.resolve_temp_id_avs(&generation.temp_id_avs()[..])?;
            generation = generation.evolve_one_step(&temp_id_map);
+
+            // Report each tempid that resolves via upsert.
+            for (tempid, entid) in temp_id_map {
+                // Every tempid should be resolved at most once.  Prima facie, we might expect a
+                // tempid to be resolved in two different generations.  However, that is not so: the
+                // keys of temp_id_map are unique between generations.Suppose that id->e and id->e*
+                // are two such mappings, resolved on subsequent evolutionary steps, and that `id`
+                // is a key in the intersection of the two key sets. This can't happen: if `id` maps
+                // to `e` via id->e, all instances of `id` have been evolved forward (replaced with
+                // `e`) before we try to resolve the next set of `UpsertsE`.  That is, we'll never
+                // successfully upsert the same tempid in more than one generation step.  (We might
+                // upsert the same tempid to multiple entids via distinct `[a v]` pairs in a single
+                // generation step; in this case, the transaction will fail.)
+                let previous = tempids.insert((*tempid).clone(), entid);
+                assert!(previous.is_none());
+            }
        }

        // Allocate entids for tempids that didn't upsert.  BTreeSet rather than HashSet so this is deterministic.
@ -277,6 +297,19 @@ impl<'conn, 'a> Tx<'conn, 'a> {

        let final_populations = generation.into_final_populations(&temp_id_allocations)?;

+        // Report each tempid that is allocated.
+        for (tempid, &entid) in &temp_id_allocations {
+            // Every tempid should be allocated at most once.
+            assert!(!tempids.contains_key(&**tempid));
+            tempids.insert((**tempid).clone(), entid);
+        }
+
+        // Verify that every tempid we interned either resolved or has been allocated.
+        assert_eq!(tempids.len(), tempid_set.inner.len());
+        for tempid in &tempid_set.inner {
+            assert!(tempids.contains_key(&**tempid));
+        }
+
        /// Assertions that are :db.cardinality/one and not :db.fulltext.
        let mut non_fts_one: Vec<db::ReducedEntity> = vec![];

@ -333,6 +366,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
        Ok(TxReport {
            tx_id: self.tx_id,
            tx_instant: self.tx_instant,
+            tempids: tempids,
        })
    }
 }
--- a/db/src/types.rs
+++ b/db/src/types.rs
@ -88,4 +88,11 @@ pub struct TxReport {
    /// This is milliseconds after the Unix epoch according to the transactor's local clock.
    // TODO: :db.type/instant.
    pub tx_instant: i64,
+
+    /// A map from string literal tempid to resolved or allocated entid.
+    ///
+    /// Every string literal tempid presented to the transactor either resolves via upsert to an
+    /// existing entid, or is allocated a new entid.  (It is possible for multiple distinct string
+    /// literal tempids to all unify to a single freshly allocated entid.)
+    pub tempids: BTreeMap<String, Entid>,
 }