From bc011bbf43d865d2003ebed26eaf512c7ef2224e Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Tue, 4 Oct 2016 11:34:28 -0700 Subject: [PATCH 01/10] Pre: Add util/group-by-kv. --- src/common/datomish/util.cljc | 14 ++++++++++++++ test/datomish/util_test.cljc | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/src/common/datomish/util.cljc b/src/common/datomish/util.cljc index 13a9ee09..dde3955b 100644 --- a/src/common/datomish/util.cljc +++ b/src/common/datomish/util.cljc @@ -154,3 +154,17 @@ (defn unlimited-buffer [] (UnlimitedBuffer. #?(:cljs (array) :clj (java.util.LinkedList.)))) + +(defn group-by-kv + "Returns a map of the elements of coll keyed by the first element of + the result of f on each element. The value at each key will be a + vector of the second element of the result of f on the corresponding + elements, in the order they appeared in coll." + {:static true} + [f coll] + (persistent! + (reduce + (fn [ret x] + (let [[k v] (f x)] + (assoc! ret k (conj (get ret k []) v)))) + (transient {}) coll))) diff --git a/test/datomish/util_test.cljc b/test/datomish/util_test.cljc index 87f25ce1..f1e215b2 100644 --- a/test/datomish/util_test.cljc +++ b/test/datomish/util_test.cljc @@ -46,3 +46,8 @@ (is (util/unblocking-chan? (a/chan (a/sliding-buffer 10)))) (is (util/unblocking-chan? (a/chan (util/unlimited-buffer)))) (is (not (util/unblocking-chan? (a/chan (a/buffer 10)))))) + +(deftest test-group-by-kvs + (are [m xs] (= m (util/group-by-kv identity xs)) + {:a [1 2] :b [3]} + [[:a 1] [:a 2] [:b 3]])) From d94dfae01b7011cd1186e32c13a96275d857e3d9 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Wed, 5 Oct 2016 20:51:46 -0700 Subject: [PATCH 02/10] Pre: Add multistep upsert example. --- test/datomish/db_test.cljc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/datomish/db_test.cljc b/test/datomish/db_test.cljc index b6f5ee7e..dd8531bd 100644 --- a/test/datomish/db_test.cljc +++ b/test/datomish/db_test.cljc @@ -214,6 +214,24 @@ [:db/add (d/id-literal :db.part/user -1) :name "Petr"] [:db/add (d/id-literal :db.part/user -1) :age 36]]))))))) +(deftest-db test-multistep-upsert conn + ( (get (tempids tx0) -1) 1000)) + (is (> (get (tempids tx0) -1) 1000)) + + ;; This time, we can resolve both, but we have to try -1, succeed, and then resolve -2. + (let [tx1 ( Date: Wed, 5 Oct 2016 15:29:40 -0700 Subject: [PATCH 03/10] Pre: Make testing consistent by sorting fulltext values before inserting. This sorts fulltext values inserted in a single transaction, not across transactions. This makes the rowids assigned in the fulltext_values table internally consistent, even as the order of entities and datoms changes (as the transaction applying algorithm evolves over time). The test changes simply make the fulltext values sort easily. In theory, these fulltext values could be very large, and sorting might be very expensive. In practice, we expect values to differ in their first few characters, so that this is efficient (i.e., proportional to the number of fulltext values inserted and not their size). --- src/common/datomish/db.cljc | 4 ++-- test/datomish/db_test.cljc | 42 ++++++++++++++++++------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/common/datomish/db.cljc b/src/common/datomish/db.cljc index f0d9c0d5..a76611f5 100644 --- a/src/common/datomish/db.cljc +++ b/src/common/datomish/db.cljc @@ -383,7 +383,7 @@ (ref? a) ; index_vaet (unique? a) ; unique_value tag]])) - ops + (sort-by (fn [[_ _ _ v]] v) ops) ;; Make testing easier by sorting by string values. TODO: discuss expense. (range initial-many-searchid 999999999)) ["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"])) @@ -412,7 +412,7 @@ "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0) VALUES " "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, 0, ?)") e a searchid tx tag]])) - ops + (sort-by (fn [[_ _ _ v]] v) ops) ;; Make testing easier by sorting by string values. TODO: discuss expense. (range initial-one-searchid 999999999)) ["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"])) diff --git a/test/datomish/db_test.cljc b/test/datomish/db_test.cljc index dd8531bd..3cb47020 100644 --- a/test/datomish/db_test.cljc +++ b/test/datomish/db_test.cljc @@ -644,11 +644,11 @@ (let [tx0 (:tx ( Date: Wed, 12 Oct 2016 11:27:57 -0700 Subject: [PATCH 04/10] Pre: Make = (d/db conn) tx)))))) + + (testing "Fails for missing fulltext entities" + (is (thrown-with-msg? + ExceptionInfo #"No entity found for lookup-ref" + ( Date: Wed, 12 Oct 2016 11:51:17 -0700 Subject: [PATCH 05/10] Rewrite resolve-id-literals to use bulk SQLite schema a v) - yield-datom - (fn [rows] - (when-let [row (first rows)] - (row->Datom schema row)))] - (go-pair - (->> - ;; TODO: generalize columns. - ["SELECT e, a, v, tx, 1 AS added FROM all_datoms - WHERE index_avet = 1 AND a = ? AND value_type_tag = ? AND v = ? - LIMIT 1" a tag v] - - (s/all-rows (:sqlite-connection db)) - id-av (fn [[op id-literal a v]] [id-literal [a v]]) + ;; Like {[id-literal [[:a1 :v1] [:a2 :v2] ...]] ...}. + id->avs (util/group-by-kv ->id-av upserts-e) + ;; Like [[:a1 :v1] [:a2 v2] ...]. + avs (apply concat (vals id->avs)) + ;; Like {[[:a1 :v1] e1] ...}. + av->e (es (fn [avs] (set (keep (partial get av->e) avs))) + + id->es (util/mapvals avs->es id->avs)] + + (into {} + ;; nil is dropped. + (map (fn [[id es]] + (when-let [e (first es)] + (when (second es) + (raise "Conflicting upsert: " id " resolves" + " to more than one entid " es + {:error :transact/upsert :tempid id :entids es})) + [id e]))) + id->es))))) + +(defn evolve-upserts-e [id->e upserts-e] + (let [evolve1 + (fn [[op id-e a v]] + (let [e* (get id->e id-e)] + (if e* + [:upserted [op e* a v]] + [:allocations-e [op id-e a v]])))] + (util/group-by-kv evolve1 upserts-e))) + +(defn evolve-upserts-ev [id->e upserts-ev] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a id-v]] + (let [e* (get id->e id-e) + v* (get id->e id-v)] + (if e* + (if v* + [:resolved [op e* a v*]] + [:allocations-v [op e* a id-v]]) + (if v* + [:upserts-e [op id-e a v*]] + [:upserts-ev [op id-e a id-v]]))))] + (util/group-by-kv evolve1 upserts-ev))) + +(defn evolve-allocations-e [id->e allocations-e] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a v]] + (let [e* (get id->e id-e)] + (if e* + [:resolved [op e* a v]] + [:allocations-e [op id-e a v]])))] + (util/group-by-kv evolve1 allocations-e))) + +(defn evolve-allocations-v [id->e allocations-v] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op e a id-v]] + (let [v* (get id->e id-v)] + (if v* + [:resolved [op e a v*]] + [:allocations-v [op e a id-v]])))] + (util/group-by-kv evolve1 allocations-v))) + +(defn evolve-allocations-ev [id->e allocations-ev] + "Given a map id->e of id-literals to integer entids, evolve the entities in allocations-ev. Returns a + map whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a id-v]] + (let [e* (get id->e id-e) + v* (get id->e id-v)] + (if e* + (if v* + [:resolved [op e* a v*]] + [:allocations-v [op e* a id-v]]) + (if v* + [:allocations-e [op id-e a v*]] + [:allocations-ev [op id-e a id-v]]))))] + (util/group-by-kv evolve1 allocations-ev))) + +(defn e will upsert; some will not. + Some :upserts-e evolve to become actual :upserts (they upserted!); any other :upserts-e evolve to + become :allocations-e (they did not upsert, and will not upsert this transaction). + + Using the newly upserted id-literals, some :upserts-ev evolve to become :resolved; + some :upserts-ev evolve to become :upserts-e; and some :upserts-ev remain :upserts-ev. + + Likewise, some :allocations-ev evolve to become :allocations-e, :allocations-v, or :resolved; some + :allocations-e evolve to become :resolved; and some :allocations-v evolve to become :resolved. + + If we have *new* :upserts-e (i.e., some :upserts-ev become :upserts-e), then we may be able to + make more progress. We recurse, trying to resolve these new :upserts-e. + + Eventually we will have no :upserts-e. At this point, :upserts-ev become :allocations-ev, and now + we have :entities, :upserted, :resolved, and various :allocations-*. + + As a future optimization, :upserts do not need to be inserted; they upserted, so they already + exist in the DB. (We still need to verify uniqueness and ensure no overlapping can occur.) + Similary, :allocations-* do not need to be checked for existence, so they can be written to the DB + faster." + (go-pair + (let [{:keys [upserted resolved upserts-ev upserts-e allocations-ev allocations-e allocations-v entities]} evolution] + (if-not upserts-e + ;; No more progress to be made. Any upserts-ev must just be allocations. + {:allocations-ev (concat upserts-ev allocations-ev) + :allocations-e allocations-e + :allocations-v allocations-v + :upserted upserted + :resolved resolved + :entities entities} + ;; Progress can be made. Try to evolve further. + (let [id->e (e ;; TODO: ensure we handle conflicting upserts across generations correctly here. + :upserted upserted + :resolved resolved + :entities entities} + (evolve-upserts-ev id->e upserts-ev) + (evolve-upserts-e id->e upserts-e) + (evolve-allocations-ev id->e allocations-ev) + (evolve-allocations-e id->e allocations-e) + (evolve-allocations-v id->e allocations-v))))))) + +;; TODO: do this in one step, rather than iterating. +(defn allocate [report evolution] + "Given a maximally evolved map of generations, allocate entids for all id-literals that did not + get upserted." + (let [{:keys [tempids upserted resolved allocations-ev allocations-e allocations-v entities]} evolution + initial-report (assoc report :tempids tempids)] + (loop [report + (assoc initial-report + ;; TODO: drop :upserted, they already exist in the DB; and don't search for + ;; :allocations-*, they definitely don't already exist in the DB. + :entities (concat upserted resolved entities)) + + es + (concat allocations-ev allocations-e allocations-v)] + (let [[[op e a v :as entity] & entities] es] + (cond + (nil? entity) + report + + (id-literal? e) + (let [eid (or (get-in report [:tempids e]) (-next-eid! (:part-map-atom report) e))] + (recur (allocate-eid report e eid) (cons [op eid a v] entities))) + + (id-literal? v) + (let [eid (or (get-in report [:tempids v]) (-next-eid! (:part-map-atom report) v))] + (recur (allocate-eid report v eid) (cons [op e a eid] entities))) + + true + (recur (transact-entity report entity) entities) + ))))) + (defn Date: Thu, 13 Oct 2016 20:37:10 -0700 Subject: [PATCH 06/10] Review comment: style nits. --- src/common/datomish/transact.cljc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/common/datomish/transact.cljc b/src/common/datomish/transact.cljc index aad0f440..309afdfb 100644 --- a/src/common/datomish/transact.cljc +++ b/src/common/datomish/transact.cljc @@ -350,19 +350,16 @@ :entities - not :db/add, or no id-literals." {:pre [(sequential? entity)]} (let [[op e a v] entity - e? (id-literal? e) - a? (id-literal? a) v? (id-literal? v)] - (cond - (not= (first entity) :db/add) ;; TODO: verify no id-literals appear. - :entities - - a? + (when (id-literal? a) (raise "id-literal attributes are not yet supported: " entity {:error :transact/no-id-literal-attributes - :entity entity }) + :entity entity })) + (cond + (not= op :db/add) ;; TODO: verify no id-literals appear. + :entities - e? + (id-literal? e) (if (unique-identity? a) (if v? :upserts-ev @@ -383,7 +380,7 @@ Returns a map of id-literals to integer entids, with keys only those id-literals that mapped to existing entities." (go-pair - (if (seq upserts-e) + (when (seq upserts-e) (let [->id-av (fn [[op id-literal a v]] [id-literal [a v]]) ;; Like {[id-literal [[:a1 :v1] [:a2 :v2] ...]] ...}. id->avs (util/group-by-kv ->id-av upserts-e) From 00c72f9188378d8da6b62ed66de0156271ab357f Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 13 Oct 2016 20:43:39 -0700 Subject: [PATCH 07/10] Review comment: fix "Like {...}" map examples. --- src/common/datomish/transact.cljc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/datomish/transact.cljc b/src/common/datomish/transact.cljc index 309afdfb..2a744cff 100644 --- a/src/common/datomish/transact.cljc +++ b/src/common/datomish/transact.cljc @@ -282,7 +282,7 @@ {:pre [(db/db? db) (report? report)]} (let [unique-identity? (memoize (partial ds/unique-identity? (db/schema db))) - ;; Map lookup-ref -> entities containing lookup-ref, like {[[:a :v] [[[:a :v] :b :w] ...]] ...}. + ;; Map lookup-ref -> entities containing lookup-ref, like {[:a :v] [[(lookup-ref :a :v) :b :w] ...], ...}. groups (group-by (partial keep db/lookup-ref?) (:entities report)) ;; Entities with no lookup-ref are grouped under the key (lazy-seq). entities (get groups (lazy-seq)) ;; No lookup-refs? Pass through. @@ -382,11 +382,11 @@ (go-pair (when (seq upserts-e) (let [->id-av (fn [[op id-literal a v]] [id-literal [a v]]) - ;; Like {[id-literal [[:a1 :v1] [:a2 :v2] ...]] ...}. + ;; Like {id-literal [[:a1 :v1] [:a2 :v2] ...], ...}. id->avs (util/group-by-kv ->id-av upserts-e) ;; Like [[:a1 :v1] [:a2 v2] ...]. avs (apply concat (vals id->avs)) - ;; Like {[[:a1 :v1] e1] ...}. + ;; Like {[:a1 :v1] e1, ...}. av->e (es (fn [avs] (set (keep (partial get av->e) avs))) From caa9d2d7cb89a60b2658f0db8ea4072eec48fb31 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 13 Oct 2016 20:55:45 -0700 Subject: [PATCH 08/10] Review comment: prefer dissoc and update to destructuring. --- src/common/datomish/transact.cljc | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/common/datomish/transact.cljc b/src/common/datomish/transact.cljc index 2a744cff..d7dcb17b 100644 --- a/src/common/datomish/transact.cljc +++ b/src/common/datomish/transact.cljc @@ -474,7 +474,8 @@ First, resolve :upserts-e against the database. Some [a v] -> e will upsert; some will not. Some :upserts-e evolve to become actual :upserts (they upserted!); any other :upserts-e evolve to - become :allocations-e (they did not upsert, and will not upsert this transaction). + become :allocations-e (they did not upsert, and will not upsert this transaction). All :upserts-e + will evolve out of the :upserts-e generation: each one upserts or does not. Using the newly upserted id-literals, some :upserts-ev evolve to become :resolved; some :upserts-ev evolve to become :upserts-e; and some :upserts-ev remain :upserts-ev. @@ -490,20 +491,19 @@ As a future optimization, :upserts do not need to be inserted; they upserted, so they already exist in the DB. (We still need to verify uniqueness and ensure no overlapping can occur.) - Similary, :allocations-* do not need to be checked for existence, so they can be written to the DB + Similarly, :allocations-* do not need to be checked for existence, so they can be written to the DB faster." (go-pair - (let [{:keys [upserted resolved upserts-ev upserts-e allocations-ev allocations-e allocations-v entities]} evolution] - (if-not upserts-e + (let [upserts-e (seq (:upserts-e evolution)) + id->e (and upserts-e + (e ;; No more progress to be made. Any upserts-ev must just be allocations. - {:allocations-ev (concat upserts-ev allocations-ev) - :allocations-e allocations-e - :allocations-v allocations-v - :upserted upserted - :resolved resolved - :entities entities} + (update + (dissoc evolution :upserts-ev :upserts-e) + :allocations-ev concat (:upserts-ev evolution)) ;; Progress can be made. Try to evolve further. - (let [id->e (e ;; TODO: ensure we handle conflicting upserts across generations correctly here. From 679ab8cf7d9da77a57368f064498efcd81e2a4ab Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 13 Oct 2016 21:31:03 -0700 Subject: [PATCH 09/10] Review comment: explain why upserts between generational steps don't conflict. --- src/common/datomish/transact.cljc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/common/datomish/transact.cljc b/src/common/datomish/transact.cljc index d7dcb17b..37296cec 100644 --- a/src/common/datomish/transact.cljc +++ b/src/common/datomish/transact.cljc @@ -506,10 +506,19 @@ (let [{:keys [upserted resolved upserts-ev allocations-ev allocations-e allocations-v entities]} evolution] (merge-with concat - {:tempids id->e ;; TODO: ensure we handle conflicting upserts across generations correctly here. - :upserted upserted + {:upserted upserted :resolved resolved - :entities entities} + :entities entities + ;; The keys of the id->e map are unique between generation steps, so we can simply + ;; concat tempids. Suppose that id->e and id->e* are two such mappings, resolved on + ;; subsequent evolutionary steps, and that id is a key in the intersection of the two + ;; key sets. This can't happen: if id maps to e via id->e, all instances of id have + ;; been evolved forward (replaced with e) before we try to resolve the next set of + ;; :upserts-e. That is, we'll never successfully upsert the same id-literal in more + ;; than one generation step. (We might upsert the same id-literal to multiple entids + ;; via distinct [a v] pairs in a single generation step; in this case, + ;; e} (evolve-upserts-ev id->e upserts-ev) (evolve-upserts-e id->e upserts-e) (evolve-allocations-ev id->e allocations-ev) From 3670c5cce7ce65cee9236d61745327415dd4623b Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Thu, 13 Oct 2016 21:40:33 -0700 Subject: [PATCH 10/10] Review comment: save allocations when evolving. --- src/common/datomish/transact.cljc | 35 ++++++++++++++----------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/common/datomish/transact.cljc b/src/common/datomish/transact.cljc index 37296cec..892ad243 100644 --- a/src/common/datomish/transact.cljc +++ b/src/common/datomish/transact.cljc @@ -406,18 +406,17 @@ (defn evolve-upserts-e [id->e upserts-e] (let [evolve1 - (fn [[op id-e a v]] - (let [e* (get id->e id-e)] - (if e* - [:upserted [op e* a v]] - [:allocations-e [op id-e a v]])))] + (fn [[op id-e a v :as entity]] + (if-let [e* (get id->e id-e)] + [:upserted [op e* a v]] + [:allocations-e entity]))] (util/group-by-kv evolve1 upserts-e))) (defn evolve-upserts-ev [id->e upserts-ev] "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map whose keys are generations and whose values are vectors of entities in those generations." (let [evolve1 - (fn [[op id-e a id-v]] + (fn [[op id-e a id-v :as entity]] (let [e* (get id->e id-e) v* (get id->e id-v)] (if e* @@ -426,36 +425,34 @@ [:allocations-v [op e* a id-v]]) (if v* [:upserts-e [op id-e a v*]] - [:upserts-ev [op id-e a id-v]]))))] + [:upserts-ev entity]))))] (util/group-by-kv evolve1 upserts-ev))) (defn evolve-allocations-e [id->e allocations-e] "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map whose keys are generations and whose values are vectors of entities in those generations." (let [evolve1 - (fn [[op id-e a v]] - (let [e* (get id->e id-e)] - (if e* - [:resolved [op e* a v]] - [:allocations-e [op id-e a v]])))] + (fn [[op id-e a v :as entity]] + (if-let [e* (get id->e id-e)] + [:resolved [op e* a v]] + [:allocations-e entity]))] (util/group-by-kv evolve1 allocations-e))) (defn evolve-allocations-v [id->e allocations-v] "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map whose keys are generations and whose values are vectors of entities in those generations." (let [evolve1 - (fn [[op e a id-v]] - (let [v* (get id->e id-v)] - (if v* - [:resolved [op e a v*]] - [:allocations-v [op e a id-v]])))] + (fn [[op e a id-v :as entity]] + (if-let [v* (get id->e id-v)] + [:resolved [op e a v*]] + [:allocations-v entity]))] (util/group-by-kv evolve1 allocations-v))) (defn evolve-allocations-ev [id->e allocations-ev] "Given a map id->e of id-literals to integer entids, evolve the entities in allocations-ev. Returns a map whose keys are generations and whose values are vectors of entities in those generations." (let [evolve1 - (fn [[op id-e a id-v]] + (fn [[op id-e a id-v :as entity]] (let [e* (get id->e id-e) v* (get id->e id-v)] (if e* @@ -464,7 +461,7 @@ [:allocations-v [op e* a id-v]]) (if v* [:allocations-e [op id-e a v*]] - [:allocations-ev [op id-e a id-v]]))))] + [:allocations-ev entity]))))] (util/group-by-kv evolve1 allocations-ev))) (defn