diff --git a/src/common/datomish/db.cljc b/src/common/datomish/db.cljc index f0d9c0d5..a240ed39 100644 --- a/src/common/datomish/db.cljc +++ b/src/common/datomish/db.cljc @@ -121,10 +121,6 @@ (SQLite schema a v) - yield-datom - (fn [rows] - (when-let [row (first rows)] - (row->Datom schema row)))] - (go-pair - (->> - ;; TODO: generalize columns. - ["SELECT e, a, v, tx, 1 AS added FROM all_datoms - WHERE index_avet = 1 AND a = ? AND value_type_tag = ? AND v = ? - LIMIT 1" a tag v] - - (s/all-rows (:sqlite-connection db)) - entities containing lookup-ref, like {[[:a :v] [[[:a :v] :b :w] ...]] ...}. + ;; Map lookup-ref -> entities containing lookup-ref, like {[:a :v] [[(lookup-ref :a :v) :b :w] ...], ...}. groups (group-by (partial keep db/lookup-ref?) (:entities report)) ;; Entities with no lookup-ref are grouped under the key (lazy-seq). entities (get groups (lazy-seq)) ;; No lookup-refs? Pass through. @@ -335,6 +335,224 @@ (defn- transact-entity [report entity] (update-in report [:entities] conj entity)) +(defn id-literal-generation [unique-identity? entity] + "Group entities possibly containing id-literals into 'generations'. + + Entities are grouped into one of the following generations: + + :upserts-ev - 'complex upserts' that look like [:db/add -1 a -2] where a is :db.unique/identity; + + :upserts-e - 'simple upserts' that look like [:db/add -1 a v] where a is :db.unique/identity; + + :allocations-{e,v,ev} - things like [:db/add -1 b v], [:db/add e b -2], or [:db/add -3 b -4] where + b is *not* :db.unique/identity, or like [:db/add e a -5] where a is :db.unique/identity; + + :entities - not :db/add, or no id-literals." + {:pre [(sequential? entity)]} + (let [[op e a v] entity + v? (id-literal? v)] + (when (id-literal? a) + (raise "id-literal attributes are not yet supported: " entity + {:error :transact/no-id-literal-attributes + :entity entity })) + (cond + (not= op :db/add) ;; TODO: verify no id-literals appear. + :entities + + (id-literal? e) + (if (unique-identity? a) + (if v? + :upserts-ev + :upserts-e) + (if v? + :allocations-ev + :allocations-e)) + + v? + :allocations-v + + true + :entities))) + +(defn id-av (fn [[op id-literal a v]] [id-literal [a v]]) + ;; Like {id-literal [[:a1 :v1] [:a2 :v2] ...], ...}. + id->avs (util/group-by-kv ->id-av upserts-e) + ;; Like [[:a1 :v1] [:a2 v2] ...]. + avs (apply concat (vals id->avs)) + ;; Like {[:a1 :v1] e1, ...}. + av->e (es (fn [avs] (set (keep (partial get av->e) avs))) + + id->es (util/mapvals avs->es id->avs)] + + (into {} + ;; nil is dropped. + (map (fn [[id es]] + (when-let [e (first es)] + (when (second es) + (raise "Conflicting upsert: " id " resolves" + " to more than one entid " es + {:error :transact/upsert :tempid id :entids es})) + [id e]))) + id->es))))) + +(defn evolve-upserts-e [id->e upserts-e] + (let [evolve1 + (fn [[op id-e a v :as entity]] + (if-let [e* (get id->e id-e)] + [:upserted [op e* a v]] + [:allocations-e entity]))] + (util/group-by-kv evolve1 upserts-e))) + +(defn evolve-upserts-ev [id->e upserts-ev] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a id-v :as entity]] + (let [e* (get id->e id-e) + v* (get id->e id-v)] + (if e* + (if v* + [:resolved [op e* a v*]] + [:allocations-v [op e* a id-v]]) + (if v* + [:upserts-e [op id-e a v*]] + [:upserts-ev entity]))))] + (util/group-by-kv evolve1 upserts-ev))) + +(defn evolve-allocations-e [id->e allocations-e] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a v :as entity]] + (if-let [e* (get id->e id-e)] + [:resolved [op e* a v]] + [:allocations-e entity]))] + (util/group-by-kv evolve1 allocations-e))) + +(defn evolve-allocations-v [id->e allocations-v] + "Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map + whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op e a id-v :as entity]] + (if-let [v* (get id->e id-v)] + [:resolved [op e a v*]] + [:allocations-v entity]))] + (util/group-by-kv evolve1 allocations-v))) + +(defn evolve-allocations-ev [id->e allocations-ev] + "Given a map id->e of id-literals to integer entids, evolve the entities in allocations-ev. Returns a + map whose keys are generations and whose values are vectors of entities in those generations." + (let [evolve1 + (fn [[op id-e a id-v :as entity]] + (let [e* (get id->e id-e) + v* (get id->e id-v)] + (if e* + (if v* + [:resolved [op e* a v*]] + [:allocations-v [op e* a id-v]]) + (if v* + [:allocations-e [op id-e a v*]] + [:allocations-ev entity]))))] + (util/group-by-kv evolve1 allocations-ev))) + +(defn e will upsert; some will not. + Some :upserts-e evolve to become actual :upserts (they upserted!); any other :upserts-e evolve to + become :allocations-e (they did not upsert, and will not upsert this transaction). All :upserts-e + will evolve out of the :upserts-e generation: each one upserts or does not. + + Using the newly upserted id-literals, some :upserts-ev evolve to become :resolved; + some :upserts-ev evolve to become :upserts-e; and some :upserts-ev remain :upserts-ev. + + Likewise, some :allocations-ev evolve to become :allocations-e, :allocations-v, or :resolved; some + :allocations-e evolve to become :resolved; and some :allocations-v evolve to become :resolved. + + If we have *new* :upserts-e (i.e., some :upserts-ev become :upserts-e), then we may be able to + make more progress. We recurse, trying to resolve these new :upserts-e. + + Eventually we will have no :upserts-e. At this point, :upserts-ev become :allocations-ev, and now + we have :entities, :upserted, :resolved, and various :allocations-*. + + As a future optimization, :upserts do not need to be inserted; they upserted, so they already + exist in the DB. (We still need to verify uniqueness and ensure no overlapping can occur.) + Similarly, :allocations-* do not need to be checked for existence, so they can be written to the DB + faster." + (go-pair + (let [upserts-e (seq (:upserts-e evolution)) + id->e (and upserts-e + (e + ;; No more progress to be made. Any upserts-ev must just be allocations. + (update + (dissoc evolution :upserts-ev :upserts-e) + :allocations-ev concat (:upserts-ev evolution)) + ;; Progress can be made. Try to evolve further. + (let [{:keys [upserted resolved upserts-ev allocations-ev allocations-e allocations-v entities]} evolution] + (merge-with + concat + {:upserted upserted + :resolved resolved + :entities entities + ;; The keys of the id->e map are unique between generation steps, so we can simply + ;; concat tempids. Suppose that id->e and id->e* are two such mappings, resolved on + ;; subsequent evolutionary steps, and that id is a key in the intersection of the two + ;; key sets. This can't happen: if id maps to e via id->e, all instances of id have + ;; been evolved forward (replaced with e) before we try to resolve the next set of + ;; :upserts-e. That is, we'll never successfully upsert the same id-literal in more + ;; than one generation step. (We might upsert the same id-literal to multiple entids + ;; via distinct [a v] pairs in a single generation step; in this case, + ;; e} + (evolve-upserts-ev id->e upserts-ev) + (evolve-upserts-e id->e upserts-e) + (evolve-allocations-ev id->e allocations-ev) + (evolve-allocations-e id->e allocations-e) + (evolve-allocations-v id->e allocations-v))))))) + +;; TODO: do this in one step, rather than iterating. +(defn allocate [report evolution] + "Given a maximally evolved map of generations, allocate entids for all id-literals that did not + get upserted." + (let [{:keys [tempids upserted resolved allocations-ev allocations-e allocations-v entities]} evolution + initial-report (assoc report :tempids tempids)] + (loop [report + (assoc initial-report + ;; TODO: drop :upserted, they already exist in the DB; and don't search for + ;; :allocations-*, they definitely don't already exist in the DB. + :entities (concat upserted resolved entities)) + + es + (concat allocations-ev allocations-e allocations-v)] + (let [[[op e a v :as entity] & entities] es] + (cond + (nil? entity) + report + + (id-literal? e) + (let [eid (or (get-in report [:tempids e]) (-next-eid! (:part-map-atom report) e))] + (recur (allocate-eid report e eid) (cons [op eid a v] entities))) + + (id-literal? v) + (let [eid (or (get-in report [:tempids v]) (-next-eid! (:part-map-atom report) v))] + (recur (allocate-eid report v eid) (cons [op e a eid] entities))) + + true + (recur (transact-entity report entity) entities) + ))))) + (defn (get (tempids tx0) -1) 1000)) + (is (> (get (tempids tx0) -1) 1000)) + + ;; This time, we can resolve both, but we have to try -1, succeed, and then resolve -2. + (let [tx1 (= (d/db conn) tx)))))) + + (testing "Fails for missing fulltext entities" + (is (thrown-with-msg? + ExceptionInfo #"No entity found for lookup-ref" + (