Rewrite id-literal resolution to be faster. r=rnewman (#88)

This commit is contained in:
Nick Alexander 2016-10-14 10:18:30 -07:00
commit 1ddf37163c
5 changed files with 319 additions and 97 deletions

View file

@ -121,10 +121,6 @@
(<bootstrapped? [db]
"Return true if this database has no transactions yet committed.")
(<av
[db a v]
"Search for a single matching datom using the AVET index.")
(<avs
[db avs]
"Search for many matching datoms using the AVET index.
@ -383,7 +379,7 @@
(ref? a) ; index_vaet
(unique? a) ; unique_value
tag]]))
ops
(sort-by (fn [[_ _ _ v]] v) ops) ;; Make testing easier by sorting by string values. TODO: discuss expense.
(range initial-many-searchid 999999999))
["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"]))
@ -412,7 +408,7 @@
"INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0) VALUES "
"(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, 0, ?)")
e a searchid tx tag]]))
ops
(sort-by (fn [[_ _ _ v]] v) ops) ;; Make testing easier by sorting by string values. TODO: discuss expense.
(range initial-one-searchid 999999999))
["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"]))
@ -642,25 +638,6 @@
(:bootstrapped)
(not= 0))))
(<av [db a v]
(let [schema (.-schema db) ;; TODO: understand why (schema db) fails.
a (entid db a)
[v tag] (ds/->SQLite schema a v)
yield-datom
(fn [rows]
(when-let [row (first rows)]
(row->Datom schema row)))]
(go-pair
(->>
;; TODO: generalize columns.
["SELECT e, a, v, tx, 1 AS added FROM all_datoms
WHERE index_avet = 1 AND a = ? AND value_type_tag = ? AND v = ?
LIMIT 1" a tag v]
(s/all-rows (:sqlite-connection db))
<?
yield-datom))))
(<avs
[db avs]
{:pre [(sequential? avs)]}
@ -691,7 +668,7 @@
(apply str "WITH t(searchid, a, v, value_type_tag) AS (VALUES "
(apply str (repeater (count chunk))) ;; TODO: join?
") SELECT t.searchid, d.e
FROM t, datoms AS d
FROM t, all_datoms AS d
WHERE d.index_avet IS NOT 0 AND d.a = t.a AND d.value_type_tag = t.value_type_tag AND d.v = t.v")
;; Bindings.

View file

@ -282,7 +282,7 @@
{:pre [(db/db? db) (report? report)]}
(let [unique-identity? (memoize (partial ds/unique-identity? (db/schema db)))
;; Map lookup-ref -> entities containing lookup-ref, like {[[:a :v] [[[:a :v] :b :w] ...]] ...}.
;; Map lookup-ref -> entities containing lookup-ref, like {[:a :v] [[(lookup-ref :a :v) :b :w] ...], ...}.
groups (group-by (partial keep db/lookup-ref?) (:entities report))
;; Entities with no lookup-ref are grouped under the key (lazy-seq).
entities (get groups (lazy-seq)) ;; No lookup-refs? Pass through.
@ -335,6 +335,224 @@
(defn- transact-entity [report entity]
(update-in report [:entities] conj entity))
(defn id-literal-generation [unique-identity? entity]
"Group entities possibly containing id-literals into 'generations'.
Entities are grouped into one of the following generations:
:upserts-ev - 'complex upserts' that look like [:db/add -1 a -2] where a is :db.unique/identity;
:upserts-e - 'simple upserts' that look like [:db/add -1 a v] where a is :db.unique/identity;
:allocations-{e,v,ev} - things like [:db/add -1 b v], [:db/add e b -2], or [:db/add -3 b -4] where
b is *not* :db.unique/identity, or like [:db/add e a -5] where a is :db.unique/identity;
:entities - not :db/add, or no id-literals."
{:pre [(sequential? entity)]}
(let [[op e a v] entity
v? (id-literal? v)]
(when (id-literal? a)
(raise "id-literal attributes are not yet supported: " entity
{:error :transact/no-id-literal-attributes
:entity entity }))
(cond
(not= op :db/add) ;; TODO: verify no id-literals appear.
:entities
(id-literal? e)
(if (unique-identity? a)
(if v?
:upserts-ev
:upserts-e)
(if v?
:allocations-ev
:allocations-e))
v?
:allocations-v
true
:entities)))
(defn <resolve-upserts-e [db upserts-e]
"Given a sequence of :upserts-e, query the database to try to map them to existing entities.
Returns a map of id-literals to integer entids, with keys only those id-literals that mapped to
existing entities."
(go-pair
(when (seq upserts-e)
(let [->id-av (fn [[op id-literal a v]] [id-literal [a v]])
;; Like {id-literal [[:a1 :v1] [:a2 :v2] ...], ...}.
id->avs (util/group-by-kv ->id-av upserts-e)
;; Like [[:a1 :v1] [:a2 v2] ...].
avs (apply concat (vals id->avs))
;; Like {[:a1 :v1] e1, ...}.
av->e (<? (db/<avs db avs))
avs->es (fn [avs] (set (keep (partial get av->e) avs)))
id->es (util/mapvals avs->es id->avs)]
(into {}
;; nil is dropped.
(map (fn [[id es]]
(when-let [e (first es)]
(when (second es)
(raise "Conflicting upsert: " id " resolves"
" to more than one entid " es
{:error :transact/upsert :tempid id :entids es}))
[id e])))
id->es)))))
(defn evolve-upserts-e [id->e upserts-e]
(let [evolve1
(fn [[op id-e a v :as entity]]
(if-let [e* (get id->e id-e)]
[:upserted [op e* a v]]
[:allocations-e entity]))]
(util/group-by-kv evolve1 upserts-e)))
(defn evolve-upserts-ev [id->e upserts-ev]
"Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map
whose keys are generations and whose values are vectors of entities in those generations."
(let [evolve1
(fn [[op id-e a id-v :as entity]]
(let [e* (get id->e id-e)
v* (get id->e id-v)]
(if e*
(if v*
[:resolved [op e* a v*]]
[:allocations-v [op e* a id-v]])
(if v*
[:upserts-e [op id-e a v*]]
[:upserts-ev entity]))))]
(util/group-by-kv evolve1 upserts-ev)))
(defn evolve-allocations-e [id->e allocations-e]
"Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map
whose keys are generations and whose values are vectors of entities in those generations."
(let [evolve1
(fn [[op id-e a v :as entity]]
(if-let [e* (get id->e id-e)]
[:resolved [op e* a v]]
[:allocations-e entity]))]
(util/group-by-kv evolve1 allocations-e)))
(defn evolve-allocations-v [id->e allocations-v]
"Given a map id->e of id-literals to integer entids, evolve the given entities. Returns a map
whose keys are generations and whose values are vectors of entities in those generations."
(let [evolve1
(fn [[op e a id-v :as entity]]
(if-let [v* (get id->e id-v)]
[:resolved [op e a v*]]
[:allocations-v entity]))]
(util/group-by-kv evolve1 allocations-v)))
(defn evolve-allocations-ev [id->e allocations-ev]
"Given a map id->e of id-literals to integer entids, evolve the entities in allocations-ev. Returns a
map whose keys are generations and whose values are vectors of entities in those generations."
(let [evolve1
(fn [[op id-e a id-v :as entity]]
(let [e* (get id->e id-e)
v* (get id->e id-v)]
(if e*
(if v*
[:resolved [op e* a v*]]
[:allocations-v [op e* a id-v]])
(if v*
[:allocations-e [op id-e a v*]]
[:allocations-ev entity]))))]
(util/group-by-kv evolve1 allocations-ev)))
(defn <evolve [db evolution]
"Evolve a map of generations {:upserts-e [...], :upserts-ev [...], ...} as much as possible.
The algorithm is as follows.
First, resolve :upserts-e against the database. Some [a v] -> e will upsert; some will not.
Some :upserts-e evolve to become actual :upserts (they upserted!); any other :upserts-e evolve to
become :allocations-e (they did not upsert, and will not upsert this transaction). All :upserts-e
will evolve out of the :upserts-e generation: each one upserts or does not.
Using the newly upserted id-literals, some :upserts-ev evolve to become :resolved;
some :upserts-ev evolve to become :upserts-e; and some :upserts-ev remain :upserts-ev.
Likewise, some :allocations-ev evolve to become :allocations-e, :allocations-v, or :resolved; some
:allocations-e evolve to become :resolved; and some :allocations-v evolve to become :resolved.
If we have *new* :upserts-e (i.e., some :upserts-ev become :upserts-e), then we may be able to
make more progress. We recurse, trying to resolve these new :upserts-e.
Eventually we will have no :upserts-e. At this point, :upserts-ev become :allocations-ev, and now
we have :entities, :upserted, :resolved, and various :allocations-*.
As a future optimization, :upserts do not need to be inserted; they upserted, so they already
exist in the DB. (We still need to verify uniqueness and ensure no overlapping can occur.)
Similarly, :allocations-* do not need to be checked for existence, so they can be written to the DB
faster."
(go-pair
(let [upserts-e (seq (:upserts-e evolution))
id->e (and upserts-e
(<? (<resolve-upserts-e db upserts-e)))]
(if-not id->e
;; No more progress to be made. Any upserts-ev must just be allocations.
(update
(dissoc evolution :upserts-ev :upserts-e)
:allocations-ev concat (:upserts-ev evolution))
;; Progress can be made. Try to evolve further.
(let [{:keys [upserted resolved upserts-ev allocations-ev allocations-e allocations-v entities]} evolution]
(merge-with
concat
{:upserted upserted
:resolved resolved
:entities entities
;; The keys of the id->e map are unique between generation steps, so we can simply
;; concat tempids. Suppose that id->e and id->e* are two such mappings, resolved on
;; subsequent evolutionary steps, and that id is a key in the intersection of the two
;; key sets. This can't happen: if id maps to e via id->e, all instances of id have
;; been evolved forward (replaced with e) before we try to resolve the next set of
;; :upserts-e. That is, we'll never successfully upsert the same id-literal in more
;; than one generation step. (We might upsert the same id-literal to multiple entids
;; via distinct [a v] pairs in a single generation step; in this case,
;; <resolve-upserts-e will throw.)
:tempids id->e}
(evolve-upserts-ev id->e upserts-ev)
(evolve-upserts-e id->e upserts-e)
(evolve-allocations-ev id->e allocations-ev)
(evolve-allocations-e id->e allocations-e)
(evolve-allocations-v id->e allocations-v)))))))
;; TODO: do this in one step, rather than iterating.
(defn allocate [report evolution]
"Given a maximally evolved map of generations, allocate entids for all id-literals that did not
get upserted."
(let [{:keys [tempids upserted resolved allocations-ev allocations-e allocations-v entities]} evolution
initial-report (assoc report :tempids tempids)]
(loop [report
(assoc initial-report
;; TODO: drop :upserted, they already exist in the DB; and don't search for
;; :allocations-*, they definitely don't already exist in the DB.
:entities (concat upserted resolved entities))
es
(concat allocations-ev allocations-e allocations-v)]
(let [[[op e a v :as entity] & entities] es]
(cond
(nil? entity)
report
(id-literal? e)
(let [eid (or (get-in report [:tempids e]) (-next-eid! (:part-map-atom report) e))]
(recur (allocate-eid report e eid) (cons [op eid a v] entities)))
(id-literal? v)
(let [eid (or (get-in report [:tempids v]) (-next-eid! (:part-map-atom report) v))]
(recur (allocate-eid report v eid) (cons [op e a eid] entities)))
true
(recur (transact-entity report entity) entities)
)))))
(defn <resolve-id-literals
"Upsert uniquely identified literals when possible and allocate new entids for all other id literals.
@ -358,57 +576,16 @@
{:pre [(db/db? db) (report? report)]}
(go-pair
(let [keyfn (fn [[op e a v]]
(if (and (id-literal? e)
(not-any? id-literal? [a v]))
(- 5)
(- (count (filter id-literal? [e a v])))))
initial-report (assoc report :entities []) ;; TODO.
initial-entities (sort-by keyfn (:entities report))]
(loop [report initial-report
es initial-entities]
(let [[[op e a v :as entity] & entities] es]
(cond
(nil? entity)
report
(let [schema (db/schema db)
unique-identity? (memoize (partial ds/unique-identity? schema))
(and (not= op :db/add)
(or (id-literal? e)
(id-literal? a)
(id-literal? v)))
(raise "id-literals are resolved for :db/add only"
{:error :transact/syntax
:op entity })
generations
(group-by (partial id-literal-generation unique-identity?) (:entities report))
;; Upsert!
(and (id-literal? e)
(ds/unique-identity? (db/schema db) a)
(not-any? id-literal? [a v]))
(let [upserted-eid (:e (<? (db/<av db a v)))
allocated-eid (get-in report [:tempids e])]
(if (and upserted-eid allocated-eid (not= upserted-eid allocated-eid))
(<? (<retry-with-tempid db initial-report initial-entities e upserted-eid)) ;; TODO: not initial report, just the sorted entities here.
(let [eid (or upserted-eid allocated-eid (-next-eid! (:part-map-atom report) e))]
(recur (allocate-eid report e eid) (cons [op eid a v] entities)))))
;; Start allocating and retrying. We try with e last, so as to eventually upsert it.
(id-literal? v)
;; We can't fail with unbound literals here, since we could have multiple.
(let [eid (or (get-in report [:tempids v]) (-next-eid! (:part-map-atom report) e))]
(recur (allocate-eid report v eid) (cons [op e a eid] entities)))
(id-literal? a)
;; TODO: should we even allow id-literal attributes? Datomic fails in some cases here.
(let [eid (or (get-in report [:tempids a]) (-next-eid! (:part-map-atom report) e))]
(recur (allocate-eid report a eid) (cons [op e eid v] entities)))
(id-literal? e)
(let [eid (or (get-in report [:tempids e]) (-next-eid! (:part-map-atom report) e))]
(recur (allocate-eid report e eid) (cons [op eid a v] entities)))
true
(recur (transact-entity report entity) entities)
))))))
evolution
(<? (<evolve db generations))
]
(allocate report evolution))))
(defn- transact-report [report datom]
(update-in report [:tx-data] conj datom))

View file

@ -154,3 +154,17 @@
(defn unlimited-buffer []
(UnlimitedBuffer. #?(:cljs (array) :clj (java.util.LinkedList.))))
(defn group-by-kv
"Returns a map of the elements of coll keyed by the first element of
the result of f on each element. The value at each key will be a
vector of the second element of the result of f on the corresponding
elements, in the order they appeared in coll."
{:static true}
[f coll]
(persistent!
(reduce
(fn [ret x]
(let [[k v] (f x)]
(assoc! ret k (conj (get ret k []) v))))
(transient {}) coll)))

View file

@ -214,6 +214,24 @@
[:db/add (d/id-literal :db.part/user -1) :name "Petr"]
[:db/add (d/id-literal :db.part/user -1) :age 36]])))))))
(deftest-db test-multistep-upsert conn
(<? (d/<transact! conn test-schema))
;; The upsert algorithm will first try to resolve -1, fail, and then allocate both -1 and -2.
(let [tx0 (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :name "Ivan" :email "@1"}
{:db/id (d/id-literal :db.part/user -2) :name "Petr" :friends (d/id-literal :db.part/user -1)}]))]
;; Sanity checks that these are freshly allocated, not resolved.
(is (> (get (tempids tx0) -1) 1000))
(is (> (get (tempids tx0) -1) 1000))
;; This time, we can resolve both, but we have to try -1, succeed, and then resolve -2.
(let [tx1 (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :name "Ivan" :email "@1"}
{:db/id (d/id-literal :db.part/user -2) :name "Petr" :friends (d/id-literal :db.part/user -1)}]))]
;; Ensure these are resolved, not freshly allocated.
(is (= (tempids tx0)
(tempids tx1))))))
(deftest-db test-map-upsert conn
;; Not having DB-as-value really hurts us here. This test only works because all upserts
;; succeed on top of each other, so we never need to reset the underlying store.
@ -626,11 +644,11 @@
(let [tx0 (:tx (<? (d/<transact! conn retract-schema)))]
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/long [12345 123456]}
{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["first fulltext value" "second fulltext value"]}]))
{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["1 fulltext value" "2 fulltext value"]}]))
eid1 (get-in report [:tempids (d/id-literal :db.part/user -1)])]
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid1 :test/fulltext 1]
[eid1 :test/fulltext 2]
@ -647,13 +665,13 @@
(let [tx0 (:tx (<? (d/<transact! conn retract-schema)))]
(testing "retractAttribute, fulltext"
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["first fulltext value" "second fulltext value"]}
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["1 fulltext value" "2 fulltext value"]}
{:db/id (d/id-literal :db.part/user -2) :test/ref (d/id-literal :db.part/user -1)}]))
eid1 (get-in report [:tempids (d/id-literal :db.part/user -1)])
eid2 (get-in report [:tempids (d/id-literal :db.part/user -2)])]
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid1 :test/fulltext 1]
[eid1 :test/fulltext 2]
@ -663,8 +681,8 @@
(let [{tx1 :tx} (<? (d/<transact! conn [[:db.fn/retractAttribute eid2 :test/ref]]))]
;; fulltext values are not purged.
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid1 :test/fulltext 1]
[eid1 :test/fulltext 2]})))
@ -672,8 +690,8 @@
(let [{tx2 :tx} (<? (d/<transact! conn [[:db.fn/retractAttribute eid1 :test/fulltext]]))]
;; fulltext values are not purged.
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{}))))))))
@ -718,12 +736,12 @@
(let [tx0 (:tx (<? (d/<transact! conn retract-schema)))]
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/long [12345 123456]}
{:db/id (d/id-literal :db.part/user -2) :test/fulltext ["first fulltext value" "second fulltext value"]}]))
{:db/id (d/id-literal :db.part/user -2) :test/fulltext ["1 fulltext value" "2 fulltext value"]}]))
eid1 (get-in report [:tempids (d/id-literal :db.part/user -1)])
eid2 (get-in report [:tempids (d/id-literal :db.part/user -2)])]
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid2 :test/fulltext 1]
[eid2 :test/fulltext 2]
@ -740,16 +758,16 @@
(let [tx0 (:tx (<? (d/<transact! conn retract-schema)))]
(testing "retractEntity, fulltext"
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["first fulltext value" "second fulltext value"]}
(let [report (<? (d/<transact! conn [{:db/id (d/id-literal :db.part/user -1) :test/fulltext ["1 fulltext value" "2 fulltext value"]}
{:db/id (d/id-literal :db.part/user -2) :test/ref (d/id-literal :db.part/user -1)}
{:db/id (d/id-literal :db.part/user -3) :test/fulltext "other fulltext value"}]))
{:db/id (d/id-literal :db.part/user -3) :test/fulltext "3 fulltext value"}]))
eid1 (get-in report [:tempids (d/id-literal :db.part/user -1)])
eid2 (get-in report [:tempids (d/id-literal :db.part/user -2)])
eid3 (get-in report [:tempids (d/id-literal :db.part/user -3)])]
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]
[3 "other fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]
[3 "3 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid1 :test/fulltext 1]
[eid1 :test/fulltext 2]
@ -768,9 +786,9 @@
(let [{tx1 :tx} (<? (d/<transact! conn [[:db.fn/retractEntity eid3]]))]
;; fulltext values are not purged.
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "first fulltext value"]
[2 "second fulltext value"]
[3 "other fulltext value"]]))
[[1 "1 fulltext value"]
[2 "2 fulltext value"]
[3 "3 fulltext value"]]))
(is (= (<? (<datoms-after (d/db conn) tx0))
#{[eid1 :test/fulltext 1]
[eid1 :test/fulltext 2]
@ -914,6 +932,37 @@
ExceptionInfo #"Lookup-ref found with non-unique-identity attribute"
(<? (d/<transact! conn [[:db/add 1 :friends (d/lookup-ref :aka "The Magician")]])))))))
(deftest-db test-fulltext-lookup-refs conn
(let [schema [{:db/id (d/id-literal :db.part/db -1)
:db/ident :test/fulltext
:db/valueType :db.type/string
:db/fulltext true
:db/unique :db.unique/identity}
{:db/id :db.part/db :db.install/attribute (d/id-literal :db.part/db -1)}
{:db/id (d/id-literal :db.part/db -2)
:db/ident :test/other
:db/valueType :db.type/string
:db/fulltext true
:db/cardinality :db.cardinality/one}
{:db/id :db.part/db :db.install/attribute (d/id-literal :db.part/db -2)}
]
tx0 (:tx (<? (d/<transact! conn schema)))]
(testing "Can look up fulltext refs"
(<? (d/<transact! conn [[:db/add 101 :test/fulltext "test this"]]))
(let [{tx :tx} (<? (d/<transact! conn [{:db/id (d/lookup-ref :test/fulltext "test this") :test/other "test other"}]))]
(is (= (<? (<fulltext-values (d/db conn)))
[[1 "test this"]
[2 "test other"]]))
(is (= #{[101 :test/other 2]} ;; Values are raw; 2 is the rowid into fulltext_values.
(<? (<datoms>= (d/db conn) tx))))))
(testing "Fails for missing fulltext entities"
(is (thrown-with-msg?
ExceptionInfo #"No entity found for lookup-ref"
(<? (d/<transact! conn [[:db/add (d/lookup-ref :test/fulltext "not found") :test/other "test random"]])))))))
#_ (time (t/run-tests))
#_ (time (clojure.test/test-vars [#'test-lookup-refs]))

View file

@ -46,3 +46,8 @@
(is (util/unblocking-chan? (a/chan (a/sliding-buffer 10))))
(is (util/unblocking-chan? (a/chan (util/unlimited-buffer))))
(is (not (util/unblocking-chan? (a/chan (a/buffer 10))))))
(deftest test-group-by-kvs
(are [m xs] (= m (util/group-by-kv identity xs))
{:a [1 2] :b [3]}
[[:a 1] [:a 2] [:b 3]]))