Improve lookup-ref correctness and performance; reduce in-DB garbage. r=rnewman

Before, the parts of the lookup-ref test that passed took about ~2100ms
on my device.  After, the entire lookup-ref test takes 350ms, a
significant speed-up.  I did not try to plot the graphs of the two
approaches as the number of lookup-refs increased, since the new
approach is significantly better in all cases I can think of.
This commit is contained in:
Nick Alexander 2016-09-30 17:00:54 -07:00
commit 1fe0cbeaf0
7 changed files with 297 additions and 121 deletions

View file

@ -32,6 +32,8 @@
(def id-literal db/id-literal) (def id-literal db/id-literal)
(def lookup-ref db/lookup-ref)
(def db transact/db) (def db transact/db)
(def entid db/entid) (def entid db/entid)

View file

@ -63,6 +63,23 @@
(defn id-literal? [x] (defn id-literal? [x]
(instance? TempId x)) (instance? TempId x))
(defrecord LookupRef [a v])
(defn lookup-ref
[a v]
(if (and
(or (keyword? a)
(integer? a))
v)
(->LookupRef a v)
(raise (str "Lookup-ref with bad attribute " a " or value " v
{:error :transact/bad-lookup-ref, :a a, :v v}))))
(defn lookup-ref? [x]
"Return `x` if `x` is like [:attr value], nil otherwise."
(when (instance? LookupRef x)
x))
(defprotocol IClock (defprotocol IClock
(now (now
[clock] [clock]
@ -105,6 +122,13 @@
[db a v] [db a v]
"Search for a single matching datom using the AVET index.") "Search for a single matching datom using the AVET index.")
(<avs
[db avs]
"Search for many matching datoms using the AVET index.
Take [[a0 v0] [a1 v1] ...] and return a map {[a0 v0] e0}. If no datom [e1 a1 v1] exists, the
key [a1 v1] is not present in the returned map.")
(<apply-entities (<apply-entities
[db tx entities] [db tx entities]
"Apply entities to the store, returning sequence of datoms transacted.") "Apply entities to the store, returning sequence of datoms transacted.")
@ -172,9 +196,7 @@
:table-alias source/gensym-table-alias :table-alias source/gensym-table-alias
:make-constraints nil})) :make-constraints nil}))
;; TODO: make this not do the tx_lookup. We could achieve this by having additional special values (defn- retractAttributes->queries [oeas tx]
;; of added0, or by separating the tx_lookup table into before and after tables.
(defn- retractAttributes->queries [eas tx]
(let [where-part (let [where-part
"(e = ? AND a = ?)" "(e = ? AND a = ?)"
@ -183,21 +205,23 @@
(fn [chunk] (fn [chunk]
(cons (cons
(apply str (apply str
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) "INSERT INTO temp.tx_lookup_after (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag,
SELECT e, a, v, ?, 0, value_type_tag, v, value_type_tag rid, e, a, v, tx, value_type_tag)
SELECT e, a, v, ?, 0, value_type_tag, v, value_type_tag,
rowid, e, a, v, ?, value_type_tag
FROM datoms FROM datoms
WHERE " WHERE "
(repeater (count chunk))) (repeater (count chunk)))
(cons
tx
(cons (cons
tx tx
(mapcat (fn [[_ e a]] (mapcat (fn [[_ e a]]
[e a]) [e a])
chunk)))) chunk)))))
(partition-all (quot (dec max-sql-vars) 2) eas)))) (partition-all (quot (- max-sql-vars 2) 2) oeas))))
;; TODO: make this not do the tx_lookup. We could achieve this by having additional special values (defn- retractEntities->queries [oes tx]
;; of added0, or by separating the tx_lookup table into before and after tables.
(defn- retractEntities->queries [es tx]
(let [ref-tag (sqlite-schema/->tag :db.type/ref) (let [ref-tag (sqlite-schema/->tag :db.type/ref)
;; TODO: include index_vaet flag here, so we can use that index to speed up the deletion. ;; TODO: include index_vaet flag here, so we can use that index to speed up the deletion.
@ -209,27 +233,31 @@
(fn [chunk] (fn [chunk]
(cons (cons
(apply str (apply str
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) "INSERT INTO temp.tx_lookup_after (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag,
SELECT e, a, v, ?, 0, value_type_tag, v, value_type_tag rid, e, a, v, tx, value_type_tag)
SELECT e, a, v, ?, 0, value_type_tag, v, value_type_tag,
rowid, e, a, v, ?, value_type_tag
FROM datoms FROM datoms
WHERE " WHERE "
(repeater (count chunk))) (repeater (count chunk)))
(cons
tx
(cons (cons
tx tx
(mapcat (fn [[_ e]] (mapcat (fn [[_ e]]
[e e]) [e e])
chunk)))) chunk)))))
(partition-all (quot (dec max-sql-vars) 2) es)))) (partition-all (quot (- max-sql-vars 2) 2) oes))))
(defn- retractions->queries [retractions tx fulltext? ->SQLite] (defn- retractions->queries [retractions tx fulltext? ->SQLite]
(let (let
[f-q [f-q
"WITH vv AS (SELECT rowid FROM fulltext_values WHERE text = ?) "WITH vv AS (SELECT rowid FROM fulltext_values WHERE text = ?)
INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag)
VALUES (?, ?, (SELECT rowid FROM vv), ?, 0, ?, (SELECT rowid FROM vv), ?)" VALUES (?, ?, (SELECT rowid FROM vv), ?, 0, ?, (SELECT rowid FROM vv), ?)"
non-f-q non-f-q
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag)
VALUES (?, ?, ?, ?, 0, ?, ?, ?)"] VALUES (?, ?, ?, ?, 0, ?, ?, ?)"]
(map (map
(fn [[_ e a v]] (fn [[_ e a v]]
@ -242,7 +270,7 @@
retractions))) retractions)))
(defn- non-fts-many->queries [ops tx ->SQLite indexing? ref? unique?] (defn- non-fts-many->queries [ops tx ->SQLite indexing? ref? unique?]
(let [q "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " (let [q "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES "
values-part values-part
;; e0, a0, v0, tx0, added0, value_type_tag0 ;; e0, a0, v0, tx0, added0, value_type_tag0
@ -290,7 +318,7 @@
[(cons [(cons
(apply (apply
str str
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES "
(first-repeater (count chunk))) (first-repeater (count chunk)))
(mapcat (fn [[_ e a v]] (mapcat (fn [[_ e a v]]
(let [[v tag] (->SQLite a v)] (let [[v tag] (->SQLite a v)]
@ -304,7 +332,7 @@
(cons (cons
(apply (apply
str str
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0) VALUES " "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0) VALUES "
(second-repeater (count chunk))) (second-repeater (count chunk)))
(mapcat (fn [[_ e a v]] (mapcat (fn [[_ e a v]]
(let [[v tag] (->SQLite a v)] (let [[v tag] (->SQLite a v)]
@ -341,10 +369,10 @@
[["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)" [["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)"
v searchid] v searchid]
;; Second query: tx_lookup. ;; Second query: lookup.
[(str [(str
"WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) " "WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) "
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES "
"(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)") "(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)")
searchid searchid
e a tx tag e a tx tag
@ -365,10 +393,10 @@
[["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)" [["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)"
v searchid] v searchid]
;; Second and third queries: tx_lookup. ;; Second and third queries: lookup.
[(str [(str
"WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) " "WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) "
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES "
"(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)") "(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)")
searchid searchid
e a tx tag e a tx tag
@ -378,7 +406,7 @@
tag] tag]
[(str [(str
"INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0) VALUES " "INSERT INTO temp.tx_lookup_before (e0, a0, v0, tx0, added0, value_type_tag0) VALUES "
"(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, 0, ?)") "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, 0, ?)")
e a searchid tx tag]])) e a searchid tx tag]]))
ops ops
@ -390,33 +418,43 @@
(try (try
(doseq [q queries] (doseq [q queries]
(<? (s/execute! conn q))) (<? (s/execute! conn q)))
(catch #?(:clj java.sql.SQLException :cljs js/Error) e (catch #?(:clj java.lang.Exception :cljs js/Error) e
(throw (ex-info exception-message {} e)))))) (throw (ex-info exception-message {} e))))))
(defn- -preamble-drop [conn] (defn- -preamble-drop [conn]
(let [preamble-drop-index ["DROP INDEX IF EXISTS id_tx_lookup_added"]
preamble-delete-tx-lookup ["DELETE FROM tx_lookup"]]
(go-pair (go-pair
(p :preamble (p :preamble
(doseq [q [preamble-drop-index preamble-delete-tx-lookup]] (doseq [q [;; XXX ["DROP INDEX IF EXISTS temp.idx_tx_lookup_before_added"]
(<? (s/execute! conn q))))))) (sqlite-schema/create-temp-tx-lookup-statement "temp.tx_lookup_before")
(sqlite-schema/create-temp-tx-lookup-statement "temp.tx_lookup_after")
;; TODO: move later, into -build-transaction.
;; temp goes on index name, not table name. See http://stackoverflow.com/a/22308016.
(sqlite-schema/create-temp-tx-lookup-eavt-statement "temp.idx_tx_lookup_before_eavt" "tx_lookup_before")
(sqlite-schema/create-temp-tx-lookup-eavt-statement "temp.idx_tx_lookup_after_eavt" "tx_lookup_after")
["DELETE FROM temp.tx_lookup_before"]
["DELETE FROM temp.tx_lookup_after"]]]
(<? (s/execute! conn q))))))
(defn- -after-drop [conn] (defn- -after-drop [conn]
(go-pair (go-pair
(doseq [q [;; The lookup table takes space on disk, so we purge it aggressively. (p :postamble
["DROP INDEX IF EXISTS id_tx_lookup_added"] ;; TODO: delete tx_lookup_before after filling tx_lookup_after.
["DELETE FROM tx_lookup"]]] (doseq [q [;; XXX ["DROP INDEX IF EXISTS temp.idx_tx_lookup_before_added"]
(<? (s/execute! conn q))))) ["DROP INDEX IF EXISTS temp.idx_tx_lookup_before_eavt"]
["DROP INDEX IF EXISTS temp.idx_tx_lookup_after_eavt"]
["DELETE FROM temp.tx_lookup_before"]
["DELETE FROM temp.tx_lookup_after"]]]
(<? (s/execute! conn q))))))
(defn- -build-transaction [conn tx] (defn- -build-transaction [conn tx]
(let [build-indices ["CREATE INDEX IF NOT EXISTS idx_tx_lookup_added ON tx_lookup (added0)"] (let [build-indices ["CREATE INDEX IF NOT EXISTS temp.idx_tx_lookup_added ON tx_lookup_before (added0)"]
;; First is fast, only one table walk: lookup by exact eav. ;; First is fast, only one table walk: lookup by exact eav.
;; Second is slower, but still only one table walk: lookup old value by ea. ;; Second is slower, but still only one table walk: lookup old value by ea.
insert-into-tx-lookup insert-into-tx-lookup
["INSERT INTO tx_lookup ["INSERT INTO temp.tx_lookup_after
SELECT t.e0, t.a0, t.v0, t.tx0, t.added0 + 2, t.value_type_tag0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, t.sv, t.svalue_type_tag, d.rowid, d.e, d.a, d.v, d.tx, d.value_type_tag SELECT t.e0, t.a0, t.v0, t.tx0, t.added0, t.value_type_tag0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, t.sv, t.svalue_type_tag, d.rowid, d.e, d.a, d.v, d.tx, d.value_type_tag
FROM tx_lookup AS t FROM temp.tx_lookup_before AS t
LEFT JOIN datoms AS d LEFT JOIN datoms AS d
ON t.e0 = d.e AND ON t.e0 = d.e AND
t.a0 = d.a AND t.a0 = d.a AND
@ -425,8 +463,8 @@
t.sv IS NOT NULL t.sv IS NOT NULL
UNION ALL UNION ALL
SELECT t.e0, t.a0, t.v0, t.tx0, t.added0 + 2, t.value_type_tag0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, t.sv, t.svalue_type_tag, d.rowid, d.e, d.a, d.v, d.tx, d.value_type_tag SELECT t.e0, t.a0, t.v0, t.tx0, t.added0, t.value_type_tag0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, t.sv, t.svalue_type_tag, d.rowid, d.e, d.a, d.v, d.tx, d.value_type_tag
FROM tx_lookup AS t, FROM temp.tx_lookup_before AS t,
datoms AS d datoms AS d
WHERE t.sv IS NULL AND WHERE t.sv IS NULL AND
t.e0 = d.e AND t.e0 = d.e AND
@ -435,14 +473,14 @@
t-datoms-not-already-present t-datoms-not-already-present
["INSERT INTO transactions (e, a, v, tx, added, value_type_tag) ["INSERT INTO transactions (e, a, v, tx, added, value_type_tag)
SELECT e0, a0, v0, ?, 1, value_type_tag0 SELECT e0, a0, v0, ?, 1, value_type_tag0
FROM tx_lookup FROM temp.tx_lookup_after
WHERE added0 IS 3 AND e IS NULL" tx] ;; TODO: get rid of magic value 3. WHERE added0 IS 1 AND e IS NULL" tx]
t-retract-datoms-carefully t-retract-datoms-carefully
["INSERT INTO transactions (e, a, v, tx, added, value_type_tag) ["INSERT INTO transactions (e, a, v, tx, added, value_type_tag)
SELECT e, a, v, ?, 0, value_type_tag SELECT e, a, v, ?, 0, value_type_tag
FROM tx_lookup FROM temp.tx_lookup_after
WHERE added0 IS 2 AND ((sv IS NOT NULL) OR (sv IS NULL AND v0 IS NOT v)) AND v IS NOT NULL" tx] ;; TODO: get rid of magic value 2. WHERE added0 IS 0 AND ((sv IS NOT NULL) OR (sv IS NULL AND v0 IS NOT v)) AND v IS NOT NULL" tx]
] ]
(go-pair (go-pair
(doseq [q [build-indices insert-into-tx-lookup (doseq [q [build-indices insert-into-tx-lookup
@ -455,13 +493,13 @@
["INSERT INTO datoms (e, a, v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value) ["INSERT INTO datoms (e, a, v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value)
SELECT e0, a0, v0, ?, value_type_tag0, SELECT e0, a0, v0, ?, value_type_tag0,
index_avet0, index_vaet0, index_fulltext0, unique_value0 index_avet0, index_vaet0, index_fulltext0, unique_value0
FROM tx_lookup FROM temp.tx_lookup_after
WHERE added0 IS 3 AND e IS NULL" tx] ;; TODO: get rid of magic value 3. WHERE added0 IS 1 AND e IS NULL" tx]
;; TODO: retract fulltext datoms correctly. ;; TODO: retract fulltext datoms correctly.
d-retract-datoms-carefully d-retract-datoms-carefully
["WITH ids AS (SELECT l.rid FROM tx_lookup AS l WHERE l.added0 IS 2 AND ((l.sv IS NOT NULL) OR (l.sv IS NULL AND l.v0 IS NOT l.v))) ["WITH ids AS (SELECT l.rid FROM temp.tx_lookup_after AS l WHERE l.added0 IS 0 AND ((l.sv IS NOT NULL) OR (l.sv IS NULL AND l.v0 IS NOT l.v)))
DELETE FROM datoms WHERE rowid IN ids" ;; TODO: get rid of magic value 2. DELETE FROM datoms WHERE rowid IN ids"
]] ]]
(-run-queries conn [d-datoms-not-already-present d-retract-datoms-carefully] (-run-queries conn [d-datoms-not-already-present d-retract-datoms-carefully]
"Transaction violates unique constraint"))) "Transaction violates unique constraint")))
@ -620,6 +658,61 @@
<? <?
yield-datom)))) yield-datom))))
(<avs
[db avs]
{:pre [(sequential? avs)]}
(go-pair
(let [schema
(.-schema db)
values-part
"(?, ?, ?, ?)"
repeater
(memoize (fn [n] (interpose ", " (repeat n values-part))))
exec
(partial s/execute! (:sqlite-connection db))
;; Map [a v] -> searchid.
av->searchid
(into {} (map vector avs (range)))
;; Each query takes 4 variables per item. So we partition into max-sql-vars / 4.
qs
(map
(fn [chunk]
(cons
;; Query string.
(apply str "WITH t(searchid, a, v, value_type_tag) AS (VALUES "
(apply str (repeater (count chunk))) ;; TODO: join?
") SELECT t.searchid, d.e
FROM t, datoms AS d
WHERE d.index_avet IS NOT 0 AND d.a = t.a AND d.value_type_tag = t.value_type_tag AND d.v = t.v")
;; Bindings.
(mapcat (fn [[[a v] searchid]]
(let [a (entid db a)
[v tag] (ds/->SQLite schema a v)]
[searchid a v tag]))
chunk)))
(partition-all (quot max-sql-vars 4) av->searchid))
;; Map searchid -> e. There's a generic reduce that takes [pair-chan] lurking in here.
searchid->e
(loop [coll (transient {})
qs qs]
(let [[q & qs] qs]
(if q
(let [rs (<? (s/all-rows (:sqlite-connection db) q))
coll* (reduce conj! coll (map (juxt :searchid :e) rs))]
(recur coll* qs))
(persistent! coll))))
]
(util/mapvals (partial get searchid->e) av->searchid))))
(<apply-entities [db tx entities] (<apply-entities [db tx entities]
{:pre [(db? db) (sequential? entities)]} {:pre [(db? db) (sequential? entities)]}
(-<apply-entities db tx entities)) (-<apply-entities db tx entities))

View file

@ -29,6 +29,15 @@
(filter #(not (= :db/txInstant (second %)))) (filter #(not (= :db/txInstant (second %))))
(set)))) (set))))
(defn <datoms>= [db tx]
(go-pair
(->>
(s/all-rows (:sqlite-connection db) ["SELECT e, a, v, tx FROM datoms WHERE tx >= ?" tx])
(<?)
(mapv #(vector (:e %) (db/ident db (:a %)) (:v %)))
(filter #(not (= :db/txInstant (second %))))
(set))))
(defn <datoms [db] (defn <datoms [db]
(<datoms-after db 0)) (<datoms-after db 0))

View file

@ -26,24 +26,6 @@
"CREATE UNIQUE INDEX idx_datoms_eavt ON datoms (e, a, value_type_tag, v)" "CREATE UNIQUE INDEX idx_datoms_eavt ON datoms (e, a, value_type_tag, v)"
"CREATE UNIQUE INDEX idx_datoms_aevt ON datoms (a, e, value_type_tag, v)" "CREATE UNIQUE INDEX idx_datoms_aevt ON datoms (a, e, value_type_tag, v)"
;; n.b., v0/value_type_tag0 can be NULL, in which case we look up v from datoms;
;; and the datom columns are NULL into the LEFT JOIN fills them in.
;; TODO: update comment about sv.
"CREATE TABLE tx_lookup (e0 INTEGER NOT NULL, a0 SMALLINT NOT NULL, v0 BLOB NOT NULL, tx0 INTEGER NOT NULL, added0 TINYINT NOT NULL,
value_type_tag0 SMALLINT NOT NULL,
index_avet0 TINYINT, index_vaet0 TINYINT,
index_fulltext0 TINYINT,
unique_value0 TINYINT,
sv BLOB,
svalue_type_tag SMALLINT,
rid INTEGER,
e INTEGER, a SMALLINT, v BLOB, tx INTEGER, value_type_tag SMALLINT)"
;; Note that `id_tx_lookup_added` is created and dropped
;; after insertion, which makes insertion slightly faster.
;; Prevent overlapping transactions. TODO: drop added0?
"CREATE UNIQUE INDEX idx_tx_lookup_eavt ON tx_lookup (e0, a0, v0, added0, value_type_tag0) WHERE sv IS NOT NULL"
;; Opt-in index: only if a has :db/index true. ;; Opt-in index: only if a has :db/index true.
"CREATE UNIQUE INDEX idx_datoms_avet ON datoms (a, value_type_tag, v, e) WHERE index_avet IS NOT 0" "CREATE UNIQUE INDEX idx_datoms_avet ON datoms (a, value_type_tag, v, e) WHERE index_avet IS NOT 0"
@ -115,6 +97,35 @@
"CREATE TABLE parts (part TEXT NOT NULL PRIMARY KEY, start INTEGER NOT NULL, idx INTEGER NOT NULL)" "CREATE TABLE parts (part TEXT NOT NULL PRIMARY KEY, start INTEGER NOT NULL, idx INTEGER NOT NULL)"
]) ])
(defn create-temp-tx-lookup-statement [table-name]
;; n.b., v0/value_type_tag0 can be NULL, in which case we look up v from datoms;
;; and the datom columns are NULL into the LEFT JOIN fills them in.
;; The table-name is not escaped in any way, in order to allow "temp.dotted" names.
;; TODO: update comment about sv.
[(str "CREATE TABLE IF NOT EXISTS " table-name
" (e0 INTEGER NOT NULL, a0 SMALLINT NOT NULL, v0 BLOB NOT NULL, tx0 INTEGER NOT NULL, added0 TINYINT NOT NULL,
value_type_tag0 SMALLINT NOT NULL,
index_avet0 TINYINT, index_vaet0 TINYINT,
index_fulltext0 TINYINT,
unique_value0 TINYINT,
sv BLOB,
svalue_type_tag SMALLINT,
rid INTEGER,
e INTEGER, a SMALLINT, v BLOB, tx INTEGER, value_type_tag SMALLINT)")])
(defn create-temp-tx-lookup-eavt-statement [idx-name table-name]
;; Note that the consuming code creates and drops the indexes
;; manually, which makes insertion slightly faster.
;; This index prevents overlapping transactions.
;; The idx-name and table-name are not escaped in any way, in order
;; to allow "temp.dotted" names.
;; TODO: drop added0?
[(str "CREATE UNIQUE INDEX IF NOT EXISTS "
idx-name
" ON "
table-name
" (e0, a0, v0, added0, value_type_tag0) WHERE sv IS NOT NULL")])
(defn <create-current-version (defn <create-current-version
[db] [db]
(->> (->>

View file

@ -61,7 +61,7 @@
db-after ;; The DB after the transaction. db-after ;; The DB after the transaction.
tx ;; The tx ID represented by the transaction in this report; refer :db/tx. tx ;; The tx ID represented by the transaction in this report; refer :db/tx.
txInstant ;; The timestamp instant when the the transaction was processed/committed in this report; refer :db/txInstant. txInstant ;; The timestamp instant when the the transaction was processed/committed in this report; refer :db/txInstant.
entities ;; The set of entities (like [:db/add e a v tx]) processed. entities ;; The set of entities (like [:db/add e a v]) processed.
tx-data ;; The set of datoms applied to the database, like (Datom. e a v tx added). tx-data ;; The set of datoms applied to the database, like (Datom. e a v tx added).
tempids ;; The map from id-literal -> numeric entid. tempids ;; The map from id-literal -> numeric entid.
part-map ;; Map {:db.part/user {:start 0x10000 :idx 0x10000}, ...}. part-map ;; Map {:db.part/user {:start 0x10000 :idx 0x10000}, ...}.
@ -118,7 +118,7 @@
true true
entity)) entity))
(defn maybe-ident->entid [db [op e a v tx :as orig]] (defn maybe-ident->entid [db [op e a v :as orig]]
;; We have to handle all ops, including those when a or v are not defined. ;; We have to handle all ops, including those when a or v are not defined.
(let [e (db/entid db e) (let [e (db/entid db e)
a (db/entid db a) a (db/entid db a)
@ -127,8 +127,8 @@
(db/entid db v))] (db/entid db v))]
(when (and a (not (integer? a))) (when (and a (not (integer? a)))
(raise "Unknown attribute " a (raise "Unknown attribute " a
{:form orig :attribute a})) {:form orig :attribute a :entity orig}))
[op e a v tx])) [op e a v]))
(defrecord Transaction [db tempids entities]) (defrecord Transaction [db tempids entities])
@ -250,49 +250,43 @@
;; Extract the current txInstant for the report. ;; Extract the current txInstant for the report.
(->> (update-txInstant db*))))) (->> (update-txInstant db*)))))
(defn- lookup-ref? [x]
"Return `x` if `x` is like [:attr value], false otherwise."
(and (sequential? x)
(= (count x) 2)
(or (keyword? (first x))
(integer? (first x)))
x))
(defn <resolve-lookup-refs [db report] (defn <resolve-lookup-refs [db report]
{:pre [(db/db? db) (report? report)]} {:pre [(db/db? db) (report? report)]}
(let [entities (:entities report)] (let [unique-identity? (memoize (partial ds/unique-identity? (db/schema db)))
;; TODO: meta. ;; Map lookup-ref -> entities containing lookup-ref, like {[[:a :v] [[[:a :v] :b :w] ...]] ...}.
groups (group-by (partial keep db/lookup-ref?) (:entities report))
;; Entities with no lookup-ref are grouped under the key (lazy-seq).
entities (get groups (lazy-seq)) ;; No lookup-refs? Pass through.
to-resolve (dissoc groups (lazy-seq)) ;; The ones with lookup-refs.
;; List [[:a :v] ...] to lookup.
avs (set (map (juxt :a :v) (apply concat (keys to-resolve))))
->av (fn [r] ;; Conditional (juxt :a :v) that passes through nil.
(when r [(:a r) (:v r)]))]
(go-pair (go-pair
(if (empty? entities) (let [av->e (<? (db/<avs db avs))
report resolve1 (fn [field]
(assoc-in (if-let [[a v] (->av (db/lookup-ref? field))]
report [:entities] (if-not (unique-identity? (db/entid db a))
;; We can't use `for` because go-pair doesn't traverse function boundaries. (raise "Lookup-ref found with non-unique-identity attribute " a " and value " v
;; Apologies for the tortured nested loop. {:error :transact/lookup-ref-with-non-unique-identity-attribute
(loop [[op & entity] (first entities) :a a
next (rest entities) :v v})
acc []]
(if (nil? op)
acc
(recur (first next)
(rest next)
(conj acc
(loop [field (first entity)
rem (rest entity)
acc [op]]
(if (nil? field)
acc
(recur (first rem)
(rest rem)
(conj acc
(if-let [[a v] (lookup-ref? field)]
(or (or
;; The lookup might fail! If so, throw. (get av->e [a v])
(:e (<? (db/<av db a v))) (raise "No entity found for lookup-ref with attribute " a " and value " v
(raise "No entity found with attr " a " and val " v "." {:error :transact/lookup-ref-not-found
{:a a :v v})) :a a
field)))))))))))))) :v v})))
field))
resolve (fn [entity]
(mapv resolve1 entity))]
(assoc
report
:entities
(concat
entities
(map resolve (apply concat (vals to-resolve)))))))))
(declare <resolve-id-literals) (declare <resolve-id-literals)

View file

@ -47,6 +47,7 @@
(explode-entity-a-v db entity v straight-a eid) (explode-entity-a-v db entity v straight-a eid)
(and (map? v) (and (map? v)
(not (db/lookup-ref? v))
(not (db/id-literal? v))) (not (db/id-literal? v)))
;; Another entity is given as a nested map. ;; Another entity is given as a nested map.
(if (ds/ref? (db/schema db) straight-a*) (if (ds/ref? (db/schema db) straight-a*)

View file

@ -10,7 +10,7 @@
[cljs.core.async.macros :as a :refer [go]])) [cljs.core.async.macros :as a :refer [go]]))
(:require (:require
[datomish.api :as d] [datomish.api :as d]
[datomish.db.debug :refer [<datoms-after <transactions-after <shallow-entity <fulltext-values]] [datomish.db.debug :refer [<datoms-after <datoms>= <transactions-after <shallow-entity <fulltext-values]]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]] [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]]
[datomish.schema :as ds] [datomish.schema :as ds]
[datomish.simple-schema] [datomish.simple-schema]
@ -849,5 +849,71 @@
(is (= (map #(dissoc %1 :db/id) (datomish.simple-schema/simple-schema->schema in)) (is (= (map #(dissoc %1 :db/id) (datomish.simple-schema/simple-schema->schema in))
expected))))) expected)))))
(deftest-db test-lookup-refs conn
(let [{tx0 :tx} (<? (d/<transact! conn test-schema))
{tx1 :tx} (<? (d/<transact! conn [[:db/add 1 :name "Ivan"]
[:db/add 2 :name "Phil"]
[:db/add 3 :name "Petr"]]))]
(testing "Looks up entity refs"
(let [{tx :tx} (<? (d/<transact! conn [[:db/add (d/lookup-ref :name "Ivan") :aka "Devil"]
[:db/add (d/lookup-ref :name "Phil") :email "@1"]]))]
(is (= #{[1 :name "Ivan"]
[2 :name "Phil"]
[3 :name "Petr"]
[1 :aka "Devil"]
[2 :email "@1"]}
(<? (<datoms>= (d/db conn) tx1))))))
(testing "Looks up value refs"
(let [{tx :tx} (<? (d/<transact! conn [[:db/add 1 :friends (d/lookup-ref :name "Petr")]
[:db/add 3 :friends (d/lookup-ref :name "Ivan")]]))]
(is (= #{[1 :friends 3]
[3 :friends 1]}
(<? (<datoms>= (d/db conn) tx))))))
(testing "Looks up entity refs in maps"
(let [{tx :tx} (<? (d/<transact! conn [{:db/id (d/lookup-ref :name "Phil") :friends 1}]))]
(is (= #{[2 :friends 1]}
(<? (<datoms>= (d/db conn) tx))))))
(testing "Looks up value refs in maps"
(let [{tx :tx} (<? (d/<transact! conn [{:db/id 2 :friends (d/lookup-ref :name "Petr")}]))]
(is (= #{[2 :friends 3]}
(<? (<datoms>= (d/db conn) tx))))))
(testing "Looks up value refs in sequences in maps"
(let [{tx :tx} (<? (d/<transact! conn [{:db/id 1 :friends [(d/lookup-ref :name "Ivan") (d/lookup-ref :name "Phil")]}]))]
(is (= #{[1 :friends 1]
[1 :friends 2]}
(<? (<datoms>= (d/db conn) tx))))))
(testing "Looks up refs when there are more than 999 refs (all present)"
(let
[bound (* 999 2)
make-add #(vector :db/add (+ 1000 %) :name (str "Ivan-" %))
make-ref #(-> {:db/id (d/lookup-ref :name (str "Ivan-" %)) :email (str "Ivan-" % "@" %)})
{tx-data1 :tx-data} (<? (d/<transact! conn (map make-add (range bound))))
{tx-data2 :tx-data} (<? (d/<transact! conn (map make-ref (range bound))))]
(is (= bound (dec (count tx-data1)))) ;; Each :name is new; dec to account for :db/tx.
(is (= bound (dec (count tx-data2)))) ;; Each lookup-ref exists, each :email is new; dec for :db/tx.
))
(testing "Fails for missing entities"
(is (thrown-with-msg?
ExceptionInfo #"No entity found for lookup-ref"
(<? (d/<transact! conn [[:db/add (d/lookup-ref :name "Mysterioso") :aka "The Magician"]]))))
(is (thrown-with-msg?
ExceptionInfo #"No entity found for lookup-ref"
(<? (d/<transact! conn [[:db/add 1 :friends (d/lookup-ref :name "Mysterioso")]])))))
(testing "Fails for non-identity attributes"
(is (thrown-with-msg?
ExceptionInfo #"Lookup-ref found with non-unique-identity attribute"
(<? (d/<transact! conn [[:db/add (d/lookup-ref :aka "The Magician") :email "@2"]]))))
(is (thrown-with-msg?
ExceptionInfo #"Lookup-ref found with non-unique-identity attribute"
(<? (d/<transact! conn [[:db/add 1 :friends (d/lookup-ref :aka "The Magician")]])))))))
#_ (time (t/run-tests)) #_ (time (t/run-tests))
#_ (time (clojure.test/test-vars [#'test-lookup-refs]))