From 71446a3af50a7d677e41770224674b54355a2f88 Mon Sep 17 00:00:00 2001 From: Nick Alexander Date: Mon, 8 Aug 2016 23:30:45 -0700 Subject: [PATCH] Completely rewrite main transaction logic to be faster. This is almost complete; it passes the test suite save for retracting fulltext datoms correctly. There's a lot to say about this approach, but I don't have time to give too many details. The broad outline is as follows. We collect datoms to add and retract in a tx_lookup table. Depending on flags ("search value" sv and "search value type tag" svalue_type_tag) we "complete" the tx_lookup table by joining matching datoms. This allows us to find datoms that are present (and should not be added as part of the transaction, or should be retracted as part of the transaction, or should be replaced as part of the transaction). We complete the tx_lookup (in place!) in two separate INSERTs to avoid a quadratic two-table walk (explain the queries to observe that both INSERTs walk the lookup table once and then use the datoms indexes to complete the matching values). We could simplify the code by using multiple lookup tables, both for the two cases of search parameters (eav vs. ea) and for the incomplete and completed rows. Right now we differentiate the former with NULL checks, and the latter by incrementing the added0 column. It performs well enough, so I haven't tried to understand the performance of separating these things. After the tx_lookup table is completed, we build the transaction from it and update the datoms materialized view table as well. Observe the careful handling of the "search value" sv parameters to handle replacing :db.cardinality/one datoms. Finally, we read the processed transaction back to produce the report returned by the API. This is strictly to match the Datomic API; we might allow consumers to skip this, since many consumers will not want to stream this over the wire. 
Rough timings show the transactor processing a single >50k datom transaction in about 3.5s, of which less than 0.5s is spent in the expensive joins. Further, repeating the processing of the same transaction is only about 3.5s again! That's the worst possible for the joins, since every single inserted datom will already be present in the database, making the most expensive join match every row. --- project.clj | 1 + src/datomish/db.cljc | 206 +++++++++++++++++++++----------- src/datomish/sqlite_schema.cljc | 24 +++- src/datomish/transact.cljc | 156 ++++++------------------ test/datomish/db_test.cljc | 58 +++++---- 5 files changed, 231 insertions(+), 214 deletions(-) diff --git a/project.clj b/project.clj index 18cfb7c5..7d62c1b5 100644 --- a/project.clj +++ b/project.clj @@ -9,6 +9,7 @@ [datascript "0.15.1"] [honeysql "0.8.0"] [com.datomic/datomic-free "0.9.5359"] + [com.taoensso/tufte "1.0.2"] [jamesmacaulay/cljs-promises "0.1.0"]] :cljsbuild {:builds {:release { diff --git a/src/datomish/db.cljc b/src/datomish/db.cljc index 8762c448..3b59a884 100644 --- a/src/datomish/db.cljc +++ b/src/datomish/db.cljc @@ -14,10 +14,13 @@ [datomish.query.source :as source] [datomish.query :as query] [datomish.datom :as dd :refer [datom datom? #?@(:cljs [Datom])]] - [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] + [datomish.util :as util + #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] [datomish.schema :as ds] [datomish.sqlite :as s] [datomish.sqlite-schema :as sqlite-schema] + [taoensso.tufte :as tufte + #?(:cljs :refer-macros :clj :refer) [defnp p profiled profile]] #?@(:clj [[datomish.pair-chan :refer [go-pair !]]]) #?@(:cljs [[datomish.pair-chan] @@ -91,19 +94,13 @@ (> - {:select [:e :a :v :tx [1 :added]] - :from [:all_datoms] - :where [:and [:= :e e] [:= :a a]]} - (s/format) ;; TODO: format these statements only once. 
- - (s/all-rows (:sqlite-connection db)) - (Datom (.-schema db)))))) - - (SQLite schema a v)] - (go-pair - (->> - {:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns. - :from [:all_datoms] - :where [:and [:= :e e] [:= :a a] [:= :value_type_tag tag] [:= :v v]]} - (s/format) ;; TODO: format these statements only once. - - (s/all-rows (:sqlite-connection db)) - (Datom (.-schema db))))))) ;; TODO: understand why (schema db) fails. - (SQLite schema a v)] (go-pair @@ -246,39 +215,6 @@ (mapv (partial row->Datom (.-schema db))))))) ;; TODO: understand why (schema db) fails. - (SQLite schema a v) - fulltext? (ds/fulltext? schema a)] - ;; Append to transaction log. - (SQLite schema a v) + fulltext? (ds/fulltext? schema a)] + (cond + (= op :db/add) + (let [v (if fulltext? + (> + (s/all-rows (:sqlite-connection db) ["SELECT * FROM transactions WHERE tx = ?" tx]) + (Datom schema))))] + tx-data)))) + (!]]]) #?@(:cljs [[datomish.pair-chan] @@ -334,124 +337,35 @@ (ds/ensure-valid-value schema a v))) report)) -(defn- datom. - (doseq [[e a v tx added :as datom] (:tx-data report)] - - (when added - ;; Check for violated :db/unique constraint between datom and existing store. - (when (ds/unique? schema a) - (when-let [found (first (tx-data [db report] - {:pre [(db/db? db) (report? report)]} - (go-pair - (let [initial-report report - {tx :tx} report - schema (db/schema db)] - (loop [report initial-report - es (:entities initial-report)] - (let [[[op e a v :as entity] & entities] es] - (cond - (nil? entity) - report - - (= op :db/add) - (if (ds/multival? schema a) - (if (empty? 
( report - (transact-report (datom e a (.-v old-datom) tx false)) - (transact-report (datom e a v tx true))) - entities)) - (recur (transact-report report (datom e a v tx true)) entities))) - - (= op :db/retract) - (if (first (> - report - (preprocess db) + (let [> + report + (preprocess db) - (tx-data db) - ( - db + (collect-db-install-assertions db) + (p :collect-db-install-assertions)) - (db/ + db - (db/> (p :apply-db-ident-assertions)) - (db/> (p :apply-db-install-assertions))) + ] (-> report (assoc-in [:db-after] db-after))))) diff --git a/test/datomish/db_test.cljc b/test/datomish/db_test.cljc index 2433fd1d..8f875dcd 100644 --- a/test/datomish/db_test.cljc +++ b/test/datomish/db_test.cljc @@ -412,32 +412,48 @@ tx0 (:tx (