Support :db/fulltext true.

Internally, we use SQLite's FTS4 to maintain a fulltext_values table of
unique "text" values.  Fulltext indexed datoms have value v that is the
rowid into fulltext_values.  We manually maintain the map between rowid
and value in the transactor.

For convenience, we expose two views interpolating the real text values
into the datoms structure.
This commit is contained in:
Nick Alexander 2016-08-03 15:42:04 -07:00
parent 13f33a4915
commit f25838a1eb
4 changed files with 160 additions and 14 deletions

View file

@ -80,6 +80,24 @@
v (:v row)]
(Datom. e a (ds/<-SQLite schema a v) (:tx row) (and (some? (:added row)) (not= 0 (:added row))))))
(defn- <insert-fulltext-value [db value]
(go-pair
;; This dance is necessary to keep fulltext_values.text unique. We want uniqueness so that we
;; can work with string values, maintaining consistency throughout the transactor
;; implementation. (Without this, we'd need to handle a [rowid text] pair specially when
;; comparing in the transactor.) Unfortunately, it's not possible to declare a unique
;; constraint on a virtual table, including an FTS table. External content tables (see
;; http://www.sqlite.org/fts3.html#section_6_2_2) don't appear to address our use case, so we
;; maintain uniqueness ourselves.
(let [rowid
(if-let [row (first (<? (s/all-rows (:sqlite-connection db) ["SELECT rowid FROM fulltext_values WHERE text = ?" value])))]
(:rowid row)
(do
(<? (s/execute! (:sqlite-connection db) ["INSERT INTO fulltext_values VALUES (?)" value]))
(:rowid (first (<? (s/all-rows (:sqlite-connection db) ["SELECT last_insert_rowid() AS rowid"]))))))
]
rowid)))
(defrecord DB [sqlite-connection schema idents current-tx]
;; idents is map {ident -> entid} of known idents. See http://docs.datomic.com/identity.html#idents.
IDB
@ -100,7 +118,7 @@
(go-pair
(->>
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
:from [:datoms]
:from [:all_datoms]
:where (cons :and (map #(vector := %1 %2) [:e :a :v :tx] (take-while (comp not nil?) [e a v tx])))} ;; Must drop nils.
(sql/format)
@ -115,7 +133,7 @@
(go-pair
(->>
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
:from [:datoms]
:from [:all_datoms]
:where [:and [:= :a a] [:= :v v] [:= :index_avet 1]]}
(sql/format)
@ -130,22 +148,31 @@
;; TODO: batch insert, batch delete.
(doseq [datom datoms]
(let [[e a v tx added] datom
v (ds/->SQLite (.-schema db) a v)] ;; TODO: understand why (schema db) fails.
schema (.-schema db) ;; TODO: understand why (schema db) fails.
v (ds/->SQLite schema a v)
fulltext? (ds/fulltext? schema a)]
;; Append to transaction log.
(<? (exec
["INSERT INTO transactions VALUES (?, ?, ?, ?, ?)" e a v tx (if added 1 0)]))
;; Update materialized datom view.
(if (.-added datom)
(<? (exec
["INSERT INTO datoms VALUES (?, ?, ?, ?, ?, ?, ?, ?)" e a v tx
(ds/indexing? (.-schema db) a) ;; index_avet
(ds/ref? (.-schema db) a) ;; index_vaet
(ds/unique-value? (.-schema db) a) ;; unique_value
(ds/unique-identity? (.-schema db) a) ;; unique_identity
]))
(<? (exec
;; TODO: verify this is correct.
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v = ?)" e a v]))))))
(let [v (if fulltext?
(<? (<insert-fulltext-value db v))
v)]
(<? (exec
["INSERT INTO datoms VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" e a v tx
(ds/indexing? schema a) ;; index_avet
(ds/ref? schema a) ;; index_vaet
fulltext? ;; index_fulltext
(ds/unique-value? schema a) ;; unique_value
(ds/unique-identity? schema a) ;; unique_identity
])))
(if fulltext?
(<? (exec
;; TODO: in the future, purge fulltext values from the fulltext_datoms table.
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v IN (SELECT rowid FROM fulltext_values WHERE text = ?))" e a v]))
(<? (exec
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v = ?)" e a v])))))))
db))
(<advance-tx [db]

View file

@ -36,6 +36,10 @@
:cljs [^boolean indexing?]) [schema attr]
(is-attr? schema attr :db/index))
(defn #?@(:clj [^Boolean fulltext?]
:cljs [^boolean fulltext?]) [schema attr]
(is-attr? schema attr :db/fulltext))
(defn #?@(:clj [^Boolean unique?]
:cljs [^boolean unique?]) [schema attr]
(is-attr? schema attr :db/unique))
@ -64,6 +68,7 @@
(= v :db.unique/identity) [:db/unique :db.unique/identity :db/index]
(= v :db.unique/value) [:db/unique :db.unique/value :db/index]
(= [k v] [:db/index true]) [:db/index]
(= [k v] [:db/fulltext true]) [:db/fulltext :db/index]
(= k :db/valueType) [v]))
(defn- multimap [e m]

View file

@ -20,6 +20,7 @@
(def v1-statements
["CREATE TABLE datoms (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL,
index_avet TINYINT NOT NULL DEFAULT 0, index_vaet TINYINT NOT NULL DEFAULT 0,
index_fulltext TINYINT NOT NULL DEFAULT 0,
unique_value TINYINT NOT NULL DEFAULT 0, unique_identity TINYINT NOT NULL DEFAULT 0)"
"CREATE INDEX eavt ON datoms (e, a)" ;; No v -- that's an opt-in index.
"CREATE INDEX aevt ON datoms (a, e)" ;; No v -- that's an opt-in index.
@ -35,9 +36,37 @@
;; validate the transactor implementation.
"CREATE UNIQUE INDEX unique_identity ON datoms (a, v) WHERE unique_identity = 1"
"CREATE INDEX fulltext ON datoms (v) WHERE index_fulltext = 1"
"CREATE TABLE transactions (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, added TINYINT NOT NULL DEFAULT 1)"
"CREATE INDEX tx ON transactions (tx)"
;; Fulltext indexing.
;; A fulltext indexed value v is an integer rowid referencing fulltext_values.
;; Optional settings:
;; tokenize="porter"
;; prefix='2,3'
;; By default we use Unicode-aware tokenizing (particularly for case folding), but preserve
;; diacritics.
"CREATE VIRTUAL TABLE fulltext_values
USING FTS4 (text NOT NULL, tokenize=unicode61 \"remove_diacritics=0\")"
;; A view transparently interpolating fulltext indexed values into the datom structure.
"CREATE VIEW fulltext_datoms AS
SELECT e, a, fulltext_values.text AS v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
FROM datoms JOIN fulltext_values ON datoms.v = fulltext_values.rowid
WHERE datoms.index_fulltext = 1"
;; A view transparently interpolating all entities (fulltext and non-fulltext) into the datom structure.
"CREATE VIEW all_datoms AS
SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
FROM datoms
WHERE index_fulltext != 1
UNION ALL
SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
FROM fulltext_datoms"
;; Materialized views of the schema.
"CREATE TABLE idents (ident TEXT NOT NULL PRIMARY KEY, entid INTEGER UNIQUE NOT NULL)"
"CREATE TABLE schema (ident TEXT NOT NULL, attr TEXT NOT NULL, value TEXT NOT NULL, FOREIGN KEY (ident) REFERENCES idents (ident))"
@ -49,7 +78,10 @@
(->>
#(go-pair
(doseq [statement v1-statements]
(<? (s/execute! db [statement])))
(try
(<? (s/execute! db [statement]))
(catch #?(:clj Throwable :cljs js/Error) e
(throw (ex-info "Failed to execute statement" {:statement statement} e)))))
(<? (s/set-user-version db current-version))
(<? (s/get-user-version db)))
(s/in-transaction! db)))

View file

@ -69,6 +69,13 @@
(defn- <transactions [db]
(<transactions-after db 0))
(defn- <fulltext-values [db]
(go-pair
(->>
(s/all-rows (:sqlite-connection db) ["SELECT rowid, text FROM fulltext_values"])
(<?)
(mapv #(vector (:rowid %) (:text %))))))
(defn tx [report]
(get-in report [:db-after :current-tx]))
@ -461,3 +468,78 @@
(finally
(<? (dm/close-db db)))))))
(deftest-async test-fulltext
(with-tempfile [t (tempfile)]
(let [c (<? (s/<sqlite-connection t))
db (<? (dm/<db-with-sqlite-connection c test-schema))
conn (dm/connection-with-db db)
now 0xdeadbeef
schema [{:db/id (dm/id-literal :db.part/db -1)
:db/ident :test/fulltext
:db/valueType :db.type/string
:db/fulltext true
:db/unique :db.unique/identity}
{:db/id :db.part/db :db.install/attribute (dm/id-literal :db.part/db -1)}
{:db/id (dm/id-literal :db.part/db -2)
:db/ident :test/other
:db/valueType :db.type/string
:db/fulltext true
:db/cardinality :db.cardinality/one}
{:db/id :db.part/db :db.install/attribute (dm/id-literal :db.part/db -2)}
]
tx0 (tx (<? (dm/<transact! conn schema now)))]
(try
(testing "Can add fulltext indexed datoms"
(let [r (<? (dm/<transact! conn [[:db/add 101 :test/fulltext "test this"]] now))]
(is (= (<? (<fulltext-values (dm/db conn)))
[[1 "test this"]]))
(is (= (<? (<datoms-after (dm/db conn) tx0))
#{[101 :test/fulltext 1]})) ;; Values are raw; 1 is the rowid into fulltext_values.
))
(testing "Can replace fulltext indexed datoms"
(let [r (<? (dm/<transact! conn [[:db/add 101 :test/fulltext "alternate thing"]] now))]
(is (= (<? (<fulltext-values (dm/db conn)))
[[1 "test this"]
[2 "alternate thing"]]))
(is (= (<? (<datoms-after (dm/db conn) tx0))
#{[101 :test/fulltext 2]})) ;; Values are raw; 2 is the rowid into fulltext_values.
))
(testing "Can upsert keyed by fulltext indexed datoms"
(let [r (<? (dm/<transact! conn [{:db/id (dm/id-literal :db.part/user) :test/fulltext "alternate thing" :test/other "other"}] now))]
(is (= (<? (<fulltext-values (dm/db conn)))
[[1 "test this"]
[2 "alternate thing"]
[3 "other"]]))
(is (= (<? (<datoms-after (dm/db conn) tx0))
#{[101 :test/fulltext 2] ;; Values are raw; 2, 3 are the rowids into fulltext_values.
[101 :test/other 3]}))
))
(testing "Can re-use fulltext indexed datoms"
(let [r (<? (dm/<transact! conn [[:db/add 102 :test/other "test this"]] now))]
(is (= (<? (<fulltext-values (dm/db conn)))
[[1 "test this"]
[2 "alternate thing"]
[3 "other"]]))
(is (= (<? (<datoms-after (dm/db conn) tx0))
#{[101 :test/fulltext 2]
[101 :test/other 3]
[102 :test/other 1]})) ;; Values are raw; 1, 2, 3 are the rowids into fulltext_values.
))
(testing "Can retract fulltext indexed datoms"
(let [r (<? (dm/<transact! conn [[:db/retract 101 :test/fulltext "alternate thing"]] now))]
(is (= (<? (<fulltext-values (dm/db conn)))
[[1 "test this"]
[2 "alternate thing"]
[3 "other"]]))
(is (= (<? (<datoms-after (dm/db conn) tx0))
#{[101 :test/other 3]
[102 :test/other 1]})) ;; Values are raw; 1, 3 are the rowids into fulltext_values.
))
(finally
(<? (dm/close-db db)))))))