Support :db/fulltext true.
Internally, we use SQLite's FTS4 to maintain a fulltext_values table of unique "text" values. Fulltext indexed datoms have value v that is the rowid into fulltext_values. We manually maintain the map between rowid and value in the transactor. For convenience, we expose two views interpolating the real text values into the datoms structure.
This commit is contained in:
parent
13f33a4915
commit
f25838a1eb
4 changed files with 160 additions and 14 deletions
|
@ -80,6 +80,24 @@
|
||||||
v (:v row)]
|
v (:v row)]
|
||||||
(Datom. e a (ds/<-SQLite schema a v) (:tx row) (and (some? (:added row)) (not= 0 (:added row))))))
|
(Datom. e a (ds/<-SQLite schema a v) (:tx row) (and (some? (:added row)) (not= 0 (:added row))))))
|
||||||
|
|
||||||
|
(defn- <insert-fulltext-value [db value]
|
||||||
|
(go-pair
|
||||||
|
;; This dance is necessary to keep fulltext_values.text unique. We want uniqueness so that we
|
||||||
|
;; can work with string values, maintaining consistency throughout the transactor
|
||||||
|
;; implementation. (Without this, we'd need to handle a [rowid text] pair specially when
|
||||||
|
;; comparing in the transactor.) Unfortunately, it's not possible to declare a unique
|
||||||
|
;; constraint on a virtual table, including an FTS table. External content tables (see
|
||||||
|
;; http://www.sqlite.org/fts3.html#section_6_2_2) don't appear to address our use case, so we
|
||||||
|
;; maintain uniqueness ourselves.
|
||||||
|
(let [rowid
|
||||||
|
(if-let [row (first (<? (s/all-rows (:sqlite-connection db) ["SELECT rowid FROM fulltext_values WHERE text = ?" value])))]
|
||||||
|
(:rowid row)
|
||||||
|
(do
|
||||||
|
(<? (s/execute! (:sqlite-connection db) ["INSERT INTO fulltext_values VALUES (?)" value]))
|
||||||
|
(:rowid (first (<? (s/all-rows (:sqlite-connection db) ["SELECT last_insert_rowid() AS rowid"]))))))
|
||||||
|
]
|
||||||
|
rowid)))
|
||||||
|
|
||||||
(defrecord DB [sqlite-connection schema idents current-tx]
|
(defrecord DB [sqlite-connection schema idents current-tx]
|
||||||
;; idents is map {ident -> entid} of known idents. See http://docs.datomic.com/identity.html#idents.
|
;; idents is map {ident -> entid} of known idents. See http://docs.datomic.com/identity.html#idents.
|
||||||
IDB
|
IDB
|
||||||
|
@ -100,7 +118,7 @@
|
||||||
(go-pair
|
(go-pair
|
||||||
(->>
|
(->>
|
||||||
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
|
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
|
||||||
:from [:datoms]
|
:from [:all_datoms]
|
||||||
:where (cons :and (map #(vector := %1 %2) [:e :a :v :tx] (take-while (comp not nil?) [e a v tx])))} ;; Must drop nils.
|
:where (cons :and (map #(vector := %1 %2) [:e :a :v :tx] (take-while (comp not nil?) [e a v tx])))} ;; Must drop nils.
|
||||||
(sql/format)
|
(sql/format)
|
||||||
|
|
||||||
|
@ -115,7 +133,7 @@
|
||||||
(go-pair
|
(go-pair
|
||||||
(->>
|
(->>
|
||||||
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
|
{:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns.
|
||||||
:from [:datoms]
|
:from [:all_datoms]
|
||||||
:where [:and [:= :a a] [:= :v v] [:= :index_avet 1]]}
|
:where [:and [:= :a a] [:= :v v] [:= :index_avet 1]]}
|
||||||
(sql/format)
|
(sql/format)
|
||||||
|
|
||||||
|
@ -130,22 +148,31 @@
|
||||||
;; TODO: batch insert, batch delete.
|
;; TODO: batch insert, batch delete.
|
||||||
(doseq [datom datoms]
|
(doseq [datom datoms]
|
||||||
(let [[e a v tx added] datom
|
(let [[e a v tx added] datom
|
||||||
v (ds/->SQLite (.-schema db) a v)] ;; TODO: understand why (schema db) fails.
|
schema (.-schema db) ;; TODO: understand why (schema db) fails.
|
||||||
|
v (ds/->SQLite schema a v)
|
||||||
|
fulltext? (ds/fulltext? schema a)]
|
||||||
;; Append to transaction log.
|
;; Append to transaction log.
|
||||||
(<? (exec
|
(<? (exec
|
||||||
["INSERT INTO transactions VALUES (?, ?, ?, ?, ?)" e a v tx (if added 1 0)]))
|
["INSERT INTO transactions VALUES (?, ?, ?, ?, ?)" e a v tx (if added 1 0)]))
|
||||||
;; Update materialized datom view.
|
;; Update materialized datom view.
|
||||||
(if (.-added datom)
|
(if (.-added datom)
|
||||||
|
(let [v (if fulltext?
|
||||||
|
(<? (<insert-fulltext-value db v))
|
||||||
|
v)]
|
||||||
(<? (exec
|
(<? (exec
|
||||||
["INSERT INTO datoms VALUES (?, ?, ?, ?, ?, ?, ?, ?)" e a v tx
|
["INSERT INTO datoms VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" e a v tx
|
||||||
(ds/indexing? (.-schema db) a) ;; index_avet
|
(ds/indexing? schema a) ;; index_avet
|
||||||
(ds/ref? (.-schema db) a) ;; index_vaet
|
(ds/ref? schema a) ;; index_vaet
|
||||||
(ds/unique-value? (.-schema db) a) ;; unique_value
|
fulltext? ;; index_fulltext
|
||||||
(ds/unique-identity? (.-schema db) a) ;; unique_identity
|
(ds/unique-value? schema a) ;; unique_value
|
||||||
]))
|
(ds/unique-identity? schema a) ;; unique_identity
|
||||||
|
])))
|
||||||
|
(if fulltext?
|
||||||
(<? (exec
|
(<? (exec
|
||||||
;; TODO: verify this is correct.
|
;; TODO: in the future, purge fulltext values from the fulltext_datoms table.
|
||||||
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v = ?)" e a v]))))))
|
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v IN (SELECT rowid FROM fulltext_values WHERE text = ?))" e a v]))
|
||||||
|
(<? (exec
|
||||||
|
["DELETE FROM datoms WHERE (e = ? AND a = ? AND v = ?)" e a v])))))))
|
||||||
db))
|
db))
|
||||||
|
|
||||||
(<advance-tx [db]
|
(<advance-tx [db]
|
||||||
|
|
|
@ -36,6 +36,10 @@
|
||||||
:cljs [^boolean indexing?]) [schema attr]
|
:cljs [^boolean indexing?]) [schema attr]
|
||||||
(is-attr? schema attr :db/index))
|
(is-attr? schema attr :db/index))
|
||||||
|
|
||||||
|
(defn #?@(:clj [^Boolean fulltext?]
|
||||||
|
:cljs [^boolean fulltext?]) [schema attr]
|
||||||
|
(is-attr? schema attr :db/fulltext))
|
||||||
|
|
||||||
(defn #?@(:clj [^Boolean unique?]
|
(defn #?@(:clj [^Boolean unique?]
|
||||||
:cljs [^boolean unique?]) [schema attr]
|
:cljs [^boolean unique?]) [schema attr]
|
||||||
(is-attr? schema attr :db/unique))
|
(is-attr? schema attr :db/unique))
|
||||||
|
@ -64,6 +68,7 @@
|
||||||
(= v :db.unique/identity) [:db/unique :db.unique/identity :db/index]
|
(= v :db.unique/identity) [:db/unique :db.unique/identity :db/index]
|
||||||
(= v :db.unique/value) [:db/unique :db.unique/value :db/index]
|
(= v :db.unique/value) [:db/unique :db.unique/value :db/index]
|
||||||
(= [k v] [:db/index true]) [:db/index]
|
(= [k v] [:db/index true]) [:db/index]
|
||||||
|
(= [k v] [:db/fulltext true]) [:db/fulltext :db/index]
|
||||||
(= k :db/valueType) [v]))
|
(= k :db/valueType) [v]))
|
||||||
|
|
||||||
(defn- multimap [e m]
|
(defn- multimap [e m]
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
(def v1-statements
|
(def v1-statements
|
||||||
["CREATE TABLE datoms (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL,
|
["CREATE TABLE datoms (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL,
|
||||||
index_avet TINYINT NOT NULL DEFAULT 0, index_vaet TINYINT NOT NULL DEFAULT 0,
|
index_avet TINYINT NOT NULL DEFAULT 0, index_vaet TINYINT NOT NULL DEFAULT 0,
|
||||||
|
index_fulltext TINYINT NOT NULL DEFAULT 0,
|
||||||
unique_value TINYINT NOT NULL DEFAULT 0, unique_identity TINYINT NOT NULL DEFAULT 0)"
|
unique_value TINYINT NOT NULL DEFAULT 0, unique_identity TINYINT NOT NULL DEFAULT 0)"
|
||||||
"CREATE INDEX eavt ON datoms (e, a)" ;; No v -- that's an opt-in index.
|
"CREATE INDEX eavt ON datoms (e, a)" ;; No v -- that's an opt-in index.
|
||||||
"CREATE INDEX aevt ON datoms (a, e)" ;; No v -- that's an opt-in index.
|
"CREATE INDEX aevt ON datoms (a, e)" ;; No v -- that's an opt-in index.
|
||||||
|
@ -35,9 +36,37 @@
|
||||||
;; validate the transactor implementation.
|
;; validate the transactor implementation.
|
||||||
"CREATE UNIQUE INDEX unique_identity ON datoms (a, v) WHERE unique_identity = 1"
|
"CREATE UNIQUE INDEX unique_identity ON datoms (a, v) WHERE unique_identity = 1"
|
||||||
|
|
||||||
|
"CREATE INDEX fulltext ON datoms (v) WHERE index_fulltext = 1"
|
||||||
|
|
||||||
"CREATE TABLE transactions (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, added TINYINT NOT NULL DEFAULT 1)"
|
"CREATE TABLE transactions (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, added TINYINT NOT NULL DEFAULT 1)"
|
||||||
"CREATE INDEX tx ON transactions (tx)"
|
"CREATE INDEX tx ON transactions (tx)"
|
||||||
|
|
||||||
|
;; Fulltext indexing.
|
||||||
|
;; A fulltext indexed value v is an integer rowid referencing fulltext_values.
|
||||||
|
|
||||||
|
;; Optional settings:
|
||||||
|
;; tokenize="porter"
|
||||||
|
;; prefix='2,3'
|
||||||
|
;; By default we use Unicode-aware tokenizing (particularly for case folding), but preserve
|
||||||
|
;; diacritics.
|
||||||
|
"CREATE VIRTUAL TABLE fulltext_values
|
||||||
|
USING FTS4 (text NOT NULL, tokenize=unicode61 \"remove_diacritics=0\")"
|
||||||
|
|
||||||
|
;; A view transparently interpolating fulltext indexed values into the datom structure.
|
||||||
|
"CREATE VIEW fulltext_datoms AS
|
||||||
|
SELECT e, a, fulltext_values.text AS v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
|
||||||
|
FROM datoms JOIN fulltext_values ON datoms.v = fulltext_values.rowid
|
||||||
|
WHERE datoms.index_fulltext = 1"
|
||||||
|
|
||||||
|
;; A view transparently interpolating all entities (fulltext and non-fulltext) into the datom structure.
|
||||||
|
"CREATE VIEW all_datoms AS
|
||||||
|
SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
|
||||||
|
FROM datoms
|
||||||
|
WHERE index_fulltext != 1
|
||||||
|
UNION ALL
|
||||||
|
SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity
|
||||||
|
FROM fulltext_datoms"
|
||||||
|
|
||||||
;; Materialized views of the schema.
|
;; Materialized views of the schema.
|
||||||
"CREATE TABLE idents (ident TEXT NOT NULL PRIMARY KEY, entid INTEGER UNIQUE NOT NULL)"
|
"CREATE TABLE idents (ident TEXT NOT NULL PRIMARY KEY, entid INTEGER UNIQUE NOT NULL)"
|
||||||
"CREATE TABLE schema (ident TEXT NOT NULL, attr TEXT NOT NULL, value TEXT NOT NULL, FOREIGN KEY (ident) REFERENCES idents (ident))"
|
"CREATE TABLE schema (ident TEXT NOT NULL, attr TEXT NOT NULL, value TEXT NOT NULL, FOREIGN KEY (ident) REFERENCES idents (ident))"
|
||||||
|
@ -49,7 +78,10 @@
|
||||||
(->>
|
(->>
|
||||||
#(go-pair
|
#(go-pair
|
||||||
(doseq [statement v1-statements]
|
(doseq [statement v1-statements]
|
||||||
(<? (s/execute! db [statement])))
|
(try
|
||||||
|
(<? (s/execute! db [statement]))
|
||||||
|
(catch #?(:clj Throwable :cljs js/Error) e
|
||||||
|
(throw (ex-info "Failed to execute statement" {:statement statement} e)))))
|
||||||
(<? (s/set-user-version db current-version))
|
(<? (s/set-user-version db current-version))
|
||||||
(<? (s/get-user-version db)))
|
(<? (s/get-user-version db)))
|
||||||
(s/in-transaction! db)))
|
(s/in-transaction! db)))
|
||||||
|
|
|
@ -69,6 +69,13 @@
|
||||||
(defn- <transactions [db]
|
(defn- <transactions [db]
|
||||||
(<transactions-after db 0))
|
(<transactions-after db 0))
|
||||||
|
|
||||||
|
(defn- <fulltext-values [db]
|
||||||
|
(go-pair
|
||||||
|
(->>
|
||||||
|
(s/all-rows (:sqlite-connection db) ["SELECT rowid, text FROM fulltext_values"])
|
||||||
|
(<?)
|
||||||
|
(mapv #(vector (:rowid %) (:text %))))))
|
||||||
|
|
||||||
(defn tx [report]
|
(defn tx [report]
|
||||||
(get-in report [:db-after :current-tx]))
|
(get-in report [:db-after :current-tx]))
|
||||||
|
|
||||||
|
@ -461,3 +468,78 @@
|
||||||
|
|
||||||
(finally
|
(finally
|
||||||
(<? (dm/close-db db)))))))
|
(<? (dm/close-db db)))))))
|
||||||
|
|
||||||
|
(deftest-async test-fulltext
|
||||||
|
(with-tempfile [t (tempfile)]
|
||||||
|
(let [c (<? (s/<sqlite-connection t))
|
||||||
|
db (<? (dm/<db-with-sqlite-connection c test-schema))
|
||||||
|
conn (dm/connection-with-db db)
|
||||||
|
now 0xdeadbeef
|
||||||
|
schema [{:db/id (dm/id-literal :db.part/db -1)
|
||||||
|
:db/ident :test/fulltext
|
||||||
|
:db/valueType :db.type/string
|
||||||
|
:db/fulltext true
|
||||||
|
:db/unique :db.unique/identity}
|
||||||
|
{:db/id :db.part/db :db.install/attribute (dm/id-literal :db.part/db -1)}
|
||||||
|
{:db/id (dm/id-literal :db.part/db -2)
|
||||||
|
:db/ident :test/other
|
||||||
|
:db/valueType :db.type/string
|
||||||
|
:db/fulltext true
|
||||||
|
:db/cardinality :db.cardinality/one}
|
||||||
|
{:db/id :db.part/db :db.install/attribute (dm/id-literal :db.part/db -2)}
|
||||||
|
]
|
||||||
|
tx0 (tx (<? (dm/<transact! conn schema now)))]
|
||||||
|
(try
|
||||||
|
(testing "Can add fulltext indexed datoms"
|
||||||
|
(let [r (<? (dm/<transact! conn [[:db/add 101 :test/fulltext "test this"]] now))]
|
||||||
|
(is (= (<? (<fulltext-values (dm/db conn)))
|
||||||
|
[[1 "test this"]]))
|
||||||
|
(is (= (<? (<datoms-after (dm/db conn) tx0))
|
||||||
|
#{[101 :test/fulltext 1]})) ;; Values are raw; 1 is the rowid into fulltext_values.
|
||||||
|
))
|
||||||
|
|
||||||
|
(testing "Can replace fulltext indexed datoms"
|
||||||
|
(let [r (<? (dm/<transact! conn [[:db/add 101 :test/fulltext "alternate thing"]] now))]
|
||||||
|
(is (= (<? (<fulltext-values (dm/db conn)))
|
||||||
|
[[1 "test this"]
|
||||||
|
[2 "alternate thing"]]))
|
||||||
|
(is (= (<? (<datoms-after (dm/db conn) tx0))
|
||||||
|
#{[101 :test/fulltext 2]})) ;; Values are raw; 2 is the rowid into fulltext_values.
|
||||||
|
))
|
||||||
|
|
||||||
|
(testing "Can upsert keyed by fulltext indexed datoms"
|
||||||
|
(let [r (<? (dm/<transact! conn [{:db/id (dm/id-literal :db.part/user) :test/fulltext "alternate thing" :test/other "other"}] now))]
|
||||||
|
(is (= (<? (<fulltext-values (dm/db conn)))
|
||||||
|
[[1 "test this"]
|
||||||
|
[2 "alternate thing"]
|
||||||
|
[3 "other"]]))
|
||||||
|
(is (= (<? (<datoms-after (dm/db conn) tx0))
|
||||||
|
#{[101 :test/fulltext 2] ;; Values are raw; 2, 3 are the rowids into fulltext_values.
|
||||||
|
[101 :test/other 3]}))
|
||||||
|
))
|
||||||
|
|
||||||
|
(testing "Can re-use fulltext indexed datoms"
|
||||||
|
(let [r (<? (dm/<transact! conn [[:db/add 102 :test/other "test this"]] now))]
|
||||||
|
(is (= (<? (<fulltext-values (dm/db conn)))
|
||||||
|
[[1 "test this"]
|
||||||
|
[2 "alternate thing"]
|
||||||
|
[3 "other"]]))
|
||||||
|
(is (= (<? (<datoms-after (dm/db conn) tx0))
|
||||||
|
#{[101 :test/fulltext 2]
|
||||||
|
[101 :test/other 3]
|
||||||
|
[102 :test/other 1]})) ;; Values are raw; 1, 2, 3 are the rowids into fulltext_values.
|
||||||
|
))
|
||||||
|
|
||||||
|
(testing "Can retract fulltext indexed datoms"
|
||||||
|
(let [r (<? (dm/<transact! conn [[:db/retract 101 :test/fulltext "alternate thing"]] now))]
|
||||||
|
(is (= (<? (<fulltext-values (dm/db conn)))
|
||||||
|
[[1 "test this"]
|
||||||
|
[2 "alternate thing"]
|
||||||
|
[3 "other"]]))
|
||||||
|
(is (= (<? (<datoms-after (dm/db conn) tx0))
|
||||||
|
#{[101 :test/other 3]
|
||||||
|
[102 :test/other 1]})) ;; Values are raw; 1, 3 are the rowids into fulltext_values.
|
||||||
|
))
|
||||||
|
|
||||||
|
(finally
|
||||||
|
(<? (dm/close-db db)))))))
|
||||||
|
|
Loading…
Reference in a new issue