diff --git a/project.clj b/project.clj index 9468470c..7d62c1b5 100644 --- a/project.clj +++ b/project.clj @@ -8,6 +8,8 @@ [org.clojure/core.async "0.2.385"] [datascript "0.15.1"] [honeysql "0.8.0"] + [com.datomic/datomic-free "0.9.5359"] + [com.taoensso/tufte "1.0.2"] [jamesmacaulay/cljs-promises "0.1.0"]] :cljsbuild {:builds {:release { @@ -48,9 +50,12 @@ [org.clojure/tools.nrepl "0.2.10"] [org.clojure/java.jdbc "0.6.2-alpha1"] [org.xerial/sqlite-jdbc "3.8.11.2"]] + :jvm-opts ["-Xss4m"] :repl-options {:nrepl-middleware [cemerick.piggieback/wrap-cljs-repl]} :plugins [[lein-cljsbuild "1.1.3"] - [lein-doo "0.1.6"]] + [lein-doo "0.1.6"] + [venantius/ultra "0.4.1"] + [com.jakemccrary/lein-test-refresh "0.16.0"]] }} :doo {:build "test"} diff --git a/src/datomish/d.clj b/src/datomish/d.clj new file mode 100644 index 00000000..21587416 --- /dev/null +++ b/src/datomish/d.clj @@ -0,0 +1,196 @@ +(ns datomish.d + (:require [datomic.db :as db])) + +(use '[datomic.api :only [q db] :as d]) +(use 'clojure.pprint) + +(def uri "datomic:free://localhost:4334//news") +(d/create-database uri) +(def conn (d/connect uri)) + +(def schema-tx (read-string (slurp "src/datomish/schema.edn"))) +(println "schema-tx:") +(pprint schema-tx) + + +;; alter/attribute does not retract/assert install/attribute. +;; @(d/transact conn [{:db/id :news/baz +;; :db/cardinality :db.cardinality/one +;; :db.alter/_attribute :db.part/db}]) + +@(d/transact conn schema-tx) + +;; add some data: +(def data-tx [{:news/title "Rain Tomorrow", :db/id #db/id[:db.part/user -1000001]}]) + +@(d/transact conn data-tx) + +(def x + @(d/transact conn [[:db/add #db/id[:db.part/user -1] :news/title "Rain Tomorrow 1" #db/id[:db.part/tx -1]] + [:db/add #db/id[:db.part/user -2] :news/title "Rain Tomorrow 2" #db/id[:db.part/tx -2]] + [:db/add #db/id[:db.part/tx -2] :news/title "Test"]])) + + +;; This drops the tx entirely! +(def x + @(d/transact conn [[:db/add #db/id[:db.part/user -1] :news/title "Rain Tomorrow 3" 13194139534684]])) + +(def x + @(d/transact conn [[:db/add #db/id[:db.part/user -1] :news/title "Rain Tomorrow 3" 13194139534684] + {:db/id #db/id[:db.part/db -1] :db/ident :test/test2 :db.install/_partition :db.part/db} + [:db/add #db/id[:db.part/tx -2] :news/title "Test"]])) + +(def x + @(d/transact conn [[:db/add #db/id[:test/test2 -1] :news/title "Rain Tomorrow 5"]])) + +;; [:db/add #db/id[:db.part/user -2] :news/title "Rain Tomorrow 2" #db/id[:db.part/tx -2]] +;; [:db/add #db/id[:db.part/tx -2] :news/title "Test"]])) + + + + +(def results (q '[:find ?n :where [?n :news/title]] (db conn))) +(println (count results)) +(pprint results) +(pprint (first results)) + +(def id (ffirst results)) +(def entity (-> conn db (d/entity id))) + +;; display the entity map's keys +(pprint (keys entity)) + +;; display the value of the entity's community name +(println (:news/title entity)) + +;; @(d/transact conn [[:db/retract ) +@(d/transact conn [[:db/add 17592186045427 :news/title "Test"]]) + +;; 1. Caused by datomic.impl.Exceptions$IllegalArgumentExceptionInfo +;; :db.error/datoms-conflict Two datoms in the same transaction +;; conflict {:d1 [17592186045427 :news/title "Test" 13194139534372 +;; true], :d2 [17592186045427 :news/title "Test2" 13194139534372 +;; true]} + +;; 1. Caused by datomic.impl.Exceptions$IllegalArgumentExceptionInfo +;; :db.error/not-an-entity Unable to resolve entity: [:news/foobar +;; "a"] in datom [[:news/foobar "a"] :news/foobar "b"] +;; {:db/error :db.error/not-an-entity} +;; @(d/transact conn [[:db/add #db/id[db.part/user -1] :news/foobar "a"] +;; [:db/add #db/id[db.part/user -2] :news/zot "a"]]) + +;; @(d/transact conn [[:db/add #db/id[db.part/user -1] :news/baz #db/id[db.part/user -2]] +;; [:db/add #db/id[db.part/user -2] :news/baz #db/id[db.part/user -1]]]) +;; {:db-before datomic.db.Db@a0166706, :db-after datomic.db.Db@5d0d6a62, :tx-data [#datom[13194139534399 50 #inst "2016-07-20T04:45:19.553-00:00" 13194139534399 true] #datom[17592186045504 68 17592186045505 13194139534399 true] #datom[17592186045505 68 17592186045504 13194139534399 true]], :tempids {-9223350046622220289 17592186045504, -9223350046622220290 17592186045505}} + +;; @(d/transact conn [[:db/add [:news/foobar "a"] :news/foobar "b"]]) + +;; @(d/transact conn [[:db/retract 17592186045505 68 17592186045504 13194139534399]]) + +;; 1. Caused by datomic.impl.Exceptions$IllegalArgumentExceptionInfo +;; :db.error/not-a-data-function Unable to resolve data function: +;; {:news/foobar "a", :news/zot "a", :news/title "test"} +;; {:db/error :db.error/not-a-data-function} + +;; 1. Caused by datomic.impl.Exceptions$IllegalArgumentExceptionInfo +;; :db.error/entity-missing-db-id Missing :db/id +;; {:input {:news/foobar "a", :news/zot "a", :news/title "test"}, :db/error :db.error/entity-missing-db-id} +;; @(d/transact conn [{:news/foobar "a" :news/zot "a" :news/title "test"}]) + +;; ;; 1. Caused by datomic.impl.Exceptions$IllegalStateExceptionInfo +;; ;; :db.error/unique-conflict Unique conflict: :news/zot, value: a +;; ;; already held by: 17592186045489 asserted for: 17592186045483 +;; ;; {:db/error :db.error/unique-conflict} +;; @(d/transact conn [{:db/id [:news/foobar "a"] :news/zot "a" :news/title "test"}]) + +;; ;; 1. Caused by datomic.impl.Exceptions$IllegalStateExceptionInfo +;; ;; :db.error/unique-conflict Unique conflict: :news/zot, value: b +;; ;; already held by: 17592186045492 asserted for: 17592186045483 +;; @(d/transact conn [ +;; {:db/id #db/id[db.part/user -1] :news/foobar "c" :news/zot "c"}]) +;; ;; {:db/id #db/id[db.part/user -1] :news/zot "a"} ]) + +;; ;; 1. Caused by datomic.impl.Exceptions$IllegalStateExceptionInfo +;; ;; :db.error/unique-conflict Unique conflict: :news/foobar, value: b +;; ;; already held by: 17592186045478 asserted for: 1 +;; ;; {:db/error :db.error/unique-conflict} + +;; @(d/transact conn [[:db/add 17592186045478 :news/foobar "b"]]) +;; @(d/transact conn [[:db/add 17592186045478 :news/foobar "a"]]) + +;; ;; Datomic accepts two different id-literals resolving to the same entid. +(def txx #db/id[db.part/tx]) +(def x @(d/transact conn [[:db/add #db/id[db.part/user -1] :news/foobar "c"] + [:db/add #db/id[db.part/user -2] :news/zot "c"] + [:db/add txx :news/title "x"] + [:db/add #db/id[db.part/tx -5] :news/title "x"] + ])) + +(def x @(d/transact conn [[:db/add #db/id[db.part/user -1] :news/baz #db/id[db.part/tx]]])) + +;; ;; 1. Caused by datomic.impl.Exceptions$IllegalArgumentExceptionInfo +;; ;; :db.error/tempid-not-an-entity tempid used only as value in +;; ;; transaction +;; ;; {:db/error :db.error/tempid-not-an-entity} +;; @(d/transact conn [[:db/add #db/id[db.part/user -1] :news/baz #db/id[db.part/user -3]] +;; [:db/add #db/id[db.part/user -2] :news/baz #db/id[db.part/user -3]]]) + +;; ;; 2. Unhandled java.util.concurrent.ExecutionException +;; ;; java.lang.IndexOutOfBoundsException +;; @(d/transact conn [[:db/add #db/id[db.part/user -1] :db/ident :news/zot] +;; [:db/add #db/id[db.part/user -2] #db/id[db.part/user -1] "c"]]) + +(vec + (q '[:find ?added ?a ?v ?tx + :where + [17592186045427 ?a ?v ?tx ?added] + ;; [(>= ?e 17592186045427)] + ;; [(tx-ids ?log ?t1 ?t2) [?tx ...]] + ;; [(tx-data ?log ?tx) [[?e ?a ?v _ ?added]]] + ] + (d/db conn))) +;; [[true 63 "Test" 13194139534324]] + +(vec + (q '[:find ?e ?added ?a ?v ?tx + :in ?log ?t1 ?t2 + :where + [(tx-ids ?log ?t1 ?t2) [?tx ...]] + [(tx-data ?log ?tx) [[?e ?a ?v _ ?added]]] + [(= ?e 17592186045427)] + ] + (d/log conn) #inst "2013-08-01" #inst "2017-01-01")) + +[[17592186045427 false 63 "Rain Tomorrow" 13194139534324] [17592186045427 true 63 "Test" 13194139534324] [17592186045427 false 63 "Test" 13194139534325] [17592186045427 true 63 "Rain Tomorrow" 13194139534322]] +;; [[17592186045427 false 63 "Rain Tomorrow" 13194139534324] [17592186045427 true 63 "Test" 13194139534324] [17592186045427 true 63 "Rain Tomorrow" 13194139534322]] + +@(d/transact conn [[:db/retract 17592186045427 :news/title "Test"]]) + + + +(sort-by first (d/q '[:find ?a ?ident :where + [?e ?a ?ident] + [_ :db.install/attribute ?e]] (db/bootstrap-db))) + +(def x (db/bootstrap-db)) + +(pprint (vec (map (juxt :e :a :v :tx :added) (filter #(= 13194139533312 (:tx %)) (d/datoms x :eavt))))) +(pprint (sort (set (map (juxt :tx) (d/datoms x :eavt))))) + +(def tx0 13194139533312) +(def tx1 13194139533366) +(def tx2 13194139533368) +(def tx3 13194139533375) + +(pprint + (sort-by first (d/q '[:find ?e ?an ?v ?tx + :in $ ?tx + :where + [?e ?a ?v ?tx true] + [?a :db/ident ?an] + ] x tx3))) + + +;; (d/datoms x :eavt)))) + +;; 13194139533312 diff --git a/src/datomish/db.cljc b/src/datomish/db.cljc index 12c4f34e..6608a972 100644 --- a/src/datomish/db.cljc +++ b/src/datomish/db.cljc @@ -8,16 +8,19 @@ [datomish.pair-chan :refer [go-pair !]]]) #?@(:cljs [[datomish.pair-chan] @@ -34,6 +37,9 @@ (uncaughtException [_ thread ex] (println ex "Uncaught exception on" (.getName thread)))))) +(def max-sql-vars 999) ;; TODO: generalize. + + ;; ---------------------------------------------------------------------------- ;; define data-readers to be made available to EDN readers. in CLJS ;; they're magically available. in CLJ, data_readers.clj may or may @@ -55,7 +61,7 @@ (->TempId part idx))) (defn id-literal? [x] - (and (instance? TempId x))) + (instance? TempId x)) (defprotocol IClock (now @@ -74,13 +80,13 @@ [db] "Return the schema of this database.") - (idents - [db] - "Return the known idents of this database, as a map from keyword idents to entids.") + (entid + [db ident] + "Returns the entity id associated with a symbolic keyword, or the id itself if passed.") - (current-tx - [db] - "TODO: document this interface.") + (ident + [db eid] + "Returns the keyword associated with an id, or the key itself if passed.") (in-transaction! [db chan-fn] @@ -88,29 +94,29 @@ commit the transaction; otherwise, rollback the transaction. Returns a pair-chan resolving to the pair-chan returned by `chan-fn`.") - ( entid} of known idents. See http://docs.datomic.com/identity.html#idents. +(defn datoms-attribute-transform + [db x] + {:pre [(db? db)]} + (entid db x)) + +(defn datoms-constant-transform + [db x] + {:pre [(db? db)]} + (sqlite-schema/->SQLite x)) + +(defn datoms-source [db] + (source/map->DatomsSource + {:table :datoms + :schema (:schema db) + :fulltext-table :fulltext_datoms + :fulltext-values :fulltext_values + :fulltext-view :all_datoms + :columns [:e :a :v :tx :added] + :attribute-transform (partial datoms-attribute-transform db) + :constant-transform (partial datoms-constant-transform db) + :table-alias source/gensym-table-alias + :make-constraints nil})) + +(defn- retractions->queries [retractions tx fulltext? ->SQLite] + (let + [f-q + "WITH vv AS (SELECT rowid FROM fulltext_values WHERE text = ?) + INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) + VALUES (?, ?, (SELECT rowid FROM vv), ?, 0, ?, (SELECT rowid FROM vv), ?)" + + non-f-q + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, sv, svalue_type_tag) + VALUES (?, ?, ?, ?, 0, ?, ?, ?)"] + (map + (fn [[_ e a v]] + (let [[v tag] (->SQLite a v)] + (if (fulltext? a) + [f-q + v e a tx tag tag] + [non-f-q + e a v tx tag v tag]))) + retractions))) + +(defn- non-fts-many->queries [ops tx ->SQLite indexing? ref? unique?] + (let [q "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " + + values-part + ;; e0, a0, v0, tx0, added0, value_type_tag0 + ;; index_avet0, index_vaet0, index_fulltext0, + ;; unique_value0, sv, svalue_type_tag + "(?, ?, ?, ?, 1, ?, ?, ?, 0, ?, ?, ?)" + + repeater (memoize (fn [n] (interpose ", " (repeat n values-part))))] + + ;; This query takes ten variables per item. So we partition into max-sql-vars / 10. + (map + (fn [chunk] + (cons + ;; Query string. + (apply str q (repeater (count chunk))) + + ;; Bindings. + (mapcat (fn [[_ e a v]] + (let [[v tag] (->SQLite a v)] + [e a v tx tag + (indexing? a) ; index_avet + (ref? a) ; index_vaet + (unique? a) ; unique_value + v tag])) + chunk))) + + (partition-all (quot max-sql-vars 10) ops)))) + +(defn- non-fts-one->queries [ops tx ->SQLite indexing? ref? unique?] + (let [first-values-part + ;; TODO: order value and tag closer together. + ;; flags0 + ;; sv, svalue_type_tag + "(?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?)" + first-repeater (memoize (fn [n] (interpose ", " (repeat n first-values-part)))) + + second-values-part + "(?, ?, ?, ?, ?, ?)" + second-repeater (memoize (fn [n] (interpose ", " (repeat n second-values-part)))) + ] + + ;; :db.cardinality/one takes two queries. + (mapcat + (fn [chunk] + [(cons + (apply + str + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " + (first-repeater (count chunk))) + (mapcat (fn [[_ e a v]] + (let [[v tag] (->SQLite a v)] + [e a v tx 1 tag + (indexing? a) ; index_avet + (ref? a) ; index_vaet + (unique? a) ; unique_value + v tag])) + chunk)) + + (cons + (apply + str + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0) VALUES " + (second-repeater (count chunk))) + (mapcat (fn [[_ e a v]] + (let [[v tag] (->SQLite a v)] + [e a v tx 0 tag])) + chunk))]) + (partition-all (quot max-sql-vars 11) ops)))) + +(def initial-many-searchid 2000) ; Just to make it more obvious in the DB. +(def initial-one-searchid 5000) + +;;; An FTS insertion happens in two parts. +;;; +;;; Firstly, we ensure that the fulltext value is present in the store. +;;; This is effectively an INSERT OR IGNORE… but FTS tables don't support +;;; uniqueness constraints. So we do it through a trigger on a view. +;;; +;;; When we insert the value, we pass with it a searchid. We'll use this +;;; later when inserting the datom, then we'll throw it away. The FTS table +;;; only contains searchids for the duration of the transaction that uses +;;; them. +;;; +;;; Secondly, we insert a row just like for non-FTS. The only difference +;;; is that the value is the rowid into the fulltext_values table. +(defn- fts-many->queries [ops tx ->SQLite indexing? ref? unique?] + ;; TODO: operations with the same text value should be + ;; coordinated here! + ;; It'll work fine without so long as queries are executed + ;; in order and not combined, but even so it's inefficient. + (conj + (mapcat + (fn [[_ e a v] searchid] + (let [[v tag] (->SQLite a v)] + ;; First query: ensure the value exists. + [["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)" + v searchid] + + ;; Second query: tx_lookup. + [(str + "WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) " + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " + "(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)") + searchid + e a tx tag + (indexing? a) ; index_avet + (ref? a) ; index_vaet + (unique? a) ; unique_value + tag]])) + ops + (range initial-many-searchid 999999999)) + ["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"])) + +(defn fts-one->queries [ops tx ->SQLite indexing? ref? unique?] + (conj + (mapcat + (fn [[_ e a v] searchid] + (let [[v tag] (->SQLite a v)] + ;; First query: ensure the value exists. + [["INSERT INTO fulltext_values_view (text, searchid) VALUES (?, ?)" + v searchid] + + ;; Second and third queries: tx_lookup. + [(str + "WITH vv(rowid) AS (SELECT rowid FROM fulltext_values WHERE searchid = ?) " + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0, index_avet0, index_vaet0, index_fulltext0, unique_value0, sv, svalue_type_tag) VALUES " + "(?, ?, (SELECT rowid FROM vv), ?, 1, ?, ?, ?, 1, ?, (SELECT rowid FROM vv), ?)") + searchid + e a tx tag + (indexing? a) ; index_avet + (ref? a) ; index_vaet + (unique? a) ; unique_value + tag] + + [(str + "INSERT INTO tx_lookup (e0, a0, v0, tx0, added0, value_type_tag0) VALUES " + "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, 0, ?)") + e a searchid tx tag]])) + ops + (range initial-one-searchid 999999999)) + ["UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL"])) + +(defn- -run-queries [conn queries exception-message] + (go-pair + (try + (doseq [q queries] + (SQLite (partial ds/->SQLite schema) + fulltext? (memoize (partial ds/fulltext? schema)) + many? (memoize (fn [a] (ds/multival? schema a))) + indexing? (memoize (fn [a] (ds/indexing? schema a))) + ref? (memoize (fn [a] (ds/ref? schema a))) + unique? (memoize (fn [a] (ds/unique? schema a))) + conn (:sqlite-connection db) + + ;; Collect all the queries we need to run. + queries (atom []) + operations (group-by first entities)] + + (when-not (clojure.set/subset? (keys operations) #{:db/retract :db/add}) + (raise (str "Unknown operations " (keys operations)) + {:error :transact/syntax, :operations (dissoc operations :db/retract :db/add)})) + + ;; We can turn all non-FTS operations into simple SQL queries that we run serially. + ;; FTS queries require us to get a rowid from the FTS table and use that for + ;; insertion, so we need another pass. + ;; We can't just freely use `go-pair` here, because this function is so complicated + ;; that ClojureScript blows the stack trying to compile it. + + (when-let [retractions (:db/retract operations)] + (swap! + queries concat (retractions->queries retractions tx fulltext? ->SQLite))) + + ;; We want to partition our additions into four groups according to two + ;; characteristics: whether they require writing to the FTS value table, + ;; and whether the attribute has a 'many' cardinality constraint. Each of + ;; these four requires different queries. + (let [additions + (group-by (fn [[op e a v]] + (if (fulltext? a) + (if (many? a) + :fts-many + :fts-one) + (if (many? a) + :non-fts-many + :non-fts-one))) + (:db/add operations)) + transforms + {:fts-one fts-one->queries + :fts-many fts-many->queries + :non-fts-one non-fts-one->queries + :non-fts-many non-fts-many->queries}] + + (doseq [[key ops] additions] + (when-let [transform (key transforms)] + (swap! + queries concat + (transform ops tx ->SQLite indexing? ref? unique?))))) + + ;; Now run each query. + ;; This code is a little tortured to avoid blowing the compiler stack in cljs. + + (go-pair + (Datom schema) + (Context (source/datoms-source db) nil nil)) + (query-context [db] (context/make-context (datoms-source db))) (schema [db] (.-schema db)) - (idents [db] (.-idents db)) + (entid [db ident] + (if (keyword? ident) + (get (.-ident-map db) ident ident) + ident)) - (current-tx - [db] - (inc (:current-tx db))) + (ident [db eid] + (if-not (keyword? eid) + (get (.-ident-map db) eid eid) + eid)) (in-transaction! [db chan-fn] (s/in-transaction! (:sqlite-connection db) chan-fn)) - ;; TODO: use q for searching? Have q use this for searching for a single pattern? - (SQLite schema a v))] ;; We assume e and a are always given. - (go-pair - (->> - {:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns. - :from [:all_datoms] - :where (cons :and (map #(vector := %1 %2) [:e :a :v] (take-while (comp not nil?) [e a v])))} ;; Must drop nils. - (s/format) - - (s/all-rows (:sqlite-connection db)) - (Datom (.-schema db))))))) ;; TODO: understand why (schema db) fails. - - (SQLite schema a v)] - (go-pair - (->> - {:select [:e :a :v :tx [1 :added]] ;; TODO: generalize columns. - :from [:all_datoms] - :where [:and [:= :a a] [:= :v v] [:= :index_avet 1]]} - (s/format) - - (s/all-rows (:sqlite-connection db)) - (Datom (.-schema db))))))) ;; TODO: understand why (schema db) fails. - - (SQLite schema a v) - fulltext? (ds/fulltext? schema a)] - ;; Append to transaction log. - ( + (:sqlite-connection db) + (s/all-rows ["SELECT EXISTS(SELECT 1 FROM transactions LIMIT 1) AS bootstrapped"]) + (SQLite schema a v) + yield-datom + (fn [rows] + (when-let [row (first rows)] + (row->Datom schema row)))] + (go-pair + (->> + ;; TODO: generalize columns. + ["SELECT e, a, v, tx, 1 AS added FROM all_datoms + WHERE index_avet = 1 AND a = ? AND value_type_tag = ? AND v = ? + LIMIT 1" a tag v] + + (s/all-rows (:sqlite-connection db)) + SQLite (get-in ds/value-type-map [:db.type/keyword :->SQLite]) ;; TODO: make this a protocol. - exec (partial s/execute! (:sqlite-connection db))] ;; TODO: batch insert. (doseq [[ident entid] added-idents] (SQLite ident) entid])))) - db)) + ["INSERT INTO idents VALUES (?, ?)" (sqlite-schema/->SQLite ident) entid])))) - (SQLite (get-in ds/value-type-map [:db.type/keyword :->SQLite]) ;; TODO: make this a protocol. - exec (partial s/execute! (:sqlite-connection db))] + (let [exec (partial s/execute! (:sqlite-connection db))] ;; TODO: batch insert. (doseq [[ident attr-map] fragment] (doseq [[attr value] attr-map] (SQLite ident) (->SQLite attr) (->SQLite value)]))))) - db)) + ["INSERT INTO schema VALUES (?, ?, ?)" (sqlite-schema/->SQLite ident) (sqlite-schema/->SQLite attr) (sqlite-schema/->SQLite value)]))))) + + (let [symbolic-schema (merge-with merge (:symbolic-schema db) fragment) + schema (ds/schema (into {} (map (fn [[k v]] [(entid db k) v]) symbolic-schema)))] + (assoc db + :symbolic-schema symbolic-schema + :schema schema)))) (close-db [db] (s/close (.-sqlite-connection db))) @@ -261,6 +609,24 @@ :cljs (.getTime (js/Date.))))) +(defn with-ident [db ident entid] + (update db :ident-map #(assoc % ident entid, entid ident))) + +(defn db [sqlite-connection idents schema] + {:pre [(map? idents) + (every? keyword? (keys idents)) + (map? schema) + (every? keyword? (keys schema))]} + (let [entid-schema (ds/schema (into {} (map (fn [[k v]] [(k idents) v]) schema))) ;; TODO: fail if ident missing. + ident-map (into idents (clojure.set/map-invert idents))] + (map->DB + {:sqlite-connection sqlite-connection + :ident-map ident-map + :symbolic-schema schema + :schema entid-schema + ;; TODO :parts + }))) + ;; TODO: factor this into the overall design. (defn !]]]) + #?@(:cljs [[datomish.pair-chan] + [cljs.core.async :as a :refer [chan !]]])) + #?(:clj + (:import + [datomish.datom Datom]))) + +(defn > + (s/all-rows (:sqlite-connection db) ["SELECT e, a, v, tx FROM datoms WHERE tx > ?" tx]) + (> + (s/all-rows (:sqlite-connection db) ["SELECT a, v FROM datoms WHERE e = ?" eid]) + (> + (s/all-rows (:sqlite-connection db) ["SELECT e, a, v, tx, added FROM transactions WHERE tx > ? ORDER BY tx ASC, e, a, v, added" tx]) + (> + (s/all-rows (:sqlite-connection db) ["SELECT rowid, text FROM fulltext_values"]) + ( (integer entid), like {:db/ident 0}." - (let [<-SQLite (get-in ds/value-type-map [:db.type/keyword :<-SQLite])] ;; TODO: make this a protocol. - (go-pair - (let [rows (> - {:select [:ident :entid] :from [:idents]} - (s/format) - (s/all-rows sqlite-connection)))] - (into {} (map (fn [row] [(<-SQLite (:ident row)) (:entid row)])) rows))))) - -(defn > + {:select [:ident :entid] :from [:idents]} + (s/format) + (s/all-rows sqlite-connection)))] + (into {} (map (fn [row] [(sqlite-schema/<-SQLite :db.type/keyword (:ident row)) (:entid row)])) rows)))) (defn (map (keyword attribute -> keyword value)), like {:db/ident {:db/cardinality :db.cardinality/one}}." - (let [<-SQLite (get-in ds/value-type-map [:db.type/keyword :<-SQLite])] ;; TODO: make this a protocol. - (go-pair + (go-pair + (->> (->> - (->> - {:select [:ident :attr :value] :from [:schema]} - (s/format) - (s/all-rows sqlite-connection)) - (= current-tx 0) - current-tx (max current-tx tx0)] - (when-not bootstrapped + (let [db (db/db sqlite-connection bootstrap/idents bootstrap/symbolic-schema) + bootstrapped? ( (db/map->DB - {:sqlite-connection sqlite-connection - :idents bootstrap/idents - :symbolic-schema bootstrap/symbolic-schema - :schema (ds/schema (into {} (map (fn [[k v]] [(k bootstrap/idents) v]) bootstrap/symbolic-schema))) ;; TODO: fail if ident missing. - :current-tx current-tx}) - ;; We use db + ;; We use DB - {:sqlite-connection sqlite-connection - :idents idents - :symbolic-schema symbolic-schema - :schema (ds/schema (into {} (map (fn [[k v]] [(k idents) v]) symbolic-schema))) ;; TODO: fail if ident missing. - :current-tx (inc current-tx)}))))) + (db/db sqlite-connection idents symbolic-schema))))) diff --git a/src/datomish/exec_repl.cljc b/src/datomish/exec_repl.cljc index cfe8b5ea..66f2bfcc 100644 --- a/src/datomish/exec_repl.cljc +++ b/src/datomish/exec_repl.cljc @@ -9,6 +9,7 @@ [datomish.pair-chan :refer [go-pair ?v 1438748166567751)] [?page :page/title ?title] [?page :page/url ?url] ] {}))))))) #_ (go-pair (let [connection (!]]]) + #?@(:cljs [[datomish.pair-chan] + [cljs.core.async :as a :refer [chan !]]]))) + +(def places-schema-fragment + [{:db/id (db/id-literal :db.part/user) + :db/ident :page/url + :db/unique :db.unique/identity + :db/valueType :db.type/string ;; TODO: uri + :db.install/_attribute :db.part/db} + {:db/id (db/id-literal :db.part/user) + :db/ident :page/guid + :db/unique :db.unique/identity + :db/valueType :db.type/string ;; TODO: uuid or guid? + :db.install/_attribute :db.part/db} + {:db/id (db/id-literal :db.part/user) + :db/ident :page/title + :db/cardinality :db.cardinality/one + :db/valueType :db.type/string + :db.install/_attribute :db.part/db} + {:db/id (db/id-literal :db.part/user) + :db/ident :page/visitAt + :db/cardinality :db.cardinality/many + :db/valueType :db.type/long ;; TODO: instant + :db.install/_attribute :db.part/db} + ]) + + +(defn- place->entity [[id rows]] + (let [title (:title (first rows)) + required {:db/id (db/id-literal :db.part/user) + :page/url (:url (first rows)) + :page/guid (:guid (first rows))} + visits (keep :visit_date rows)] + + (util/assoc-if required + :page/title title + :page/visitAt visits))) + +(defn import-titles [conn places-connection] + (go-pair + (let [rows + (entity (group-by :id rows))))))) + +(defn import-titles-from-path [db places] + (go-pair + (let [conn (transact/connection-with-db db) + pdb (sql-clause [context] - (merge - {:select (projection/sql-projection context) + (let [inner + (merge + {:select (projection/sql-projection-for-relation context) - ;; Always SELECT DISTINCT, because Datalog is set-based. - ;; TODO: determine from schema analysis whether we can avoid - ;; the need to do this. - :modifiers [:distinct]} - (clauses/cc->partial-subquery (:cc context)))) + ;; Always SELECT DISTINCT, because Datalog is set-based. + ;; TODO: determine from schema analysis whether we can avoid + ;; the need to do this. + :modifiers [:distinct]} + (clauses/cc->partial-subquery (:cc context)))] + (if (:has-aggregates? context) + (merge + (when-not (empty? (:group-by-vars context)) + ;; We shouldn't need to account for types here, until we account for + ;; `:or` clauses that bind from different attributes. + {:group-by (map util/var->sql-var (:group-by-vars context))}) + {:select (projection/sql-projection-for-aggregation context :preag) + :modifiers [:distinct] + :from [:preag] + :with {:preag inner}}) + inner))) (defn context->sql-string [context args] (-> @@ -56,8 +68,9 @@ (sql/format args :quoting sql-quoting-style))) (defn- validate-with [with] - (when-not (nil? with) - (raise-str "`with` not supported."))) + (when-not (or (nil? with) + (every? #(instance? Variable %1) with)) + (raise "Complex :with not supported." {:with with}))) (defn- validate-in [in] (when (nil? in) @@ -91,10 +104,15 @@ (let [{:keys [find in with where]} find] ; Destructure the Datalog query. (validate-with with) (validate-in in) - (let [external-bindings (in->bindings in)] + (let [external-bindings (in->bindings in) + elements (:elements find) + known-types {} + group-by-vars (projection/extract-group-by-vars elements with)] (assoc context - :elements (:elements find) - :cc (clauses/patterns->cc (:default-source context) where external-bindings))))) + :elements elements + :group-by-vars group-by-vars + :has-aggregates? (not (nil? group-by-vars)) + :cc (clauses/patterns->cc (:default-source context) where known-types external-bindings))))) (defn find->sql-clause "Take a parsed `find` expression and turn it into a structured SQL @@ -116,21 +134,22 @@ [q] (dp/parse-query q)) -(comment - (def sql-quoting-style nil) - (datomish.query/find->sql-string - (datomish.query.context/->Context (datomish.query.source/datoms-source nil) nil nil) - (datomish.query/parse - '[:find ?timestampMicros ?page :in $ ?latest :where - [?page :page/starred true ?t] - [?t :db/txInstant ?timestampMicros] - (not [(> ?t ?latest)]) ]) - {:latest 5}) -) +#_ +(def sql-quoting-style nil) #_ (datomish.query/find->sql-string - (datomish.query.context/->Context (datomish.query.source/datoms-source nil) nil nil) + (datomish.query.context/make-context (datomish.query.source/datoms-source nil)) + (datomish.query/parse + '[:find ?timestampMicros ?page :in $ ?latest :where + [?page :page/starred true ?t] + [?t :db/txInstant ?timestampMicros] + (not [(> ?t ?latest)]) ]) + {:latest 5}) + +#_ +(datomish.query/find->sql-string + (datomish.query.context/make-context (datomish.query.source/datoms-source nil)) (datomish.query/parse '[:find ?page :in $ ?latest :where [?page :page/url "http://example.com/"] diff --git a/src/datomish/query/cc.cljc b/src/datomish/query/cc.cljc index 324bc3d3..3f211783 100644 --- a/src/datomish/query/cc.cljc +++ b/src/datomish/query/cc.cljc @@ -8,6 +8,7 @@ :refer [attribute-in-source constant-in-source]] [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] + [honeysql.core :as sql] [datascript.parser :as dp #?@(:cljs [:refer @@ -52,27 +53,97 @@ ;; ;; `from` is a list of [source alias] pairs, suitable for passing to honeysql. ;; `bindings` is a map from var to qualified columns. +;; `known-types` is a map from var to type keyword (e.g., :db.type/ref) +;; `extracted-types` is a mapping, similar to `bindings`, but used to pull +;; type tags out of the store at runtime. ;; `wheres` is a list of fragments that can be joined by `:and`. -(defrecord ConjoiningClauses [source from external-bindings bindings wheres]) +(defrecord ConjoiningClauses + [source + from ; [[:datoms 'datoms123]] + external-bindings ; {?var0 (sql/param :foobar)} + bindings ; {?var1 :datoms123.v} + known-types ; {?var1 :db.type/integer} + extracted-types ; {?var2 :datoms123.value_type_tag} + wheres ; [[:= :datoms123.v 15]] + ctes ; {:name {:select …}} + ]) -(defn bind-column-to-var [cc variable col] - (let [var (:symbol variable)] - (util/conj-in cc [:bindings var] col))) +(defn bind-column-to-var [cc variable table position] + (let [var (:symbol variable) + col (sql/qualify table (name position)) + bound (util/append-in cc [:bindings var] col)] + (if (or (not (= position :v)) + (contains? (:known-types cc) var) + (contains? (:extracted-types cc) var)) + ;; Type known; no need to accumulate a type-binding. + bound + (let [tag-col (sql/qualify table :value_type_tag)] + (assoc-in bound [:extracted-types var] tag-col))))) -(defn constrain-column-to-constant [cc col position value] - (util/conj-in cc [:wheres] - [:= col (if (= :a position) - (attribute-in-source (:source cc) value) - (constant-in-source (:source cc) value))])) +(defn constrain-column-to-constant [cc table position value] + (let [col (sql/qualify table (name position))] + (util/append-in cc + [:wheres] + [:= col (if (= :a position) + (attribute-in-source (:source cc) value) + (constant-in-source (:source cc) value))]))) -(defn augment-cc [cc from bindings wheres] +(defprotocol ITypeTagged (->tag-codes [x])) + +(extend-protocol ITypeTagged + #?@(:cljs + [string (->tag-codes [x] #{4 10 11 12}) + Keyword (->tag-codes [x] #{13}) ; TODO: what about idents? + boolean (->tag-codes [x] #{1}) + number (->tag-codes [x] + (if (integer? x) + #{0 4 5} ; Could be a ref or a number or a date. + #{4 5}))]) ; Can't be a ref. + #?@(:clj + [String (->tag-codes [x] #{10}) + clojure.lang.Keyword (->tag-codes [x] #{13}) ; TODO: what about idents? + Boolean (->tag-codes [x] #{1}) + Integer (->tag-codes [x] #{0 5}) ; Could be a ref or a number. + Long (->tag-codes [x] #{0 5}) ; Could be a ref or a number. + Float (->tag-codes [x] #{5}) + Double (->tag-codes [x] #{5}) + java.util.UUID (->tag-codes [x] #{11}) + java.util.Date (->tag-codes [x] #{4}) + java.net.URI (->tag-codes [x] #{12})])) + +(defn constrain-value-column-to-constant + "Constrain a `v` column. Note that this can contribute *two* + constraints: one for the column itself, and one for the type tag. + We don't need to do this if the attribute is known and thus + constrains the type." + [cc table-alias value] + (let [possible-type-codes (->tag-codes value) + aliased (sql/qualify table-alias (name :value_type_tag)) + clauses (map + (fn [code] [:= aliased code]) + possible-type-codes)] + (util/concat-in cc [:wheres] + ;; Type checks then value checks. + [(case (count clauses) + 0 (raise-str "Unexpected number of clauses.") + 1 (first clauses) + (cons :or clauses)) + [:= (sql/qualify table-alias (name :v)) + (constant-in-source (:source cc) value)]]))) + +(defn augment-cc [cc from bindings extracted-types wheres] (assoc cc :from (concat (:from cc) from) :bindings (merge-with concat (:bindings cc) bindings) + :extracted-types (merge (:extracted-types cc) extracted-types) :wheres (concat (:wheres cc) wheres))) (defn merge-ccs [left right] - (augment-cc left (:from right) (:bindings right) (:wheres right))) + (augment-cc left + (:from right) + (:bindings right) + (:extracted-types right) + (:wheres right))) (defn- bindings->where "Take a bindings map like @@ -115,16 +186,19 @@ (impose-external-bindings (assoc cc :wheres ;; Note that the order of clauses here means that cross-pattern var bindings - ;; come first. That's OK: the SQL engine considers these altogether. - (concat (bindings->where (:bindings cc)) - (:wheres cc))))) + ;; come last That's OK: the SQL engine considers these altogether. + (concat (:wheres cc) + (bindings->where (:bindings cc)))))) -(defn binding-for-symbol-or-throw [cc symbol] +(defn binding-for-symbol [cc symbol] (let [internal-bindings (symbol (:bindings cc)) external-bindings (symbol (:external-bindings cc))] (or (first internal-bindings) - (first external-bindings) - (raise-str "No bindings yet for " symbol)))) + (first external-bindings)))) + +(defn binding-for-symbol-or-throw [cc symbol] + (or (binding-for-symbol cc symbol) + (raise-str "No bindings yet for " symbol))) (defn argument->value "Take a value from an argument list and resolve it against the CC. diff --git a/src/datomish/query/clauses.cljc b/src/datomish/query/clauses.cljc index 69920f41..5052b3d0 100644 --- a/src/datomish/query/clauses.cljc +++ b/src/datomish/query/clauses.cljc @@ -4,64 +4,96 @@ (ns datomish.query.clauses (:require - [datomish.query.cc :as cc] - [datomish.query.functions :as functions] - [datomish.query.source - :refer [attribute-in-source - constant-in-source - source->from - source->constraints]] - [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] - [datascript.parser :as dp - #?@(:cljs - [:refer - [ - Constant - DefaultSrc - Function - Not - Or - Pattern - Placeholder - PlainSymbol - Predicate - Variable - ]])] - [honeysql.core :as sql] - [clojure.string :as str] - ) + [datomish.query.cc :as cc] + [datomish.query.functions :as functions] + [datomish.query.source + :refer [pattern->schema-value-type + attribute-in-source + constant-in-source + source->from + source->constraints]] + [datomish.schema :as schema] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] + [datascript.parser :as dp + #?@(:cljs + [:refer + [ + Constant + DefaultSrc + Function + Not + Or + Pattern + Placeholder + PlainSymbol + Predicate + Variable + ]])] + [honeysql.core :as sql] + [clojure.string :as str] + ) #?(:clj - (:import - [datascript.parser - Constant - DefaultSrc - Function - Not - Or - Pattern - Placeholder - PlainSymbol - Predicate - Variable - ]))) + (:import + [datascript.parser + Constant + DefaultSrc + Function + Not + Or + Pattern + Placeholder + PlainSymbol + Predicate + Variable + ]))) ;; Pattern building is recursive, so we need forward declarations. (declare Not->NotJoinClause not-join->where-fragment simple-or? simple-or->cc) +(defn- check-or-apply-value-type [cc value-type pattern-part] + (if (nil? value-type) + cc + (condp instance? pattern-part + Placeholder + cc + + Variable + (let [var-sym (:symbol pattern-part)] + (if-let [existing-type (var-sym (:known-types cc))] + (if (= existing-type value-type) + cc + (raise "Var " var-sym " already has type " existing-type "; this pattern wants " value-type + {:pattern pattern-part :value-type value-type})) + (assoc-in cc [:known-types var-sym] value-type))) + + Constant + (do + (or (and (= :db.type/ref value-type) + (or (keyword? (:value pattern-part)) ; ident + (integer? (:value pattern-part)))) ; entid + (schema/ensure-value-matches-type value-type (:value pattern-part))) + cc)))) + (defn- apply-pattern-clause-for-alias "This helper assumes that `cc` has already established a table association for the provided alias." [cc alias pattern] - (let [places (map vector - (:pattern pattern) - (:columns (:source cc)))] + (let [pattern (:pattern pattern) + columns (:columns (:source cc)) + places (map vector pattern columns) + value-type (pattern->schema-value-type (:source cc) pattern)] ; Optional; e.g., :db.type/string (reduce (fn [cc [pattern-part ; ?x, :foo/bar, 42 position]] ; :a - (let [col (sql/qualify alias (name position))] ; :datoms123.a + (let [cc (case position + ;; TODO: we should be able to constrain :e and :a to be + ;; entities... but the type checker expects that to be an int. + :v (check-or-apply-value-type cc value-type pattern-part) + :e (check-or-apply-value-type cc :db.type/ref pattern-part) + cc)] (condp instance? pattern-part ;; Placeholders don't contribute any bindings, nor do ;; they constrain the query -- there's no need to produce @@ -70,10 +102,16 @@ cc Variable - (cc/bind-column-to-var cc pattern-part col) + (cc/bind-column-to-var cc pattern-part alias position) Constant - (cc/constrain-column-to-constant cc col position (:value pattern-part)) + (if (and (nil? value-type) + (= position :v)) + ;; If we don't know the type, but we have a constant, generate + ;; a :wheres clause constraining the accompanying value_type_tag + ;; column. + (cc/constrain-value-column-to-constant cc alias (:value pattern-part)) + (cc/constrain-column-to-constant cc alias position (:value pattern-part))) (raise "Unknown pattern part." {:part pattern-part :clause pattern})))) @@ -105,7 +143,7 @@ (apply-pattern-clause-for-alias ;; Record the new table mapping. - (util/conj-in cc [:from] [table alias]) + (util/append-in cc [:from] [table alias]) ;; Use the new alias for columns. alias @@ -114,7 +152,7 @@ (defn- plain-symbol->sql-predicate-symbol [fn] (when-not (instance? PlainSymbol fn) (raise-str "Predicate functions must be named by plain symbols." fn)) - (#{:> :< :=} (keyword (name (:symbol fn))))) + (#{:> :>= :< :<= := :!=} (keyword (name (:symbol fn))))) (defn apply-predicate-clause [cc predicate] (when-not (instance? Predicate predicate) @@ -124,7 +162,7 @@ (raise-str "Unknown function " (:fn predicate))) (let [args (map (partial cc/argument->value cc) (:args predicate))] - (util/conj-in cc [:wheres] (cons f args))))) + (util/append-in cc [:wheres] (cons f args))))) (defn apply-not-clause [cc not] (when-not (instance? Not not) @@ -136,13 +174,19 @@ ;; fragment, and include the external bindings so that they match up. ;; Otherwise, we need to delay. Right now we're lazy, so we just fail: ;; reorder your query yourself. - (util/conj-in cc [:wheres] - (not-join->where-fragment - (Not->NotJoinClause (:source cc) - (merge-with concat - (:external-bindings cc) - (:bindings cc)) - not)))) + ;; + ;; Note that we don't extract and reuse any types established inside + ;; the `not` clause: perhaps those won't make sense outside. But it's + ;; a filter, so we push the external types _in_. + (util/append-in cc + [:wheres] + (not-join->where-fragment + (Not->NotJoinClause (:source cc) + (:known-types cc) + (merge-with concat + (:external-bindings cc) + (:bindings cc)) + not)))) (defn apply-or-clause [cc orc] (when-not (instance? Or orc) @@ -163,6 +207,7 @@ (if (simple-or? orc) (cc/merge-ccs cc (simple-or->cc (:source cc) + (:known-types cc) (merge-with concat (:external-bindings cc) (:bindings cc)) @@ -200,14 +245,17 @@ [cc patterns] (reduce apply-clause cc patterns)) -(defn patterns->cc [source patterns external-bindings] +(defn patterns->cc [source patterns known-types external-bindings] (cc/expand-where-from-bindings (expand-pattern-clauses (cc/map->ConjoiningClauses {:source source :from [] + :known-types (or known-types {}) + :extracted-types {} :external-bindings (or external-bindings {}) :bindings {} + :ctes {} :wheres []}) patterns))) @@ -218,6 +266,8 @@ [cc] (merge {:from (:from cc)} + (when-not (empty? (:ctes cc)) + {:with (:ctes cc)}) (when-not (empty? (:wheres cc)) {:where (cons :and (:wheres cc))}))) @@ -230,24 +280,23 @@ ;; that a declared variable list is valid for the clauses given. (defrecord NotJoinClause [unify-vars cc]) -(defn make-not-join-clause [source external-bindings unify-vars patterns] - (->NotJoinClause unify-vars (patterns->cc source patterns external-bindings))) - -(defn Not->NotJoinClause [source external-bindings not] +(defn Not->NotJoinClause [source known-types external-bindings not] (when-not (instance? DefaultSrc (:source not)) (raise "Non-default sources are not supported in `not` clauses." {:clause not})) - (make-not-join-clause source external-bindings (:vars not) (:clauses not))) + (map->NotJoinClause + {:unify-vars (:vars not) + :cc (patterns->cc source (:clauses not) known-types external-bindings)})) (defn not-join->where-fragment [not-join] [:not - (if (empty? (:bindings (:cc not-join))) - ;; If the `not` doesn't establish any bindings, it means it only contains - ;; expressions that constrain variables established outside itself. - ;; We can just return an expression. - (cons :and (:wheres (:cc not-join))) + (if (empty? (:bindings (:cc not-join))) + ;; If the `not` doesn't establish any bindings, it means it only contains + ;; expressions that constrain variables established outside itself. + ;; We can just return an expression. + (cons :and (:wheres (:cc not-join))) - ;; If it does establish bindings, then it has to be a subquery. - [:exists (merge {:select [1]} (cc->partial-subquery (:cc not-join)))])]) + ;; If it does establish bindings, then it has to be a subquery. + [:exists (merge {:select [1]} (cc->partial-subquery (:cc not-join)))])]) ;; A simple Or clause is one in which each branch can be evaluated against @@ -288,15 +337,17 @@ (defn simple-or->cc "The returned CC has not yet had bindings expanded." - [source external-bindings orc] + [source known-types external-bindings orc] (validate-or-clause orc) ;; We 'fork' a CC for each pattern, then union them together. ;; We need to build the first in order that the others use the same - ;; column names. + ;; column names and known types. (let [cc (cc/map->ConjoiningClauses {:source source :from [] + :known-types (or known-types {}) + :extracted-types {} :external-bindings (or external-bindings {}) :bindings {} :wheres []}) @@ -307,6 +358,9 @@ ;; That was easy. primary + ;; Note that for a simple `or` clause, the same template is used for each, + ;; so we can simply use the `extracted-types` bindings from `primary`. + ;; A complex `or` is much harder to handle. (let [template (assoc primary :wheres []) alias (second (first (:from template))) ccs (map (partial apply-pattern-clause-for-alias template alias) @@ -315,7 +369,8 @@ ;; Because this is a simple clause, we know that the first pattern established ;; any necessary bindings. ;; Take any new :wheres from each CC and combine them with :or. - (assoc primary :wheres + (assoc primary + :wheres [(cons :or (reduce (fn [acc cc] (let [w (:wheres cc)] diff --git a/src/datomish/query/context.cljc b/src/datomish/query/context.cljc index 9437dc5f..b82814ff 100644 --- a/src/datomish/query/context.cljc +++ b/src/datomish/query/context.cljc @@ -2,8 +2,18 @@ ;; License, v. 2.0. If a copy of the MPL was not distributed with this ;; file, You can obtain one at http://mozilla.org/MPL/2.0/. -;; A context, very simply, holds on to a default source. Eventually -;; it'll also do projection and similar transforms. +;; A context, very simply, holds on to a default source and some knowledge +;; needed for aggregation. (ns datomish.query.context) -(defrecord Context [default-source elements cc]) +(defrecord Context + [ + default-source + elements ; The :find list itself. + has-aggregates? + group-by-vars ; A list of variables from :find and :with, used to generate GROUP BY. + cc ; The main conjoining clause. + ]) + +(defn make-context [source] + (->Context source nil false nil nil)) diff --git a/src/datomish/query/functions.cljc b/src/datomish/query/functions.cljc index 141b0f3e..2117f235 100644 --- a/src/datomish/query/functions.cljc +++ b/src/datomish/query/functions.cljc @@ -6,7 +6,12 @@ (:require [honeysql.format :as fmt] [datomish.query.cc :as cc] - [datomish.query.source :as source] + [datomish.schema :as schema] + [datomish.sqlite-schema :refer [->tag ->SQLite]] + [datomish.query.source + :as source + :refer [attribute-in-source + constant-in-source]] [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] [datascript.parser :as dp #?@(:cljs @@ -66,10 +71,8 @@ (when-not (and (instance? SrcVar src) (= "$" (name (:symbol src)))) (raise "Non-default sources not supported." {:arg src})) - (when-not (instance? Constant attr) - (raise "Non-constant fulltext attributes not supported." {:arg attr})) - - (when-not (fulltext-attribute? (:source cc) (:value attr)) + (when (and (instance? Constant attr) + (not (fulltext-attribute? (:source cc) (:value attr)))) (raise-str "Attribute " (:value attr) " is not a fulltext-indexed attribute.")) (when-not (and (instance? BindColl bind-coll) @@ -89,6 +92,18 @@ ;; We do not currently support scoring; the score value will always be 0. (let [[src attr search] (:args function) + ;; Note that DataScript's parser won't allow us to write a term like + ;; + ;; [(fulltext $ _ "foo") [[?x]]] + ;; + ;; so we instead have a placeholder attribute. Sigh. + attr-constant (or + (and (instance? Constant attr) + (not (= :any (:value attr))) + (source/attribute-in-source (:source cc) (:value attr))) + (and (instance? Variable attr) + (cc/binding-for-symbol-or-throw cc (:symbol attr)))) + ;; Pull out the symbols for the binding array. [entity value tx score] (map (comp :symbol :variable) ; This will nil-out placeholders. @@ -97,8 +112,8 @@ ;; Find the FTS table name and alias. We might have multiple fulltext ;; expressions so we will generate a query like ;; SELECT ttt.a FROM t1 AS ttt WHERE ttt.t1 MATCH 'string' - [fulltext-table fulltext-alias] (source/source->fulltext-from (:source cc)) ; [:t1 :ttt] - match-column (sql/qualify fulltext-alias fulltext-table) ; :ttt.t1 + [fulltext-table fulltext-alias] (source/source->fulltext-values (:source cc)) ; [:t1 :ttt] + match-column (sql/qualify fulltext-alias fulltext-table) ; :ttt.t1 match-value (cc/argument->value cc search) [datom-table datom-alias] (source/source->non-fulltext-from (:source cc)) @@ -107,22 +122,27 @@ from [[fulltext-table fulltext-alias] [datom-table datom-alias]] - wheres [[:match match-column match-value] ; The FTS match. + extracted-types {} ; TODO + + wheres (concat + [[:match match-column match-value] ; The FTS match. ;; The fulltext rowid-to-datom correspondence. [:= (sql/qualify datom-alias :v) - (sql/qualify fulltext-alias :rowid)] + (sql/qualify fulltext-alias :rowid)]] - ;; The attribute itself must match. - [:= - (sql/qualify datom-alias :a) - (source/attribute-in-source (:source cc) (:value attr))]] + (when attr-constant + ;; If known, the attribute itself must match. + [[:= + (sql/qualify datom-alias :a) + attr-constant]])) ;; Now compose any bindings for entity, value, tx, and score. ;; TODO: do we need to examine existing bindings to capture ;; wheres for any of these? We shouldn't, because the CC will ;; be internally cross-where'd when everything is done... + ;; TODO: bind attribute? bindings (into {} (filter (comp not nil? first) @@ -134,11 +154,97 @@ ;; if this is a variable rather than a placeholder. [score [0]]]))] - (cc/augment-cc cc from bindings wheres))) + (cc/augment-cc cc from bindings extracted-types wheres))) + +;; get-else is how Datalog handles optional attributes. +;; +;; It consists of: +;; * A bound entity +;; * A cardinality-one attribute +;; * A var to bind the value +;; * A default value. +;; +;; We model this as: +;; * A check against known bindings for the entity. +;; * A check against the schema for cardinality-one. +;; * Generating a COALESCE expression with a query inside the projection itself. +;; +;; Note that this will be messy for queries like: +;; +;; [:find ?page ?title :in $ +;; :where [?page :page/url _] +;; [(get-else ?page :page/title "") ?title] +;; [_ :foo/quoted ?title]] +;; +;; or +;; [(some-function ?title)] +;; +;; -- we aren't really establishing a binding, so the subquery will be +;; repeated. But this will do for now. +(defn apply-get-else-clause [cc function] + (let [{:keys [source bindings external-bindings]} cc + schema (:schema source) + + {:keys [args binding]} function + [src e a default-val] args] + + (when-not (instance? BindScalar binding) + (raise-str "Expected scalar binding.")) + (when-not (instance? Variable (:variable binding)) + (raise-str "Expected variable binding.")) + (when-not (instance? Constant a) + (raise-str "Expected constant attribute.")) + (when-not (instance? Constant default-val) + (raise-str "Expected constant default value.")) + (when-not (and (instance? SrcVar src) + (= "$" (name (:symbol src)))) + (raise "Non-default sources not supported." {:arg src})) + + (let [a (attribute-in-source source (:value a)) + a-type (get-in (:schema schema) [a :db/valueType]) + a-tag (->tag a-type) + + default-val (:value default-val) + var (:variable binding)] + + ;; Schema check. + (when-not (and (integer? a) + (not (datomish.schema/multival? schema a))) + (raise-str "Attribute " a " is not cardinality-one.")) + + ;; TODO: type-check the default value. + + (condp instance? e + Variable + (let [e (:symbol e) + e-binding (cc/binding-for-symbol-or-throw cc e)] + + (let [[table _] (source/source->from source a) ; We don't need to alias: single pattern. + ;; These :limit values shouldn't be needed, but sqlite will + ;; appreciate them. + ;; Note that we don't extract type tags here: the attribute + ;; must be known! + subquery {:select + [(sql/call + :coalesce + {:select [:v] + :from [table] + :where [:and + [:= 'a a] + [:= 'e e-binding]] + :limit 1} + (->SQLite default-val))] + :limit 1}] + (-> + (assoc-in cc [:known-types (:symbol var)] a-type) + (util/append-in [:bindings (:symbol var)] subquery)))) + + (raise-str "Can't handle entity" e))))) (def sql-functions ;; Future: versions of this that uses snippet() or matchinfo(). - {"fulltext" apply-fulltext-clause}) + {"fulltext" apply-fulltext-clause + "get-else" apply-get-else-clause}) (defn apply-sql-function "Either returns an application of `function` to `cc`, or nil to diff --git a/src/datomish/query/projection.cljc b/src/datomish/query/projection.cljc index f71a3ec6..38b1fbb8 100644 --- a/src/datomish/query/projection.cljc +++ b/src/datomish/query/projection.cljc @@ -4,18 +4,114 @@ (ns datomish.query.projection (:require - [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]] + [honeysql.core :as sql] + [datomish.query.source :as source] + [datomish.sqlite-schema :as ss] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]] [datascript.parser :as dp - #?@(:cljs [:refer [Pattern DefaultSrc Variable Constant Placeholder]])] + #?@(:cljs [:refer [Aggregate Pattern DefaultSrc Variable Constant Placeholder PlainSymbol]])] ) - #?(:clj (:import [datascript.parser Pattern DefaultSrc Variable Constant Placeholder])) + #?(:clj (:import [datascript.parser Aggregate Pattern DefaultSrc Variable Constant Placeholder PlainSymbol])) ) (defn lookup-variable [cc variable] (or (-> cc :bindings variable first) (raise-str "Couldn't find variable " variable))) -(defn sql-projection +(def aggregate-functions + {:avg :avg + :count :count + :max :max + :min :min + :sum :total + }) + +(defn- aggregate-symbols->projected-var [fn-symbol var-symbol] + (keyword (str "_" (name fn-symbol) "_" (subs (name var-symbol) 1)))) + +(defn- aggregate->projected-var [elem] + (aggregate-symbols->projected-var (:symbol (:fn elem)) + (:symbol (first (:args elem))))) + +(defn simple-aggregate? + "If `elem` is a simple aggregate -- symbolic function, one var arg -- + return the variable symbol." + [elem] + (when (instance? Aggregate elem) + (let [{:keys [fn args]} elem] + (when (and (instance? PlainSymbol fn) + (= 1 (count args))) + (let [arg (first args)] + (when (instance? Variable arg) + (:symbol arg))))))) + +(defn- aggregate->var [elem] + (when (instance? Aggregate elem) + (when-not (simple-aggregate? elem) + (raise-str "Only know how to handle simple aggregates.")) + + (:symbol (first (:args elem))))) + +(defn- variable->var [elem] + (when (instance? Variable elem) + (:symbol elem))) + +(defn- aggregate->projection [elem context lookup-fn] + (when (instance? Aggregate elem) + (when-not (simple-aggregate? elem) + (raise-str "Only know how to handle simple aggregates.")) + + (let [var-symbol (:symbol (first (:args elem))) + fn-symbol (:symbol (:fn elem)) + lookup-var (lookup-fn var-symbol) + aggregate-fn (get aggregate-functions (keyword fn-symbol))] + + (when-not aggregate-fn + (raise-str "Unknown aggregate function " fn-symbol)) + + (let [funcall-var (util/aggregate->sql-var aggregate-fn lookup-var) + project-as (aggregate-symbols->projected-var fn-symbol var-symbol)] + [[funcall-var project-as]])))) + +(defn- type-projection + "Produce a projection pair by looking up `var` in the provided + `extracted-types`." + [extracted-types var] + (when-let [t (get extracted-types var)] + [t (util/var->sql-type-var var)])) + +(defn- aggregate-type-projection + "Produce a passthrough projection pair for a type field + in an inner query." + [inner var] + (let [type-var (util/var->sql-type-var var)] + [(sql/qualify inner type-var) type-var])) + +(defn- symbol->projection + "Given a variable symbol, produce a projection pair. + `lookup-fn` will be used to find a column. For a non-aggregate query, + this will typically be a lookup into the CC's bindings. For an + aggregate query it'll be a qualification of the same var into the + subquery. + `known-types` is a type map to decide whether to project a type tag. + `type-projection-fn` is like `lookup-fn` but for type tag columns." + [var lookup-fn known-types type-projection-fn] + (let [lookup-var (lookup-fn var) + projected-var (util/var->sql-var var) + var-projection [lookup-var projected-var]] + + ;; If the type of a variable isn't explicitly known, we also select + ;; its type column so we can transform it. + (if-let [type-proj (when (not (contains? known-types var)) + (type-projection-fn var))] + [var-projection type-proj] + [var-projection]))) + +(defn- variable->projection [elem lookup-fn known-types type-projection-fn] + (when (instance? Variable elem) + (symbol->projection (:symbol elem) lookup-fn known-types type-projection-fn))) + +(defn sql-projection-for-relation "Take a `find` clause's `:elements` list and turn it into a SQL projection clause, suitable for passing as a `:select` clause to honeysql. @@ -32,23 +128,145 @@ [[:datoms12.e :foo] [:datoms13.e :bar]] + Note that we also look at `:group-by-vars`, because we need to + alias columns and apply `DISTINCT` to those columns in order to + aggregate correctly. + + This function unpacks aggregate operations, instead selecting the var. + @param context A Context, containing elements. @return a sequence of pairs." [context] - (def foo context) - (let [elements (:elements context)] - (when-not (every? #(instance? Variable %1) elements) - (raise-str "Unable to :find non-variables.")) - (map (fn [elem] - (let [var (:symbol elem)] - [(lookup-variable (:cc context) var) (util/var->sql-var var)])) - elements))) + (let [{:keys [group-by-vars elements cc]} context + {:keys [known-types extracted-types]} cc] + + ;; The primary projections from the :find list. + ;; Note that deduplication will be necessary, because we unpack aggregates. + (let [projected-vars + (map (fn [elem] + (or (aggregate->var elem) + (variable->var elem) + (raise "Only able to :find variables or aggregates." + {:elem elem}))) + elements) + + ;; If we have any GROUP BY requirements from :with, that aren't already + ;; included in the above, project them now. + additional-vars + (clojure.set/difference + (set group-by-vars) + (set projected-vars)) + + full-var-list + (distinct (concat projected-vars additional-vars)) + + type-proj-fn + (partial type-projection extracted-types) + + lookup-fn + (partial lookup-variable cc)] + + (mapcat (fn [var] + (symbol->projection var lookup-fn known-types type-proj-fn)) + full-var-list)))) + +(defn sql-projection-for-aggregation + "Project an element list that contains aggregates. This expects a subquery + aliased to `inner-table` which itself will project each var with the + correct name." + [context inner-table] + (let [{:keys [group-by-vars elements cc]} context + {:keys [known-types extracted-types]} cc + lookup-fn (fn [var] + (sql/qualify inner-table (util/var->sql-var var))) + type-proj-fn (partial aggregate-type-projection inner-table)] + (mapcat (fn [elem] + (or (variable->projection elem lookup-fn known-types type-proj-fn) + (aggregate->projection elem context lookup-fn) + (raise "Only able to :find variables or aggregates." + {:elem elem}))) + elements))) + +(defn make-projectors-for-columns [elements known-types extracted-types] + {:pre [(map? extracted-types) + (map? known-types)]} + (letfn [(variable->projector [elem known-types extracted-types tag-decoder] + (when (instance? Variable elem) + (let [var (:symbol elem) + projected-var (util/var->sql-var var)] + + (if-let [type (get known-types var)] + ;; We know the type! We already know how to decode it. + ;; TODO: most of these tags don't actually require calling through to <-tagged-SQLite. + ;; TODO: optimize this without making it horrible. + (let [decoder (tag-decoder (ss/->tag type))] + (fn [row] + (decoder (get row projected-var)))) + + ;; We don't know the type. Find the type projection column + ;; and use it to decode the value. + (if (contains? extracted-types var) + (let [type-column (util/var->sql-type-var var)] + (fn [row] + (ss/<-tagged-SQLite + (get row type-column) + (get row projected-var)))) + + ;; We didn't extract a type and we don't know it in advance. + ;; Just pass through; the :col will look itself up in the row. + projected-var))))) + + ;; For now we assume numerics and that everything will shake out in the wash. + (aggregate->projector [elem] + (when (instance? Aggregate elem) + (let [var (aggregate->projected-var elem)] + (fn [row] + (get row var)))))] + + (let [tag-decoder (memoize + (fn [tag] + (partial ss/<-tagged-SQLite tag)))] + (map (fn [elem] + (or (variable->projector elem known-types extracted-types tag-decoder) + (aggregate->projector elem))) + elements)))) (defn row-pair-transducer [context] - ;; For now, we only support straight var lists, so - ;; our transducer is trivial. - (let [columns-in-order (map second (sql-projection context))] - (map (fn [[row err]] - (if err - [row err] - [(map row columns-in-order) nil]))))) + (let [{:keys [elements cc]} context + {:keys [source known-types extracted-types]} cc + + ;; We know the projection will fail above if these aren't simple variables or aggregates. + projectors + (make-projectors-for-columns elements known-types extracted-types)] + + (map + (fn [[row err]] + (if err + [row err] + [(map (fn [projector] (projector row)) projectors) nil]))))) + +(defn extract-group-by-vars + "Take inputs to :find and, if any aggregates exist in `elements`, + return the variable names upon which we should GROUP BY." + [elements with] + (when (some #(instance? Aggregate %1) elements) + (loop [ignore #{} + group-by (map :symbol with) + e elements] + + (if-let [element (first e)] + (if-let [aggregated-var (simple-aggregate? element)] + (recur (conj ignore aggregated-var) + group-by + (rest e)) + (if (instance? Variable element) + (let [var (:symbol element)] + (recur ignore + (if (contains? ignore var) + group-by + (conj group-by var)) + (rest e))) + (raise-str "Unknown element." {:element element}))) + + ;; Done. Remove any later vars we saw. + (remove ignore group-by))))) diff --git a/src/datomish/query/source.cljc b/src/datomish/query/source.cljc index 5c72612b..b0a4116f 100644 --- a/src/datomish/query/source.cljc +++ b/src/datomish/query/source.cljc @@ -4,14 +4,16 @@ (ns datomish.query.source (:require - [datomish.query.transforms :as transforms] - [datascript.parser - #?@(:cljs - [:refer [Variable Constant Placeholder]])]) + [datomish.query.transforms :as transforms] + [datomish.schema :as schema] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str]] + [datascript.parser + #?@(:cljs + [:refer [Variable Constant Placeholder]])]) #?(:clj - (:import [datascript.parser Variable Constant Placeholder]))) + (:import [datascript.parser Variable Constant Placeholder]))) -(defn- gensym-table-alias [table] +(defn gensym-table-alias [table] (gensym (name table))) ;;; @@ -38,42 +40,63 @@ (source->non-fulltext-from [source]) (source->fulltext-from [source] "Returns a pair, `[table alias]` for querying the source's fulltext index.") + (source->fulltext-values [source] + "Returns a pair, `[table alias]` for querying the source's fulltext values") (source->constraints [source alias]) + (pattern->schema-value-type [source pattern]) (attribute-in-source [source attribute]) (constant-in-source [source constant])) (defrecord - DatomsSource - [table ; Typically :datoms. - fulltext-table ; Typically :fulltext_values - fulltext-view ; Typically :all_datoms - columns ; e.g., [:e :a :v :tx] + DatomsSource + [table ; Typically :datoms. + fulltext-table ; Typically :fulltext_datoms + fulltext-view ; Typically :all_datoms + fulltext-values ; Typically :fulltext_values + columns ; e.g., [:e :a :v :tx] + schema ; An ISchema instance. - ;; `attribute-transform` is a function from attribute to constant value. Used to - ;; turn, e.g., :p/attribute into an interned integer. - ;; `constant-transform` is a function from constant value to constant value. Used to - ;; turn, e.g., the literal 'true' into 1. - attribute-transform - constant-transform + ;; `attribute-transform` is a function from attribute to constant value. Used to + ;; turn, e.g., :p/attribute into an interned integer. + ;; `constant-transform` is a function from constant value to constant value. Used to + ;; turn, e.g., the literal 'true' into 1. + attribute-transform + constant-transform - ;; `table-alias` is a function from table to alias, e.g., :datoms => :datoms1234. - table-alias + ;; `table-alias` is a function from table to alias, e.g., :datoms => :datoms1234. + table-alias - ;; Not currently used. - make-constraints ; ?fn [source alias] => [where-clauses] - ] + ;; Not currently used. + make-constraints ; ?fn [source alias] => [where-clauses] + ] Source (source->from [source attribute] - (let [table - (if (and (instance? Constant attribute) - ;; TODO: look in the DB schema to see if `attribute` is known to not be - ;; a fulltext attribute. - true) - (:table source) + (let [schema (:schema source) + int->table (fn [a] + (if (schema/fulltext? schema a) + (:fulltext-table source) + (:table source))) + table + (cond + (integer? attribute) + (int->table attribute) + (instance? Constant attribute) + (let [a (:value attribute) + id (if (keyword? a) + (attribute-in-source source a) + a)] + (int->table id)) + + ;; TODO: perhaps we know an external binding already? + (or (instance? Variable attribute) + (instance? Placeholder attribute)) ;; It's variable. We must act as if it could be a fulltext datom. - (:fulltext-view source))] + (:fulltext-view source) + + true + (raise "Unknown source->from attribute " attribute {:attribute attribute}))] [table ((:table-alias source) table)])) (source->non-fulltext-from [source] @@ -84,24 +107,29 @@ (let [table (:fulltext-table source)] [table ((:table-alias source) table)])) + (source->fulltext-values [source] + (let [table (:fulltext-values source)] + [table ((:table-alias source) table)])) + (source->constraints [source alias] (when-let [f (:make-constraints source)] (f alias))) + (pattern->schema-value-type [source pattern] + (let [[_ a v _] pattern + schema (:schema (:schema source))] + (when (instance? Constant a) + (let [val (:value a)] + (if (keyword? val) + ;; We need to find the entid for the keyword attribute, + ;; because the schema stores attributes by ID. + (let [id (attribute-in-source source val)] + (get-in schema [id :db/valueType])) + (when (integer? val) + (get-in schema [val :db/valueType]))))))) + (attribute-in-source [source attribute] ((:attribute-transform source) attribute)) (constant-in-source [source constant] ((:constant-transform source) constant))) - -(defn datoms-source [db] - (map->DatomsSource - {:table :datoms - :fulltext-table :fulltext_values - :fulltext-view :all_datoms - :columns [:e :a :v :tx :added] - :attribute-transform transforms/attribute-transform-string - :constant-transform transforms/constant-transform-default - :table-alias gensym-table-alias - :make-constraints nil})) - diff --git a/src/datomish/schema.cljc b/src/datomish/schema.cljc index 9a9c2ced..98e2c5a8 100644 --- a/src/datomish/schema.cljc +++ b/src/datomish/schema.cljc @@ -5,7 +5,12 @@ ;; Purloined from DataScript. (ns datomish.schema - (:require [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise]])) + (:require + [datomish.sqlite-schema :as sqlite-schema] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise]])) + +(defn entid? [x] + (and (integer? x) (pos? x))) (defprotocol ISchema (attrs-by @@ -94,19 +99,41 @@ :key k :value v})))) -;; TODO: consider doing this with a protocol and extending the underlying Clojure(Script) types. +#?(:clj + (defn uuidish? [x] + (instance? java.util.UUID x))) +#?(:cljs + (let [uuid-re (js/RegExp. "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" "i")] + (defn uuidish? [x] + (and (string? x) + (re-find uuid-re x))))) + (def value-type-map - {:db.type/ref { :valid? #(and (integer? %) (pos? %)) :->SQLite identity :<-SQLite identity } - :db.type/keyword { :valid? keyword? :->SQLite str :<-SQLite #(keyword (subs % 1)) } - :db.type/string { :valid? string? :->SQLite identity :<-SQLite identity } - :db.type/boolean { :valid? #?(:clj #(instance? Boolean %) :cljs #(= js/Boolean (type %))) :->SQLite #(if % 1 0) :<-SQLite #(not= % 0) } - :db.type/integer { :valid? integer? :->SQLite identity :<-SQLite identity } - :db.type/real { :valid? #?(:clj float? :cljs number?) :->SQLite identity :<-SQLite identity } + {:db.type/ref { :valid? entid? } + :db.type/keyword { :valid? keyword? } + :db.type/string { :valid? string? } + :db.type/boolean { :valid? #?(:clj #(instance? Boolean %) :cljs #(= js/Boolean (type %))) } + :db.type/long { :valid? integer? } + :db.type/uuid { :valid? uuidish? } + :db.type/instant { :valid? #?(:clj #(instance? java.util.Date %) :cljs #(= js/Date (type %))) } + :db.type/uri { :valid? #?(:clj #(instance? java.net.URI %) :cljs string?) } + :db.type/double { :valid? #?(:clj float? :cljs number?) } }) +(defn #?@(:clj [^Boolean ensure-value-matches-type] + :cljs [^boolean ensure-value-matches-type]) [type value] + (if-let [valid? (get-in value-type-map [type :valid?])] + (when-not (valid? value) + (raise "Invalid value for type " type "; got " value + {:error :schema/valueType, :type type, :value value})) + (raise "Unknown valueType " type ", expected one of " (sorted-set (keys value-type-map)) + {:error :schema/valueType, :type type}))) + +;; There's some duplication here so we get better error messages. (defn #?@(:clj [^Boolean ensure-valid-value] :cljs [^boolean ensure-valid-value]) [schema attr value] - {:pre [(schema? schema)]} + {:pre [(schema? schema) + (integer? attr)]} (let [schema (.-schema schema)] (if-let [valueType (get-in schema [attr :db/valueType])] (if-let [valid? (get-in value-type-map [valueType :valid?])] @@ -119,12 +146,13 @@ {:error :schema/valueType, :attribute attr})))) (defn ->SQLite [schema attr value] - {:pre [(schema? schema)]} + {:pre [(schema? schema) + (integer? attr)]} (let [schema (.-schema schema)] (if-let [valueType (get-in schema [attr :db/valueType])] (if-let [valid? (get-in value-type-map [valueType :valid?])] (if (valid? value) - ((get-in value-type-map [valueType :->SQLite]) value) + [(sqlite-schema/->SQLite value) (sqlite-schema/->tag valueType)] (raise "Invalid value for attribute " attr ", expected " valueType " but got " value {:error :schema/valueType, :attribute attr, :value value})) (raise "Unknown valueType for attribute " attr ", expected one of " (sorted-set (keys value-type-map)) @@ -136,8 +164,8 @@ {:pre [(schema? schema)]} (let [schema (.-schema schema)] (if-let [valueType (get-in schema [attr :db/valueType])] - (if-let [<-SQLite (get-in value-type-map [valueType :<-SQLite])] - (<-SQLite value) + (if (contains? value-type-map valueType) + (sqlite-schema/<-SQLite valueType value) (raise "Unknown valueType for attribute " attr ", expected one of " (sorted-set (keys value-type-map)) {:error :schema/valueType, :attribute attr})) (raise "Unknown attribute " attr ", expected one of " (sorted-set (keys schema)) diff --git a/src/datomish/schema.edn b/src/datomish/schema.edn new file mode 100644 index 00000000..1e4736fd --- /dev/null +++ b/src/datomish/schema.edn @@ -0,0 +1,59 @@ +[ + ;; news + + {:db/id #db/id[:db.part/db] + :db/ident :news/title + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/fulltext true + :db/doc "A news story's title" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/foobar + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/fulltext true + :db/doc "A news story's foobar" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/zot + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/fulltext true + :db/doc "A news story's zot" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/baz + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/one + :db/doc "A news story's baz" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/url + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/doc "A news story's url" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/summary + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/doc "Automatically generated summary of a news story" + :db.install/_attribute :db.part/db} + + {:db/id #db/id[:db.part/db] + :db/ident :news/category + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many + :db/fulltext true + :db/doc "Categories automatically set for a news story" + :db.install/_attribute :db.part/db} + + ] diff --git a/src/datomish/schema_changes.cljc b/src/datomish/schema_changes.cljc index e2857cd1..c8208548 100644 --- a/src/datomish/schema_changes.cljc +++ b/src/datomish/schema_changes.cljc @@ -6,9 +6,6 @@ (:require [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]])) -(defn- is-install? [db [_ a & _]] - (= a (get-in db [:idents :db.install/attribute]))) - (defn datoms->schema-fragment "Turn [[:db.part/db :db.install/attribute e] [e :db/ident :attr]] into {:attr {:db/* v}}. diff --git a/src/datomish/sqlite.cljc b/src/datomish/sqlite.cljc index cc970c3d..68f45762 100644 --- a/src/datomish/sqlite.cljc +++ b/src/datomish/sqlite.cljc @@ -24,7 +24,7 @@ (def sql-quoting-style :ansi) (defn format [args] - (honeysql.core/format args :quoting :ansi)) + (honeysql.core/format args :quoting sql-quoting-style)) (defprotocol ISQLiteConnection (-execute! @@ -76,7 +76,7 @@ ;; channel being rejected and no further row callbacks ;; being called. (when (second result) - (put! result c)) + (put! c result)) (close! c)))) (defn all-rows diff --git a/src/datomish/sqlite_schema.cljc b/src/datomish/sqlite_schema.cljc index 695b1515..546d0f4d 100644 --- a/src/datomish/sqlite_schema.cljc +++ b/src/datomish/sqlite_schema.cljc @@ -8,7 +8,7 @@ [datomish.pair-chan :refer [go-pair !]]]) @@ -19,34 +19,51 @@ (def v1-statements ["CREATE TABLE datoms (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, + value_type_tag SMALLINT NOT NULL, index_avet TINYINT NOT NULL DEFAULT 0, index_vaet TINYINT NOT NULL DEFAULT 0, index_fulltext TINYINT NOT NULL DEFAULT 0, - unique_value TINYINT NOT NULL DEFAULT 0, unique_identity TINYINT NOT NULL DEFAULT 0)" - "CREATE INDEX idx_datoms_eavt ON datoms (e, a, v)" - "CREATE INDEX idx_datoms_aevt ON datoms (a, e, v)" + unique_value TINYINT NOT NULL DEFAULT 0)" + "CREATE UNIQUE INDEX idx_datoms_eavt ON datoms (e, a, value_type_tag, v)" + "CREATE UNIQUE INDEX idx_datoms_aevt ON datoms (a, e, value_type_tag, v)" + + ;; n.b., v0/value_type_tag0 can be NULL, in which case we look up v from datoms; + ;; and the datom columns are NULL into the LEFT JOIN fills them in. + ;; TODO: update comment about sv. + "CREATE TABLE tx_lookup (e0 INTEGER NOT NULL, a0 SMALLINT NOT NULL, v0 BLOB NOT NULL, tx0 INTEGER NOT NULL, added0 TINYINT NOT NULL, + value_type_tag0 SMALLINT NOT NULL, + index_avet0 TINYINT, index_vaet0 TINYINT, + index_fulltext0 TINYINT, + unique_value0 TINYINT, + sv BLOB, + svalue_type_tag SMALLINT, + rid INTEGER, + e INTEGER, a SMALLINT, v BLOB, tx INTEGER, value_type_tag SMALLINT)" + + ;; Note that `id_tx_lookup_added` is created and dropped + ;; after insertion, which makes insertion slightly faster. + ;; Prevent overlapping transactions. TODO: drop added0? + "CREATE UNIQUE INDEX idx_tx_lookup_eavt ON tx_lookup (e0, a0, v0, added0, value_type_tag0) WHERE sv IS NOT NULL" ;; Opt-in index: only if a has :db/index true. - "CREATE UNIQUE INDEX idx_datoms_avet ON datoms (a, v, e) WHERE index_avet IS NOT 0" + "CREATE UNIQUE INDEX idx_datoms_avet ON datoms (a, value_type_tag, v, e) WHERE index_avet IS NOT 0" - ;; Opt-in index: only if a has :db/valueType :db.type/ref. + ;; Opt-in index: only if a has :db/valueType :db.type/ref. No need for tag here since all + ;; indexed elements are refs. "CREATE UNIQUE INDEX idx_datoms_vaet ON datoms (v, a, e) WHERE index_vaet IS NOT 0" ;; Opt-in index: only if a has :db/fulltext true; thus, it has :db/valueType :db.type/string, ;; which is not :db/valueType :db.type/ref. That is, index_vaet and index_fulltext are mutually ;; exclusive. - "CREATE INDEX idx_datoms_fulltext ON datoms (v, a, e) WHERE index_fulltext IS NOT 0" + "CREATE INDEX idx_datoms_fulltext ON datoms (value_type_tag, v, a, e) WHERE index_fulltext IS NOT 0" - ;; TODO: possibly remove this index. :db.unique/value should be asserted by the transactor in - ;; all cases, but the index may speed up some of SQLite's query planning. For now, it services - ;; to validate the transactor implementation. - "CREATE UNIQUE INDEX idx_datoms_unique_value ON datoms (v) WHERE unique_value IS NOT 0" - ;; TODO: possibly remove this index. :db.unique/identity should be asserted by the transactor in - ;; all cases, but the index may speed up some of SQLite's query planning. For now, it serves to - ;; validate the transactor implementation. - "CREATE UNIQUE INDEX idx_datoms_unique_identity ON datoms (a, v) WHERE unique_identity IS NOT 0" + ;; TODO: possibly remove this index. :db.unique/{value,identity} should be asserted by the + ;; transactor in all cases, but the index may speed up some of SQLite's query planning. For now, + ;; it serves to validate the transactor implementation. Note that tag is needed here to + ;; differentiate, e.g., keywords and strings. + "CREATE UNIQUE INDEX idx_datoms_unique_value ON datoms (a, value_type_tag, v) WHERE unique_value IS NOT 0" - "CREATE TABLE transactions (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, added TINYINT NOT NULL DEFAULT 1)" - "CREATE INDEX idx_transactions_tx ON transactions (tx)" + "CREATE TABLE transactions (e INTEGER NOT NULL, a SMALLINT NOT NULL, v BLOB NOT NULL, tx INTEGER NOT NULL, added TINYINT NOT NULL DEFAULT 1, value_type_tag SMALLINT NOT NULL)" + "CREATE INDEX idx_transactions_tx ON transactions (tx, added)" ;; Fulltext indexing. ;; A fulltext indexed value v is an integer rowid referencing fulltext_values. @@ -57,27 +74,45 @@ ;; By default we use Unicode-aware tokenizing (particularly for case folding), but preserve ;; diacritics. "CREATE VIRTUAL TABLE fulltext_values - USING FTS4 (text NOT NULL, tokenize=unicode61 \"remove_diacritics=0\")" + USING FTS4 (text NOT NULL, searchid INT, tokenize=unicode61 \"remove_diacritics=0\")" + + ;; This combination of view and triggers allows you to transparently + ;; update-or-insert into FTS. Just INSERT INTO fulltext_values_view (text, searchid). + "CREATE VIEW fulltext_values_view AS SELECT * FROM fulltext_values" + "CREATE TRIGGER replace_fulltext_searchid + INSTEAD OF INSERT ON fulltext_values_view + WHEN EXISTS (SELECT 1 FROM fulltext_values WHERE text = new.text) + BEGIN + UPDATE fulltext_values SET searchid = new.searchid WHERE text = new.text; + END" + "CREATE TRIGGER insert_fulltext_searchid + INSTEAD OF INSERT ON fulltext_values_view + WHEN NOT EXISTS (SELECT 1 FROM fulltext_values WHERE text = new.text) + BEGIN + INSERT INTO fulltext_values (text, searchid) VALUES (new.text, new.searchid); + END" ;; A view transparently interpolating fulltext indexed values into the datom structure. "CREATE VIEW fulltext_datoms AS - SELECT e, a, fulltext_values.text AS v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity + SELECT e, a, fulltext_values.text AS v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value FROM datoms, fulltext_values WHERE datoms.index_fulltext IS NOT 0 AND datoms.v = fulltext_values.rowid" ;; A view transparently interpolating all entities (fulltext and non-fulltext) into the datom structure. "CREATE VIEW all_datoms AS - SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity + SELECT e, a, v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value FROM datoms WHERE index_fulltext IS 0 UNION ALL - SELECT e, a, v, tx, index_avet, index_vaet, index_fulltext, unique_value, unique_identity + SELECT e, a, v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value FROM fulltext_datoms" ;; Materialized views of the schema. "CREATE TABLE idents (ident TEXT NOT NULL PRIMARY KEY, entid INTEGER UNIQUE NOT NULL)" + ;; TODO: allow arbitrary schema values (true/false) and tag the resulting values. "CREATE TABLE schema (ident TEXT NOT NULL, attr TEXT NOT NULL, value TEXT NOT NULL, FOREIGN KEY (ident) REFERENCES idents (ident))" "CREATE INDEX idx_schema_unique ON schema (ident, attr, value)" + "CREATE TABLE parts (part INTEGER NOT NULL PRIMARY KEY, start INTEGER NOT NULL, idx INTEGER NOT NULL)" ]) (defn SQLite [x] "Transforms Clojure{Script} values to SQLite.")) + +(extend-protocol IEncodeSQLite + #?@(:clj + [String + (->SQLite [x] x) + + clojure.lang.Keyword + (->SQLite [x] (str x)) + + Boolean + (->SQLite [x] (if x 1 0)) + + Integer + (->SQLite [x] x) + + Long + (->SQLite [x] x) + + java.util.Date + (->SQLite [x] (.getTime x)) + + java.util.UUID + (->SQLite [x] (.toString x)) ; TODO: BLOB storage. Issue #44. + + Float + (->SQLite [x] x) + + Double + (->SQLite [x] x)] + + :cljs + [string + (->SQLite [x] x) + + Keyword + (->SQLite [x] (str x)) + + boolean + (->SQLite [x] (if x 1 0)) + + js/Date + (->SQLite [x] (.getTime x)) + + number + (->SQLite [x] x)])) + +;; Datomish rows are tagged with a numeric representation of :db/valueType: +;; The tag is used to limit queries, and therefore is placed carefully in the relevant indices to +;; allow searching numeric longs and doubles quickly. The tag is also used to convert SQLite values +;; to the correct Datomish value type on query egress. +(def value-type-tag-map + {:db.type/ref 0 + :db.type/boolean 1 + :db.type/instant 4 + :db.type/long 5 ;; SQLite distinguishes integral from decimal types, allowing long and double to share a tag. + :db.type/double 5 ;; SQLite distinguishes integral from decimal types, allowing long and double to share a tag. + :db.type/string 10 + :db.type/uuid 11 + :db.type/uri 12 + :db.type/keyword 13}) + +(defn ->tag [valueType] + (or + (valueType value-type-tag-map) + (raise "Unknown valueType " valueType ", expected one of " (sorted-set (keys value-type-tag-map)) + {:error :SQLite/tag, :valueType valueType}))) + +#?(:clj +(defn <-tagged-SQLite + "Transforms SQLite values to Clojure with tag awareness." + [tag value] + (case tag + ;; In approximate commonality order. + 0 value ; ref. + 1 (= value 1) ; boolean + 4 (java.util.Date. value) ; instant + 13 (keyword (subs value 1)) ; keyword + 12 (java.net.URI. value) ; URI + 11 (java.util.UUID/fromString value) ; UUID + ; 5 value ; numeric + ; 10 value ; string + value + ))) + +#?(:cljs +(defn <-tagged-SQLite + "Transforms SQLite values to ClojureScript with tag awareness." + [tag value] + ;; In approximate commonality order. + (case tag + 0 value ; ref. + 1 (= value 1) ; boolean + 4 (js/Date. value) ; instant + 13 (keyword (subs value 1)) ; keyword + ; 12 value ; URI + ; 11 value ; UUID + ; 5 value ; numeric + ; 10 value ; string + value + ))) + +(defn tagged-SQLite-to-JS + "Transforms SQLite values to JavaScript-compatible values." + [tag value] + (case tag + 1 (= value 1) ; boolean. + ; 0 value ; No point trying to ident. + ; 4 value ; JS doesn't have a Date representation. + ; 13 value ; Return the keyword string from the DB: ":foobar". + value)) + +(defn <-SQLite + "Transforms SQLite values to Clojure{Script}." + [valueType value] + (case valueType + :db.type/ref value + :db.type/keyword (keyword (subs value 1)) + :db.type/string value + :db.type/boolean (not= value 0) + :db.type/long value + :db.type/instant (<-tagged-SQLite 4 value) + :db.type/uuid (<-tagged-SQLite 11 value) + :db.type/double value)) diff --git a/src/datomish/test_macros.cljc b/src/datomish/test_macros.cljc index 989e852a..bf50d1a4 100644 --- a/src/datomish/test_macros.cljc +++ b/src/datomish/test_macros.cljc @@ -4,7 +4,9 @@ (ns datomish.test-macros #?(:cljs - (:require-macros [datomish.test-macros])) + (:require-macros + [datomish.test-macros] + [datomish.node-tempfile-macros])) (:require [datomish.pair-chan])) @@ -38,3 +40,20 @@ (let [[v# e#] (clojure.core.async/!]]]) #?@(:cljs [[datomish.pair-chan] @@ -56,7 +59,8 @@ (defrecord TxReport [db-before ;; The DB before the transaction. db-after ;; The DB after the transaction. - current-tx ;; The tx ID represented by the transaction in this report. + tx ;; The tx ID represented by the transaction in this report; refer :db/tx. + txInstant ;; The timestamp instant when the the transaction was processed/committed in this report; refer :db/txInstant. entities ;; The set of entities (like [:db/add e a v tx]) processed. tx-data ;; The set of datoms applied to the database, like (Datom. e a v tx added). tempids ;; The map from id-literal -> numeric entid. @@ -106,11 +110,14 @@ entity)) (defn maybe-ident->entid [db [op e a v tx :as orig]] - (let [e (get (db/idents db) e e) ;; TODO: use ident, entid here. - a (get (db/idents db) a a) + (let [e (db/entid db e) + a (db/entid db a) v (if (ds/kw? (db/schema db) a) ;; TODO: decide if this is best. We could also check for ref and numeric types. v - (get (db/idents db) v v))] + (db/entid db v))] + (when-not (integer? a) + (raise "Unknown attribute " a + {:form orig :attribute a})) [op e a v tx])) (defrecord Transaction [db tempids entities]) @@ -120,7 +127,7 @@ (let [tx (:tx report) txInstant (:txInstant report)] ;; n.b., this must not be symbolic -- it's inserted after we map idents -> entids. - [:db/add tx (get-in db [:idents :db/txInstant]) txInstant])) + [:db/add tx (db/entid db :db/txInstant) txInstant])) (defn ensure-entity-form [[op e a v & rest :as entity]] (cond @@ -153,8 +160,8 @@ (defn- tx-instant? [db [op e a & _]] (and (= op :db/add) - (= e (get-in db [:idents :db/tx])) - (= a (get-in db [:idents :db/txInstant])))) + (= (db/entid db e) (db/entid db :db/tx)) + (= (db/entid db a) (db/entid db :db/txInstant)))) (defn- update-txInstant [db report] "Extract [:db/add :db/tx :db/txInstant ...], and update :txInstant with that value." @@ -175,7 +182,7 @@ ;; to unify all id-literals in :db.part/tx to the current transaction value, but also seems ;; inconsistent. tx (:tx report) - db* (assoc-in db [:idents :db/tx] tx)] + db* (db/with-ident db :db/tx tx)] (when-not (sequential? initial-es) (raise "Bad transaction data " initial-es ", expected sequential collection" {:error :transact/syntax, :tx-data initial-es})) @@ -205,23 +212,48 @@ (->> (update-txInstant db*))))) (defn- lookup-ref? [x] - "Return true if `x` is like [:attr value]." + "Return `x` if `x` is like [:attr value], false otherwise." (and (sequential? x) (= (count x) 2) (or (keyword? (first x)) - (integer? (first x))))) + (integer? (first x))) + x)) (defn > - (vec (for [[op & entity] (:entities report)] - (into [op] (for [field entity] - (if (lookup-ref? field) - (first ( datom. - (doseq [[e a v tx added :as datom] (:tx-data report)] - - (when added - ;; Check for violated :db/unique constraint between datom and existing store. - (when (ds/unique? schema a) - (when-let [found (first (tx-data [db report] - {:pre [(db/db? db) (report? report)]} - (go-pair - (let [initial-report report - {tx :tx} report - schema (db/schema db)] - (loop [report initial-report - es (:entities initial-report)] - (let [[[op e a v :as entity] & entities] es] - (cond - (nil? entity) - report - - (= op :db/add) - (if (ds/multival? schema a) - (if (empty? ( report - (transact-report (datom e a (.-v old-datom) tx false)) - (transact-report (datom e a v tx true))) - entities)) - (recur (transact-report report (datom e a v tx true)) entities))) - - (= op :db/retract) - (if (first (> - report - (preprocess db) + (let [> + report + (preprocess db) - (tx-data db) - (schema-fragment datoms)] (assoc-in report [:added-attributes] schema-fragment))) @@ -518,7 +461,7 @@ ;; transaction ID and transaction timestamp directly from the report; Datomic ;; makes this surprisingly difficult: one needs a :db.part/tx temporary and an ;; explicit upsert of that temporary. - :tx (db/current-tx db) + :tx ( - db + (collect-db-install-assertions db) + (p :collect-db-install-assertions)) - (db/ + db - (db/> (p :apply-db-ident-assertions)) - (db/> (p :apply-db-install-assertions))) + ] (-> report (assoc-in [:db-after] db-after))))) diff --git a/src/datomish/transact/bootstrap.cljc b/src/datomish/transact/bootstrap.cljc index 7b8d79af..17c90be2 100644 --- a/src/datomish/transact/bootstrap.cljc +++ b/src/datomish/transact/bootstrap.cljc @@ -17,13 +17,15 @@ ;; TODO: support user-specified functions in the future. ;; :db.install/function {:db/valueType :db.type/ref ;; :db/cardinality :db.cardinality/many} - :db/txInstant {:db/valueType :db.type/integer + :db/txInstant {:db/valueType :db.type/long :db/cardinality :db.cardinality/one } ;; :db/index true} TODO: Handle this using SQLite protocol. :db/valueType {:db/valueType :db.type/ref :db/cardinality :db.cardinality/one} :db/cardinality {:db/valueType :db.type/ref :db/cardinality :db.cardinality/one} + :db/doc {:db/valueType :db.type/string + :db/cardinality :db.cardinality/one} :db/unique {:db/valueType :db.type/ref :db/cardinality :db.cardinality/one} :db/isComponent {:db/valueType :db.type/boolean @@ -52,8 +54,8 @@ :db/noHistory 13 :db/add 14 :db/retract 15 - :db.part/tx 16 - :db.part/user 17 + :db.part/user 16 + :db.part/tx 17 :db/excise 18 :db.excise/attrs 19 :db.excise/beforeT 20 @@ -61,15 +63,18 @@ :db.alter/attribute 22 :db.type/ref 23 :db.type/keyword 24 - :db.type/integer 25 ;; TODO: :db.type/long, to match Datomic? - :db.type/string 26 - :db.type/boolean 27 - :db.type/instant 28 - :db.type/bytes 29 - :db.cardinality/one 30 - :db.cardinality/many 31 - :db.unique/value 32 - :db.unique/identity 33}) + :db.type/long 25 + :db.type/double 26 + :db.type/string 27 + :db.type/boolean 28 + :db.type/instant 29 + :db.type/bytes 30 + :db.cardinality/one 31 + :db.cardinality/many 32 + :db.unique/value 33 + :db.unique/identity 34 + :db/doc 35 + }) (defn tx-data [] (concat diff --git a/src/datomish/transact/explode.cljc b/src/datomish/transact/explode.cljc index 232ea003..4bcbe805 100644 --- a/src/datomish/transact/explode.cljc +++ b/src/datomish/transact/explode.cljc @@ -34,14 +34,14 @@ (declare explode-entity) (defn- explode-entity-a-v [db entity eid a v] - ;; a should be symbolic at this point. Map it. TODO: use ident/entid to ensure we have a symbolic attr. - (let [reverse? (reverse-ref? a) + (let [a (db/ident db a) ;; We expect a to be an ident, but it's legal to provide an entid. + a* (db/entid db a) + reverse? (reverse-ref? a) straight-a (if reverse? (reverse-ref a) a) - straight-a* (get-in db [:idents straight-a] straight-a) + straight-a* (db/entid db straight-a) _ (when (and reverse? (not (ds/ref? (db/schema db) straight-a*))) (raise "Bad attribute " a ": reverse attribute name requires {:db/valueType :db.type/ref} in schema" - {:error :transact/syntax, :attribute a, :op entity})) - a* (get-in db [:idents a] a)] + {:error :transact/syntax, :attribute a, :op entity}))] (cond reverse? (explode-entity-a-v db entity v straight-a eid) @@ -60,11 +60,19 @@ :op entity })) (sequential? v) - (if (ds/multival? (db/schema db) a*) ;; dm/schema - (mapcat (partial explode-entity-a-v db entity eid a) v) ;; Allow sequences of nested maps, etc. This does mean [[1]] will work. - (raise "Sequential values " v " but attribute " a " is :db.cardinality/one" - {:error :transact/entity-sequential-cardinality-one - :op entity })) + (if (some nil? v) + ;; This is a hard one to track down, with a steep stack back in `transact/ensure-entity-form`, so + ;; we error specifically here rather than expanding further. + (raise "Sequential attribute value for " a " contains nil." + {:error :transact/sequence-contains-nil + :op entity + :attribute a + :value v}) + (if (ds/multival? (db/schema db) a*) ;; dm/schema + (mapcat (partial explode-entity-a-v db entity eid a) v) ;; Allow sequences of nested maps, etc. This does mean [[1]] will work. + (raise "Sequential values " v " but attribute " a " is :db.cardinality/one" + {:error :transact/entity-sequential-cardinality-one + :op entity }))) true [[:db/add eid a* v]]))) diff --git a/src/datomish/util.cljc b/src/datomish/util.cljc index aa9c205e..e279be2e 100644 --- a/src/datomish/util.cljc +++ b/src/datomish/util.cljc @@ -30,25 +30,30 @@ ~expr (cond-let ~@rest))))) +(defn ensure-datalog-var [x] + (or (and (symbol? x) + (nil? (namespace x)) + (str/starts-with? (name x) "?")) + (throw (ex-info (str x " is not a Datalog var.") {})))) + +(defn var->sql-type-var + "Turns '?xyz into :_xyz_type_tag." + [x] + (and + (ensure-datalog-var x) + (keyword (str "_" (subs (name x) 1) "_type_tag")))) + (defn var->sql-var "Turns '?xyz into :xyz." [x] - (if (and (symbol? x) - (str/starts-with? (name x) "?")) - (keyword (subs (name x) 1)) - (throw (ex-info (str x " is not a Datalog var.") {})))) + (and + (ensure-datalog-var x) + (keyword (subs (name x) 1)))) -(defn conj-in - "Associates a value into a sequence in a nested associative structure, where - ks is a sequence of keys and v is the new value, and returns a new nested - structure. - If any levels do not exist, hash-maps will be created. If the destination - sequence does not exist, a new one is created." - {:static true} - [m [k & ks] v] - (if ks - (assoc m k (conj-in (get m k) ks v)) - (assoc m k (conj (get m k) v)))) +(defn aggregate->sql-var + "Turns (:max 'column) into :%max.column." + [fn-kw x] + (keyword (str "%" (name fn-kw) "." (name x)))) (defn concat-in {:static true} @@ -57,6 +62,30 @@ (assoc m k (concat-in (get m k) ks vs)) (assoc m k (concat (get m k) vs)))) +(defn append-in + "Associates a value into a sequence in a nested associative structure, where + ks is a sequence of keys and v is the new value, and returns a new nested + structure. + Always puts the value last. + If any levels do not exist, hash-maps will be created. If the destination + sequence does not exist, a new one is created." + {:static true} + [m path v] + (concat-in m path [v])) + +(defn assoc-if + ([m k v] + (if v + (assoc m k v) + m)) + ([m k v & kvs] + (if kvs + (let [[kk vv & remainder] kvs] + (apply assoc-if + (assoc-if m k v) + kk vv remainder)) + (assoc-if m k v)))) + (defmacro while-let [binding & forms] `(loop [] (when-let ~binding @@ -70,3 +99,5 @@ (f (first xs) (first ys)) (recur f (rest xs) (rest ys))))) +(defn mapvals [f m] + (into (empty m) (map #(vector (first %) (f (second %))) m))) diff --git a/test/datomish/api.cljc b/test/datomish/api.cljc index 1a8a50fe..6ceb93e4 100644 --- a/test/datomish/api.cljc +++ b/test/datomish/api.cljc @@ -20,14 +20,9 @@ (defn - (sqlite/!]]]) - #?@(:cljs [[datomish.pair-chan] + #?@(:cljs [[datomish.promise-sqlite] + [datomish.pair-chan] [datomish.test-macros :refer-macros [deftest-async]] [datomish.node-tempfile :refer [tempfile]] [cljs.test :as t :refer-macros [is are deftest testing async]] @@ -36,88 +39,46 @@ (defn- tempids [tx] (into {} (map (juxt (comp :idx first) second) (:tempids tx)))) -(defn- > - (s/all-rows (:sqlite-connection db) ["SELECT e, a, v, tx FROM datoms WHERE tx > ?" tx]) - (> - (s/all-rows (:sqlite-connection db) ["SELECT a, v FROM datoms WHERE e = ?" eid]) - (> - (s/all-rows (:sqlite-connection db) ["SELECT e, a, v, tx, added FROM transactions WHERE tx > ? ORDER BY tx ASC, e, a, v, added" tx]) - (> - (s/all-rows (:sqlite-connection db) ["SELECT rowid, text FROM fulltext_values"]) - (!]]]) + #?@(:cljs [[datomish.promise-sqlite] + [datomish.pair-chan] + [datomish.test-macros :refer-macros [deftest-async]] + [datomish.node-tempfile :refer [tempfile]] + [cljs.test :as t :refer-macros [is are deftest testing async]] + [cljs.core.async :as a :refer [!]]]))) + +#?(:cljs + (def Throwable js/Error)) + +(tufte/add-basic-println-handler! {}) + +(deftest-async test-import + (with-tempfile [t (tempfile)] + (let [places (!]]]) + #?@(:cljs [[datomish.promise-sqlite] + [datomish.pair-chan] + [datomish.test-macros :refer-macros [deftest-async]] + [datomish.node-tempfile :refer [tempfile]] + [cljs.test :as t :refer-macros [is are deftest testing async]] + [cljs.core.async :as a :refer [!]]])) + #?(:clj + (:import [clojure.lang ExceptionInfo])) + #?(:clj + (:import [datascript.db DB]))) + +#?(:cljs + (def Throwable js/Error)) + +(def test-schema + [{:db/id (d/id-literal :db.part/user) + :db/ident :x + :db/unique :db.unique/identity + :db/valueType :db.type/long + :db.install/_attribute :db.part/db} + ]) + +(deftest-async test-q + (with-tempfile [t (tempfile)] + (let [conn ( '?tx tx0)] + [(list '!= '?a (d/entid (d/db conn) :db/txInstant))] ;; TODO: map ident->entid for values. + ] {})) + [[101 (d/entid (d/db conn) :x) 505 tx1]]))) ;; TODO: map entid->ident on egress. + (finally + (DatomsSource - {:table :datoms - :fulltext-table :fulltext_values - :fulltext-view :all_datoms - :columns [:e :a :v :tx :added] - :attribute-transform transforms/attribute-transform-string - :constant-transform transforms/constant-transform-default - :table-alias (comp (make-predictable-gensym) name) - :make-constraints nil})) +(def simple-schema + [{:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :db/txInstant + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :foo/bar + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :foo/int + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :foo/str + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many}]) -(defn- expand [find] - (let [context (context/->Context (mock-source nil) nil nil) +(def page-schema + [{:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/loves + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/likes + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/url + :db/valueType :db.type/string + :db/unique :db.unique/identity + :db/cardinality :db.cardinality/one} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/title + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/starred + :db/valueType :db.type/boolean + :db/cardinality :db.cardinality/one}]) + +(def aggregate-schema + [{:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :page/url + :db/valueType :db.type/string + :db/unique :db.unique/identity + :db/cardinality :db.cardinality/one} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :foo/points + :db/valueType :db.type/long + :db/cardinality :db.cardinality/many} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/ident :foo/visitedAt + :db/valueType :db.type/instant + :db/cardinality :db.cardinality/many}]) + +(def schema-with-page + (concat + simple-schema + page-schema)) + +(defn mock-source [db] + (assoc (datomish.db/datoms-source db) + :table-alias (comp (make-predictable-gensym) name))) + +(defn conn->context [conn] + (context/make-context (mock-source (d/db conn)))) + +(defn- expand [find conn] + (let [context (conn->context conn) parsed (query/parse find)] (query/find->sql-clause context parsed))) -(deftest test-basic-join - (is (= {:select '([:datoms1.v :timestampMicros] [:datoms0.e :page]), - :modifiers [:distinct], - :from '[[:datoms datoms0] - [:datoms datoms1]], - :where (list - :and - [:= :datoms1.e :datoms0.tx] - [:= :datoms0.a "page/starred"] - [:= :datoms0.v 1] - [:= :datoms1.a "db/txInstant"] - [:not - (list :and (list :> :datoms1.e (sql/param :latest)))])} - (expand - '[:find ?timestampMicros ?page :in $ ?latest :where - [?page :page/starred true ?t] - [?t :db/txInstant ?timestampMicros] - (not [(> ?t ?latest)])])))) +(defn- populate [find conn] + (let [context (conn->context conn) + parsed (query/parse find)] + (query/find-into-context context parsed))) -(deftest test-pattern-not-join - (is (= '{:select ([:datoms1.v :timestampMicros] [:datoms0.e :page]), - :modifiers [:distinct], - :from [[:datoms datoms0] - [:datoms datoms1]], - :where (:and - [:= :datoms1.e :datoms0.tx] - [:= :datoms0.a "page/starred"] +(defn + (populate '[:find ?e ?v :in $ :where [?e :foo/int ?v]] conn) + :cc :known-types) + {'?v :db.type/long + '?e :db.type/ref}))) + + (testing "Numeric entid." + (is (= (-> + (populate '[:find ?v :in $ :where [6 :foo/int ?v]] conn) + :cc :known-types) + {'?v :db.type/long}))) + + (testing "Keyword entity." + (is (= (-> + (populate '[:find ?v :in $ :where [:my/thing :foo/int ?v]] conn) + :cc :known-types) + {'?v :db.type/long})))) + +(deftest-db test-value-constant-constraint-descends-into-not-and-or conn + (let [attrs ( :datoms0.tx (sql/param :latest)))] + [:= :datoms0.tx :datoms1.e])} + (expand + '[:find ?timestampMicros ?page :in $ ?latest :where + [?page :page/starred true ?t] + [?t :db/txInstant ?timestampMicros] + (not [(> ?t ?latest)])] + conn))))) + +(deftest-db test-pattern-not-join conn + (let [attrs ( :datoms0.tx (sql/param :latest)))] - [:= :datoms1.a "db/txInstant"])} - (expand - '[:find ?timestampMicros ?page :in $ ?latest :where - [?page :page/starred true ?t] - (not [(> ?t ?latest)]) - [?t :db/txInstant ?timestampMicros]])))) - -(deftest test-pattern-not-join-ordering-preserved - (is (= '{:select ([:datoms2.v :timestampMicros] [:datoms0.e :page]), - :modifiers [:distinct], - :from [[:datoms datoms0] - [:datoms datoms2]], - :where (:and - [:= :datoms2.e :datoms0.tx] - [:= :datoms0.a "page/starred"] +(deftest-db test-not-clause-ordering-preserved conn + (let [attrs ( :datoms0.tx (sql/param :latest)))] + [:= :datoms1.a (:db/txInstant attrs)] + [:= :datoms0.tx :datoms1.e] )} - (expand - '[:find ?timestampMicros ?page :in $ ?latest :where - [?page :page/starred true ?t] - (not [?page :foo/bar _]) - [?t :db/txInstant ?timestampMicros]])))) + (expand + '[:find ?timestampMicros ?page :in $ ?latest :where + [?page :page/starred true ?t] + (not [(> ?t ?latest)]) + [?t :db/txInstant ?timestampMicros]] + conn))))) -(deftest test-single-or - (is (= '{:select ([:datoms1.e :page]), - :modifiers [:distinct], - :from ([:datoms datoms0] [:datoms datoms1] [:datoms datoms2]), - :where (:and - [:= :datoms1.e :datoms0.e] - [:= :datoms1.e :datoms2.v] - [:= :datoms0.a "page/url"] - [:= :datoms0.v "http://example.com/"] - [:= :datoms1.a "page/title"] - [:= :datoms2.a "page/loves"])} - (expand - '[:find ?page :in $ ?latest :where - [?page :page/url "http://example.com/"] - [?page :page/title ?title] - (or - [?entity :page/loves ?page])])))) +(deftest-db test-pattern-not-join-ordering-preserved conn + (let [attrs (tag-codes input)] + (if (= 1 (count codes)) + [:= column (first codes)] + (cons :or (map (fn [tag] + [:= column tag]) + codes))))) + +(deftest-db test-url-tag conn + (let [attrs ( ?date ?then)] + [?e :foo/points ?v]] conn)] + + (is (= (:group-by-vars context) + ['?date '?e])) + + (is (= {:select '([:preag.date :date] + [:%max.preag.v :_max_v]) + :modifiers [:distinct] + :group-by '(:date :e), + :with {:preag + {:select '([:datoms0.v :date] + [:datoms1.v :v] + [:datoms0.e :e]), ; Because we need to group on it. + :modifiers [:distinct], + :from '([:datoms datoms0] [:datoms datoms1]), + :where (list + :and + [:= :datoms0.a (:foo/visitedAt attrs)] + (list :> :datoms0.v (sql/param :then)) + [:= :datoms1.a (:foo/points attrs)] + [:= :datoms0.e :datoms1.e])}} + :from [:preag]} + (query/context->sql-clause context))))) + +(deftest-db test-get-else conn + (let [attrs (sql-var '?x))) (is (= :XX (util/var->sql-var '?XX)))) +#?(:cljs + (deftest test-integer?-js + (is (integer? 0)) + (is (integer? 5)) + (is (integer? 50000000000)) + (is (integer? 5.00)) ; Because JS. + (is (not (integer? 5.1))))) + +#?(:clj + (deftest test-integer?-clj + (is (integer? 0)) + (is (integer? 5)) + (is (integer? 50000000000)) + (is (not (integer? 5.00))) + (is (not (integer? 5.1))))) + #?(:cljs (deftest test-raise (let [caught diff --git a/test/datomish/tofinoish_test.cljc b/test/datomish/tofinoish_test.cljc new file mode 100644 index 00000000..0257904b --- /dev/null +++ b/test/datomish/tofinoish_test.cljc @@ -0,0 +1,466 @@ +;; This Source Code Form is subject to the terms of the Mozilla Public +;; License, v. 2.0. If a copy of the MPL was not distributed with this +;; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +(ns datomish.tofinoish-test + #?(:cljs + (:require-macros + [datomish.pair-chan :refer [go-pair !]]]) + #?@(:cljs [[datomish.promise-sqlite] + [datomish.pair-chan] + [datomish.test-macros :refer-macros [deftest-async deftest-db]] + [datomish.node-tempfile :refer [tempfile]] + [cljs.test :as t :refer-macros [is are deftest testing async]] + [cljs.core.async :as a :refer [!]]])) + #?(:clj + (:import [clojure.lang ExceptionInfo])) + #?(:clj + (:import [datascript.db DB]))) + +#?(:cljs + (def Throwable js/Error)) + + +(def page-schema + [{:db/id (d/id-literal :db.part/user) + :db/ident :page/url + :db/valueType :db.type/string ; Because not all URLs are java.net.URIs. For JS we may want to use /uri. + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/doc "A page's URL." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :page/title + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one ; We supersede as we see new titles. + :db/doc "A page's title." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :page/starred + :db/valueType :db.type/boolean + :db/cardinality :db.cardinality/one + :db/doc "Whether the page is starred." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :page/visit + :db/valueType :db.type/ref + :db/cardinality :db.cardinality/many + :db/doc "A visit to the page." + :db.install/_attribute :db.part/db}]) + +(def visit-schema + [{:db/id (d/id-literal :db.part/user) + :db/ident :visit/visitAt + :db/valueType :db.type/instant + :db/cardinality :db.cardinality/many + :db/doc "The instant of the visit." + :db.install/_attribute :db.part/db}]) + +(def session-schema + [{:db/id (d/id-literal :db.part/user) + :db/ident :session/startedFromAncestor + :db/valueType :db.type/ref ; To a session. + :db/cardinality :db.cardinality/one + :db/doc "The ancestor of a session." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :session/startedInScope + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/doc "The parent scope of a session." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :session/startReason + :db/valueType :db.type/string ; TODO: enum? + :db/cardinality :db.cardinality/many + :db/doc "The start reasons of a session." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :session/endReason + :db/valueType :db.type/string ; TODO: enum? + :db/cardinality :db.cardinality/many + :db/doc "The end reasons of a session." + :db.install/_attribute :db.part/db} + {:db/id (d/id-literal :db.part/user) + :db/ident :event/session + :db/valueType :db.type/ref ; To a session. + :db/cardinality :db.cardinality/one + :db/doc "The session in which a tx took place." + :db.install/_attribute :db.part/db}]) + +(def save-schema + [{:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/cardinality :db.cardinality/one + :db/valueType :db.type/ref + :db/ident :save/page} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/cardinality :db.cardinality/one + :db/valueType :db.type/instant + :db/ident :save/savedAt} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/cardinality :db.cardinality/one + :db/valueType :db.type/string + :db/fulltext true + :db/ident :save/title} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/cardinality :db.cardinality/one + :db/valueType :db.type/string + :db/fulltext true + :db/ident :save/excerpt} + {:db/id (d/id-literal :db.part/user) + :db.install/_attribute :db.part/db + :db/cardinality :db.cardinality/one + :db/valueType :db.type/string + :db/fulltext true + :db/ident :save/content}]) + +(def tofino-schema (concat page-schema visit-schema session-schema save-schema)) + +(defn instant [x] + #?(:cljs x) + #?(:clj (.getTime x))) + +(defn now [] + #?(:cljs (js/Date.)) + #?(:clj (java.util.Date.))) + +;; Returns the session ID. +(defn + (> + ( ?time ?since)] + [?page :page/visit ?visit] + [?page :page/url ?uri] + [(get-else $ ?page :page/title "") ?title]] + + '[[?page :page/visit ?visit] + [?visit :visit/visitAt ?time] + [?page :page/url ?uri] + [(get-else $ ?page :page/title "") ?title]])] + + (go-pair + (let [rows (> + rows + (sort-by (comp unchecked-negate third)) ;; TODO: these should be dates! + (take limit) + (map (fn [[uri title lastVisited]] + {:uri uri :title title :lastVisited lastVisited}))))))) + +(defn = (:lastVisited (first visited)) (:lastVisited (second visited)))))) + + (is (= "Example Philanthropy New" + (A page about apples.

Fruit content goes here.

"})) + (A page about apricots.

Fruit content goes here.

"})) + (