From e4f29ea10bb89082b0f9c2cd3c40046bceb1a0ce Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 19 Jul 2016 22:38:52 -0700 Subject: [PATCH] Broad hacking to get the beginnings of negation and arbitrary clause combinations to work. --- src/datomish/clauses.cljc | 193 ++++++++++++++++++++++++++++++++ src/datomish/context.cljc | 9 ++ src/datomish/db.cljc | 4 +- src/datomish/projection.cljc | 56 +++++++++ src/datomish/query.cljc | 189 +++++-------------------------- src/datomish/source.cljc | 62 ++++++++++ src/datomish/sqlite_schema.cljc | 4 +- src/datomish/util.cljc | 23 +++- test/datomish/test/util.cljc | 2 +- 9 files changed, 373 insertions(+), 169 deletions(-) create mode 100644 src/datomish/clauses.cljc create mode 100644 src/datomish/context.cljc create mode 100644 src/datomish/projection.cljc create mode 100644 src/datomish/source.cljc diff --git a/src/datomish/clauses.cljc b/src/datomish/clauses.cljc new file mode 100644 index 00000000..9da18447 --- /dev/null +++ b/src/datomish/clauses.cljc @@ -0,0 +1,193 @@ +;; This Source Code Form is subject to the terms of the Mozilla Public +;; License, v. 2.0. If a copy of the MPL was not distributed with this +;; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +(ns datomish.clauses + (:require + [datomish.source + :refer [attribute-in-source + constant-in-source + source->from + source->constraints]] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]] + [datascript.parser :as dp + #?@(:cljs [:refer [Not Pattern DefaultSrc Variable Constant Placeholder]])] + [honeysql.core :as sql] + [clojure.string :as str] + ) + #?(:clj + (:import + [datascript.parser Not Pattern DefaultSrc Variable Constant Placeholder]))) + +;; A CC is a collection of clauses that are combined with JOIN. +;; The topmost form in a query is a ConjoiningClauses. +;; +;; Ordinary pattern clauses turn into FROM parts and WHERE parts using :=. +;; Predicate clauses turn into the same, but with other functions. +;; Function clauses with bindings turn into: +;; * Subqueries. Perhaps less efficient? Certainly clearer. +;; * Projection expressions, if only used for output. +;; * Inline expressions? +;; `not` turns into NOT EXISTS with WHERE clauses inside the subquery to +;; bind it to the outer variables. +;; `not-join` is similar, but with explicit binding. +;; `or` turns into a collection of UNIONs inside a subquery. +;; `or`'s documentation states that all clauses must include the same vars, +;; but that's an over-simplification: all clauses must refer to the external +;; unification vars. +;; The entire UNION-set is JOINed to any surrounding expressions per the `rule-vars` +;; clause, or the intersection of the vars in the two sides of the JOIN. + +;; `from` is a list of [source alias] pairs, suitable for passing to honeysql. +;; `bindings` is a map from var to qualified columns. +;; `wheres` is a list of fragments that can be joined by `:and`. +(defrecord ConjoiningClauses [source from bindings wheres]) + +(defn bind-column-to-var [cc variable col] + (let [var (:symbol variable)] + (util/conj-in cc [:bindings var] col))) + +(defn constrain-column-to-constant [cc col position value] + (util/conj-in cc [:wheres] + [:= col (if (= :a position) + (attribute-in-source (:source cc) value) + (constant-in-source (:source cc) value))])) + +(declare Not->NotJoinClause not-join->where-fragment impose-external-bindings) + +;; Accumulates a pattern into the CC. Returns a new CC. +(defn apply-pattern-clause + "Transform a DataScript Pattern instance into the parts needed + to build a SQL expression. + + @arg cc A CC instance. + @arg pattern The pattern instance. + @return an augmented CC" + [cc pattern] + (when-not (instance? Pattern pattern) + (raise-str "Expected to be called with a Pattern instance." pattern)) + (when-not (instance? DefaultSrc (:source pattern)) + (raise-str "Non-default sources are not supported in patterns. Pattern: " pattern)) + + (let [[table alias] (source->from (:source cc)) ; e.g., [:datoms :datoms123] + places (map (fn [place col] [place col]) + (:pattern pattern) + (:columns (:source cc)))] + (reduce + (fn [cc + [pattern-part ; ?x, :foo/bar, 42 + position]] ; :a + (let [col (sql/qualify alias (name position))] ; :datoms123.a + (condp instance? pattern-part + ;; Placeholders don't contribute any bindings, nor do + ;; they constrain the query -- there's no need to produce + ;; IS NOT NULL, because we don't store nulls in our schema. + Placeholder + cc + + Variable + (bind-column-to-var cc pattern-part col) + + Constant + (constrain-column-to-constant cc col position (:value pattern-part)) + + (raise-str "Unknown pattern part " pattern-part)))) + + ;; Record the new table mapping. + (util/conj-in cc [:from] [table alias]) + + places))) + +(defn apply-not-clause [cc not] + (when-not (instance? Not not) + (raise-str "Expected to be called with a Not instance." not)) + (when-not (instance? DefaultSrc (:source not)) + (raise-str "Non-default sources are not supported in patterns. Pattern: " not)) + + (let [not-join-clause (Not->NotJoinClause (:source cc) not)] + ;; If our bindings are already available, great -- emit a :wheres + ;; fragment, and include the external bindings so that they match up. + ;; Otherwise, we need to delay, and we do that now by failing. + + (let [seen (set (keys (:bindings cc))) + to-unify (set (map :symbol (:unify-vars not-join-clause)))] + (println "Seen " seen " need " to-unify) + (if (clojure.set/subset? to-unify seen) + (util/conj-in cc [:wheres] (not-join->where-fragment + (impose-external-bindings not-join-clause (:bindings cc)))) + (raise-str "Haven't seen all the necessary vars for this `not` clause."))))) + +(defn apply-clause [cc it] + (if (instance? Not it) + (apply-not-clause cc it) + (apply-pattern-clause cc it))) + +(defn- bindings->where + "Take a bindings map like + {?foo [:datoms12.e :datoms13.v :datoms14.e]} + and produce a list of constraints expression like + [[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]] + + TODO: experiment; it might be the case that producing more + pairwise equalities we get better or worse performance." + [bindings] + (mapcat (fn [[_ vs]] + (when (> (count vs) 1) + (let [root (first vs)] + (map (fn [v] [:= root v]) (rest vs))))) + bindings)) + +(defn expand-where-from-bindings + "Take the bindings in the CC and contribute + additional where clauses. Calling this more than + once will result in duplicate clauses." + [cc] + (assoc cc :wheres (concat (bindings->where (:bindings cc)) + (:wheres cc)))) + +(defn expand-pattern-clauses + "Reduce a sequence of patterns into a CC." + [cc patterns] + (reduce apply-clause cc patterns)) + +(defn patterns->cc [source patterns] + (expand-where-from-bindings + (expand-pattern-clauses + (->ConjoiningClauses source [] {} []) + patterns))) + +;; A `not-join` clause is a filter. It takes bindings from the enclosing query +;; and runs as a subquery with `NOT EXISTS`. +;; The only difference between `not` and `not-join` is that `not` computes +;; its varlist by recursively walking the provided patterns. +;; DataScript's parser does variable extraction for us, and also verifies +;; that a declared variable list is valid for the clauses given. +(defrecord NotJoinClause [unify-vars cc]) + +(defn make-not-join-clause [source unify-vars patterns] + (->NotJoinClause unify-vars (patterns->cc source patterns))) + +(defn Not->NotJoinClause [source not] + (when-not (instance? DefaultSrc (:source not)) + (raise-str "Non-default sources are not supported in patterns. Pattern: " + not)) + (make-not-join-clause source (:vars not) (:clauses not))) + +;; This is so we can link the clause to the outside world. +(defn impose-external-constraints [not-join-clause wheres] + (util/concat-in not-join-clause [:cc :wheres] wheres)) + +(defn impose-external-bindings [not-join-clause bindings] + (let [ours (:bindings (:cc not-join-clause)) + vars (clojure.set/intersection (set (keys bindings)) (set (keys ours))) + pairings (map (fn [v] [:= (first (v bindings)) (first (v ours))]) vars)] + (impose-external-constraints not-join-clause pairings))) + +(defn cc->partial-subquery [cc] + (merge + {:from (:from cc)} + (when-not (empty? (:wheres cc)) + {:where (cons :and (:wheres cc))}))) + +(defn not-join->where-fragment [not-join] + [:not [:exists (merge {:select 1} (cc->partial-subquery (:cc not-join)))]]) diff --git a/src/datomish/context.cljc b/src/datomish/context.cljc new file mode 100644 index 00000000..67ba5444 --- /dev/null +++ b/src/datomish/context.cljc @@ -0,0 +1,9 @@ +;; This Source Code Form is subject to the terms of the Mozilla Public +;; License, v. 2.0. If a copy of the MPL was not distributed with this +;; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +;; A context, very simply, holds on to a default source. Eventually +;; it'll also do projection and similar transforms. +(ns datomish.context) + +(defrecord Context [default-source]) diff --git a/src/datomish/db.cljc b/src/datomish/db.cljc index cf697573..b7b709a8 100644 --- a/src/datomish/db.cljc +++ b/src/datomish/db.cljc @@ -8,7 +8,7 @@ [datomish.pair-chan :refer [go-pair DB sqlite-connection))) diff --git a/src/datomish/projection.cljc b/src/datomish/projection.cljc new file mode 100644 index 00000000..c0bcf60f --- /dev/null +++ b/src/datomish/projection.cljc @@ -0,0 +1,56 @@ +;; This Source Code Form is subject to the terms of the Mozilla Public +;; License, v. 2.0. If a copy of the MPL was not distributed with this +;; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +(ns datomish.projection + (:require + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]] + [datascript.parser :as dp + #?@(:cljs [:refer [Pattern DefaultSrc Variable Constant Placeholder]])] + ) + #?(:clj (:import [datascript.parser Pattern DefaultSrc Variable Constant Placeholder])) + ) + +(defn lookup-variable [cc variable] + (or (-> cc :bindings variable first) + (raise-str "Couldn't find variable " variable))) + +(defn apply-elements-to-context [context elements] + (assoc context :elements elements)) + +(defn sql-projection + "Take a `find` clause's `:elements` list and turn it into a SQL + projection clause, suitable for passing as a `:select` clause to + honeysql. + + For example: + + [Variable{:symbol ?foo}, Variable{:symbol ?bar}] + + with bindings in the context: + + {?foo [:datoms12.e :datoms13.v], ?bar [:datoms13.e]} + + => + + [[:datoms12.e :foo] [:datoms13.e :bar]] + + @param context A Context, containing elements. + @return a sequence of pairs." + [context] + (let [elements (:elements context)] + (when-not (every? #(instance? Variable %1) elements) + (raise-str "Unable to :find non-variables.")) + (map (fn [elem] + (let [var (:symbol elem)] + [(lookup-variable context var) (util/var->sql-var var)])) + elements))) + +(defn row-pair-transducer [context projection] + ;; For now, we only support straight var lists, so + ;; our transducer is trivial. + (let [columns-in-order (map second projection)] + (map (fn [[row err]] + (if err + [row err] + [(map row columns-in-order) nil]))))) diff --git a/src/datomish/query.cljc b/src/datomish/query.cljc index 334c58e6..c8a603c7 100644 --- a/src/datomish/query.cljc +++ b/src/datomish/query.cljc @@ -4,7 +4,7 @@ (ns datomish.query (:require - [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]] + [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]] [datomish.transforms :as transforms] [datascript.parser :as dp #?@(:cljs [:refer [Pattern DefaultSrc Variable Constant Placeholder]])] @@ -18,163 +18,6 @@ ;; but not automatically safe for use. (def sql-quoting-style :ansi) -;; -;; Context. -;; -;; `attribute-transform` is a function from attribute to constant value. Used to -;; turn, e.g., :p/attribute into an interned integer. -;; `constant-transform` is a function from constant value to constant value. Used to -;; turn, e.g., the literal 'true' into 1. -;; `from` is a list of table pairs, suitable for passing to honeysql. -;; `:bindings` is a map from var to qualified columns. -;; `:wheres` is a list of fragments that can be joined by `:and`. -;; -(defrecord Context [from bindings wheres elements attribute-transform constant-transform]) - -(defn attribute-in-context [context attribute] - ((:attribute-transform context) attribute)) - -(defn constant-in-context [context constant] - ((:constant-transform context) constant)) - -(defn bind-column-to-var [context variable col] - (let [var (:symbol variable) - existing-bindings (get-in context [:bindings var])] - (assoc-in context [:bindings var] (conj existing-bindings col)))) - -(defn constrain-column-to-constant [context col position value] - (util/conj-in context [:wheres] - [:= col (if (= :a position) - (attribute-in-context context value) - (constant-in-context context value))])) - -(defn lookup-variable [context variable] - (or (-> context :bindings variable first) - (raise (str "Couldn't find variable " variable)))) - -(defn make-context - ([] - (make-context transforms/attribute-transform-string transforms/constant-transform-default)) - ([attribute-transform constant-transform] - (map->Context {:from [] - :bindings {} - :wheres [] - :elements [] - :attribute-transform attribute-transform - :constant-transform constant-transform}))) - -(defn apply-pattern-to-context - "Transform a DataScript Pattern instance into the parts needed - to build a SQL expression. - - @arg context A Context instance. - @arg pattern The pattern instance. - @return an augmented Context." - [context pattern] - (when-not (instance? Pattern pattern) - (raise "Expected to be called with a Pattern instance.")) - (when-not (instance? DefaultSrc (:source pattern)) - (raise (str "Non-default sources are not supported in patterns. Pattern: " - (print-str pattern)))) - - (let [table :datoms - alias (gensym (name table)) - places (map (fn [place col] [place col]) - (:pattern pattern) - [:e :a :v :tx])] - (reduce - (fn [context - [pattern-part ; ?x, :foo/bar, 42 - position]] ; :a - (let [col (sql/qualify alias (name position))] ; :datoms123.a - (condp instance? pattern-part - ;; Placeholders don't contribute any bindings, nor do - ;; they constrain the query -- there's no need to produce - ;; IS NOT NULL, because we don't store nulls in our schema. - Placeholder - context - - Variable - (bind-column-to-var context pattern-part col) - - Constant - (constrain-column-to-constant context col position (:value pattern-part)) - - (raise (str "Unknown pattern part " (print-str pattern-part)))))) - - ;; Record the new table mapping. - (util/conj-in context [:from] [table alias]) - - places))) - -(defn- bindings->where - "Take a bindings map like - {?foo [:datoms12.e :datoms13.v :datoms14.e]} - and produce a list of constraints expression like - [[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]] - - TODO: experiment; it might be the case that producing more - pairwise equalities we get better or worse performance." - [bindings] - (mapcat (fn [[_ vs]] - (when (> (count vs) 1) - (let [root (first vs)] - (map (fn [v] [:= root v]) (rest vs))))) - bindings)) - -(defn expand-where-from-bindings - "Take the bindings in the context and contribute - additional where clauses. Calling this more than - once will result in duplicate clauses." - [context] - (assoc context :wheres (concat (bindings->where (:bindings context)) - (:wheres context)))) - -(defn apply-elements-to-context [context elements] - (assoc context :elements elements)) - -(defn expand-patterns-into-context - "Reduce a sequence of patterns into a Context." - [context patterns] - (reduce apply-pattern-to-context context patterns)) - -(defn sql-projection - "Take a `find` clause's `:elements` list and turn it into a SQL - projection clause, suitable for passing as a `:select` clause to - honeysql. - - For example: - - [Variable{:symbol ?foo}, Variable{:symbol ?bar}] - - with bindings in the context: - - {?foo [:datoms12.e :datoms13.v], ?bar [:datoms13.e]} - - => - - [[:datoms12.e :foo] [:datoms13.e :bar]] - - @param context A Context, containing elements. - @return a sequence of pairs." - [context] - (let [elements (:elements context)] - (when-not (every? #(instance? Variable %1) elements) - (raise "Unable to :find non-variables.")) - (map (fn [elem] - (let [var (:symbol elem)] - [(lookup-variable context var) (util/var->sql-var var)])) - elements))) - -(defn row-pair-transducer [context projection] - ;; For now, we only support straight var lists, so - ;; our transducer is trivial. - (let [columns-in-order (map second projection)] - (map (fn [[row err]] - (if err - [row err] - [(map row columns-in-order) nil]))))) - (defn context->sql-clause [context] (merge {:select (sql-projection context) @@ -191,12 +34,12 @@ (defn- validate-with [with] (when-not (nil? with) - (raise "`with` not supported."))) + (raise-str "`with` not supported."))) (defn- validate-in [in] (when-not (and (== 1 (count in)) (= "$" (name (-> in first :variable :symbol)))) - (raise (str "Complex `in` not supported: " (print-str in))))) + (raise-str "Complex `in` not supported: " in))) (defn expand-find-into-context [context find] ;; There's some confusing use of 'where' and friends here. That's because @@ -252,10 +95,34 @@ (pattern->sql (first (:where - (datomish.query/parse + (datascript.parser/parse-query '[:find (max ?timestampMicros) (pull ?page [:page/url :page/title]) ?page :in $ :where [?page :page/starred true ?t] + (not-join [?fo] + [(> ?fooo 5)] + [?xpage :page/starred false] + ) [?t :db/txInstant ?timestampMicros]]))) identity)) + +(cc->partial-subquery + + (require 'datomish.clauses) + (in-ns 'datomish.clauses) +(patterns->cc (datomish.source/datoms-source nil) + (:where +(datascript.parser/parse-query + '[:find (max ?timestampMicros) (pull ?page [:page/url :page/title]) ?page + :in $ + :where + [?page :page/starred true ?t] + (not-join [?page] + [?page :page/starred false] + ) + [?t :db/txInstant ?timestampMicros]]))) + +(Not->NotJoinClause (datomish.source/datoms-source nil) +#object[datomish.clauses$Not__GT_NotJoinClause 0x6d8aa02d "datomish.clauses$Not__GT_NotJoinClause@6d8aa02d"] +datomish.clauses=> #datascript.parser.Not{:source #datascript.parser.DefaultSrc{}, :vars [#datascript.parser.Variable{:symbol ?fooo}], :clauses [#datascript.parser.Pattern{:source #datascript.parser.DefaultSrc{}, :pattern [#datascript.parser.Variable{:symbol ?xpage} #datascript.parser.Constant{:value :page/starred} #datascript.parser.Constant{:value false}]}]}) diff --git a/src/datomish/source.cljc b/src/datomish/source.cljc new file mode 100644 index 00000000..d92bad07 --- /dev/null +++ b/src/datomish/source.cljc @@ -0,0 +1,62 @@ +;; This Source Code Form is subject to the terms of the Mozilla Public +;; License, v. 2.0. If a copy of the MPL was not distributed with this +;; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +(ns datomish.source + (:require + [datomish.transforms :as transforms])) + +;;; +;;; A source is something that can match patterns. For example: +;;; +;;; * The database itself. +;;; * The history of the database. +;;; * A filtered version of the database or the history. +;;; +;;; We model this in a SQL context as something that can: +;;; +;;; * Give us a table name. +;;; * Give us a new alias for the table name. +;;; * Provide us with a list of columns to match, positionally, +;;; against patterns. +;;; * Provide us with a set of WHERE fragments that, in combination +;;; with the table name, denote the source. +;;; * Transform constants and attributes into something usable +;;; by the source. + +(defrecord + Source + [table ; e.g., :datoms + columns ; e.g., [:e :a :v :tx] + + ;; `attribute-transform` is a function from attribute to constant value. Used to + ;; turn, e.g., :p/attribute into an interned integer. + ;; `constant-transform` is a function from constant value to constant value. Used to + ;; turn, e.g., the literal 'true' into 1. + attribute-transform + constant-transform + + ;; Not currently used. + make-constraints ; ?fn [source alias] => [where-clauses] + ]) + +(defn datoms-source [db] + (->Source :datoms + [:e :a :v :tx :added] + transforms/attribute-transform-string + transforms/constant-transform-default + nil)) + +(defn source->from [source] + (let [table (:table source)] + [table (gensym (name table))])) + +(defn source->constraints [source alias] + (when-let [f (:make-constraints source)] + (f alias))) + +(defn attribute-in-source [source attribute] + ((:attribute-transform source) attribute)) + +(defn constant-in-source [source constant] + ((:constant-transform source) constant)) diff --git a/src/datomish/sqlite_schema.cljc b/src/datomish/sqlite_schema.cljc index 2b1c38f7..ef63975b 100644 --- a/src/datomish/sqlite_schema.cljc +++ b/src/datomish/sqlite_schema.cljc @@ -8,7 +8,7 @@ [datomish.pair-chan :refer [go-pair !]]]) @@ -42,7 +42,7 @@ {:pre [(> from-version 0)]} ;; Or we'd create-current-version instead. {:pre [(< from-version current-version)]} ;; Or we wouldn't need to update-from-version. (go-pair - (raise "No migrations yet defined!") + (raise-str "No migrations yet defined!") (