Broad hacking to get the beginnings of negation and arbitrary clause combinations to work.

This commit is contained in:
Richard Newman 2016-07-19 22:38:52 -07:00
parent 9ae9a0572b
commit e4f29ea10b
9 changed files with 373 additions and 169 deletions

193
src/datomish/clauses.cljc Normal file
View file

@ -0,0 +1,193 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.clauses
(:require
[datomish.source
:refer [attribute-in-source
constant-in-source
source->from
source->constraints]]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]]
[datascript.parser :as dp
#?@(:cljs [:refer [Not Pattern DefaultSrc Variable Constant Placeholder]])]
[honeysql.core :as sql]
[clojure.string :as str]
)
#?(:clj
(:import
[datascript.parser Not Pattern DefaultSrc Variable Constant Placeholder])))
;; A CC is a collection of clauses that are combined with JOIN.
;; The topmost form in a query is a ConjoiningClauses.
;;
;; Ordinary pattern clauses turn into FROM parts and WHERE parts using :=.
;; Predicate clauses turn into the same, but with other functions.
;; Function clauses with bindings turn into:
;; * Subqueries. Perhaps less efficient? Certainly clearer.
;; * Projection expressions, if only used for output.
;; * Inline expressions?
;; `not` turns into NOT EXISTS with WHERE clauses inside the subquery to
;; bind it to the outer variables.
;; `not-join` is similar, but with explicit binding.
;; `or` turns into a collection of UNIONs inside a subquery.
;; `or`'s documentation states that all clauses must include the same vars,
;; but that's an over-simplification: all clauses must refer to the external
;; unification vars.
;; The entire UNION-set is JOINed to any surrounding expressions per the `rule-vars`
;; clause, or the intersection of the vars in the two sides of the JOIN.
;; `from` is a list of [source alias] pairs, suitable for passing to honeysql.
;; `bindings` is a map from var to qualified columns.
;; `wheres` is a list of fragments that can be joined by `:and`.
(defrecord ConjoiningClauses [source from bindings wheres])
(defn bind-column-to-var [cc variable col]
(let [var (:symbol variable)]
(util/conj-in cc [:bindings var] col)))
(defn constrain-column-to-constant [cc col position value]
(util/conj-in cc [:wheres]
[:= col (if (= :a position)
(attribute-in-source (:source cc) value)
(constant-in-source (:source cc) value))]))
(declare Not->NotJoinClause not-join->where-fragment impose-external-bindings)
;; Accumulates a pattern into the CC. Returns a new CC.
(defn apply-pattern-clause
"Transform a DataScript Pattern instance into the parts needed
to build a SQL expression.
@arg cc A CC instance.
@arg pattern The pattern instance.
@return an augmented CC"
[cc pattern]
(when-not (instance? Pattern pattern)
(raise-str "Expected to be called with a Pattern instance." pattern))
(when-not (instance? DefaultSrc (:source pattern))
(raise-str "Non-default sources are not supported in patterns. Pattern: " pattern))
(let [[table alias] (source->from (:source cc)) ; e.g., [:datoms :datoms123]
places (map (fn [place col] [place col])
(:pattern pattern)
(:columns (:source cc)))]
(reduce
(fn [cc
[pattern-part ; ?x, :foo/bar, 42
position]] ; :a
(let [col (sql/qualify alias (name position))] ; :datoms123.a
(condp instance? pattern-part
;; Placeholders don't contribute any bindings, nor do
;; they constrain the query -- there's no need to produce
;; IS NOT NULL, because we don't store nulls in our schema.
Placeholder
cc
Variable
(bind-column-to-var cc pattern-part col)
Constant
(constrain-column-to-constant cc col position (:value pattern-part))
(raise-str "Unknown pattern part " pattern-part))))
;; Record the new table mapping.
(util/conj-in cc [:from] [table alias])
places)))
(defn apply-not-clause [cc not]
(when-not (instance? Not not)
(raise-str "Expected to be called with a Not instance." not))
(when-not (instance? DefaultSrc (:source not))
(raise-str "Non-default sources are not supported in patterns. Pattern: " not))
(let [not-join-clause (Not->NotJoinClause (:source cc) not)]
;; If our bindings are already available, great -- emit a :wheres
;; fragment, and include the external bindings so that they match up.
;; Otherwise, we need to delay, and we do that now by failing.
(let [seen (set (keys (:bindings cc)))
to-unify (set (map :symbol (:unify-vars not-join-clause)))]
(println "Seen " seen " need " to-unify)
(if (clojure.set/subset? to-unify seen)
(util/conj-in cc [:wheres] (not-join->where-fragment
(impose-external-bindings not-join-clause (:bindings cc))))
(raise-str "Haven't seen all the necessary vars for this `not` clause.")))))
(defn apply-clause [cc it]
(if (instance? Not it)
(apply-not-clause cc it)
(apply-pattern-clause cc it)))
(defn- bindings->where
"Take a bindings map like
{?foo [:datoms12.e :datoms13.v :datoms14.e]}
and produce a list of constraints expression like
[[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]]
TODO: experiment; it might be the case that producing more
pairwise equalities we get better or worse performance."
[bindings]
(mapcat (fn [[_ vs]]
(when (> (count vs) 1)
(let [root (first vs)]
(map (fn [v] [:= root v]) (rest vs)))))
bindings))
(defn expand-where-from-bindings
"Take the bindings in the CC and contribute
additional where clauses. Calling this more than
once will result in duplicate clauses."
[cc]
(assoc cc :wheres (concat (bindings->where (:bindings cc))
(:wheres cc))))
(defn expand-pattern-clauses
"Reduce a sequence of patterns into a CC."
[cc patterns]
(reduce apply-clause cc patterns))
(defn patterns->cc [source patterns]
(expand-where-from-bindings
(expand-pattern-clauses
(->ConjoiningClauses source [] {} [])
patterns)))
;; A `not-join` clause is a filter. It takes bindings from the enclosing query
;; and runs as a subquery with `NOT EXISTS`.
;; The only difference between `not` and `not-join` is that `not` computes
;; its varlist by recursively walking the provided patterns.
;; DataScript's parser does variable extraction for us, and also verifies
;; that a declared variable list is valid for the clauses given.
(defrecord NotJoinClause [unify-vars cc])
(defn make-not-join-clause [source unify-vars patterns]
(->NotJoinClause unify-vars (patterns->cc source patterns)))
(defn Not->NotJoinClause [source not]
(when-not (instance? DefaultSrc (:source not))
(raise-str "Non-default sources are not supported in patterns. Pattern: "
not))
(make-not-join-clause source (:vars not) (:clauses not)))
;; This is so we can link the clause to the outside world.
(defn impose-external-constraints [not-join-clause wheres]
(util/concat-in not-join-clause [:cc :wheres] wheres))
(defn impose-external-bindings [not-join-clause bindings]
(let [ours (:bindings (:cc not-join-clause))
vars (clojure.set/intersection (set (keys bindings)) (set (keys ours)))
pairings (map (fn [v] [:= (first (v bindings)) (first (v ours))]) vars)]
(impose-external-constraints not-join-clause pairings)))
(defn cc->partial-subquery [cc]
(merge
{:from (:from cc)}
(when-not (empty? (:wheres cc))
{:where (cons :and (:wheres cc))})))
(defn not-join->where-fragment [not-join]
[:not [:exists (merge {:select 1} (cc->partial-subquery (:cc not-join)))]])

View file

@ -0,0 +1,9 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;; A context, very simply, holds on to a default source. Eventually
;; it'll also do projection and similar transforms.
(ns datomish.context)
(defrecord Context [default-source])

View file

@ -8,7 +8,7 @@
[datomish.pair-chan :refer [go-pair <?]]
[cljs.core.async.macros :refer [go]]))
(:require
[datomish.util :as util :refer [raise]]
[datomish.util :as util :refer [raise-str]]
[datomish.sqlite :as s]
[datomish.sqlite-schema :as sqlite-schema]
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]]
@ -28,5 +28,5 @@
(defn <with-sqlite-connection [sqlite-connection]
(go-pair
(when-not (= sqlite-schema/current-version (<? (sqlite-schema/<ensure-current-version sqlite-connection)))
(raise "Could not ensure current SQLite schema version."))
(raise-str "Could not ensure current SQLite schema version."))
(->DB sqlite-connection)))

View file

@ -0,0 +1,56 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.projection
(:require
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]]
[datascript.parser :as dp
#?@(:cljs [:refer [Pattern DefaultSrc Variable Constant Placeholder]])]
)
#?(:clj (:import [datascript.parser Pattern DefaultSrc Variable Constant Placeholder]))
)
(defn lookup-variable [cc variable]
(or (-> cc :bindings variable first)
(raise-str "Couldn't find variable " variable)))
(defn apply-elements-to-context [context elements]
(assoc context :elements elements))
(defn sql-projection
"Take a `find` clause's `:elements` list and turn it into a SQL
projection clause, suitable for passing as a `:select` clause to
honeysql.
For example:
[Variable{:symbol ?foo}, Variable{:symbol ?bar}]
with bindings in the context:
{?foo [:datoms12.e :datoms13.v], ?bar [:datoms13.e]}
=>
[[:datoms12.e :foo] [:datoms13.e :bar]]
@param context A Context, containing elements.
@return a sequence of pairs."
[context]
(let [elements (:elements context)]
(when-not (every? #(instance? Variable %1) elements)
(raise-str "Unable to :find non-variables."))
(map (fn [elem]
(let [var (:symbol elem)]
[(lookup-variable context var) (util/var->sql-var var)]))
elements)))
(defn row-pair-transducer [context projection]
;; For now, we only support straight var lists, so
;; our transducer is trivial.
(let [columns-in-order (map second projection)]
(map (fn [[row err]]
(if err
[row err]
[(map row columns-in-order) nil])))))

View file

@ -4,7 +4,7 @@
(ns datomish.query
(:require
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]]
[datomish.transforms :as transforms]
[datascript.parser :as dp
#?@(:cljs [:refer [Pattern DefaultSrc Variable Constant Placeholder]])]
@ -18,163 +18,6 @@
;; but not automatically safe for use.
(def sql-quoting-style :ansi)
;;
;; Context.
;;
;; `attribute-transform` is a function from attribute to constant value. Used to
;; turn, e.g., :p/attribute into an interned integer.
;; `constant-transform` is a function from constant value to constant value. Used to
;; turn, e.g., the literal 'true' into 1.
;; `from` is a list of table pairs, suitable for passing to honeysql.
;; `:bindings` is a map from var to qualified columns.
;; `:wheres` is a list of fragments that can be joined by `:and`.
;;
(defrecord Context [from bindings wheres elements attribute-transform constant-transform])
(defn attribute-in-context [context attribute]
((:attribute-transform context) attribute))
(defn constant-in-context [context constant]
((:constant-transform context) constant))
(defn bind-column-to-var [context variable col]
(let [var (:symbol variable)
existing-bindings (get-in context [:bindings var])]
(assoc-in context [:bindings var] (conj existing-bindings col))))
(defn constrain-column-to-constant [context col position value]
(util/conj-in context [:wheres]
[:= col (if (= :a position)
(attribute-in-context context value)
(constant-in-context context value))]))
(defn lookup-variable [context variable]
(or (-> context :bindings variable first)
(raise (str "Couldn't find variable " variable))))
(defn make-context
([]
(make-context transforms/attribute-transform-string transforms/constant-transform-default))
([attribute-transform constant-transform]
(map->Context {:from []
:bindings {}
:wheres []
:elements []
:attribute-transform attribute-transform
:constant-transform constant-transform})))
(defn apply-pattern-to-context
"Transform a DataScript Pattern instance into the parts needed
to build a SQL expression.
@arg context A Context instance.
@arg pattern The pattern instance.
@return an augmented Context."
[context pattern]
(when-not (instance? Pattern pattern)
(raise "Expected to be called with a Pattern instance."))
(when-not (instance? DefaultSrc (:source pattern))
(raise (str "Non-default sources are not supported in patterns. Pattern: "
(print-str pattern))))
(let [table :datoms
alias (gensym (name table))
places (map (fn [place col] [place col])
(:pattern pattern)
[:e :a :v :tx])]
(reduce
(fn [context
[pattern-part ; ?x, :foo/bar, 42
position]] ; :a
(let [col (sql/qualify alias (name position))] ; :datoms123.a
(condp instance? pattern-part
;; Placeholders don't contribute any bindings, nor do
;; they constrain the query -- there's no need to produce
;; IS NOT NULL, because we don't store nulls in our schema.
Placeholder
context
Variable
(bind-column-to-var context pattern-part col)
Constant
(constrain-column-to-constant context col position (:value pattern-part))
(raise (str "Unknown pattern part " (print-str pattern-part))))))
;; Record the new table mapping.
(util/conj-in context [:from] [table alias])
places)))
(defn- bindings->where
"Take a bindings map like
{?foo [:datoms12.e :datoms13.v :datoms14.e]}
and produce a list of constraints expression like
[[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]]
TODO: experiment; it might be the case that producing more
pairwise equalities we get better or worse performance."
[bindings]
(mapcat (fn [[_ vs]]
(when (> (count vs) 1)
(let [root (first vs)]
(map (fn [v] [:= root v]) (rest vs)))))
bindings))
(defn expand-where-from-bindings
"Take the bindings in the context and contribute
additional where clauses. Calling this more than
once will result in duplicate clauses."
[context]
(assoc context :wheres (concat (bindings->where (:bindings context))
(:wheres context))))
(defn apply-elements-to-context [context elements]
(assoc context :elements elements))
(defn expand-patterns-into-context
"Reduce a sequence of patterns into a Context."
[context patterns]
(reduce apply-pattern-to-context context patterns))
(defn sql-projection
"Take a `find` clause's `:elements` list and turn it into a SQL
projection clause, suitable for passing as a `:select` clause to
honeysql.
For example:
[Variable{:symbol ?foo}, Variable{:symbol ?bar}]
with bindings in the context:
{?foo [:datoms12.e :datoms13.v], ?bar [:datoms13.e]}
=>
[[:datoms12.e :foo] [:datoms13.e :bar]]
@param context A Context, containing elements.
@return a sequence of pairs."
[context]
(let [elements (:elements context)]
(when-not (every? #(instance? Variable %1) elements)
(raise "Unable to :find non-variables."))
(map (fn [elem]
(let [var (:symbol elem)]
[(lookup-variable context var) (util/var->sql-var var)]))
elements)))
(defn row-pair-transducer [context projection]
;; For now, we only support straight var lists, so
;; our transducer is trivial.
(let [columns-in-order (map second projection)]
(map (fn [[row err]]
(if err
[row err]
[(map row columns-in-order) nil])))))
(defn context->sql-clause [context]
(merge
{:select (sql-projection context)
@ -191,12 +34,12 @@
(defn- validate-with [with]
(when-not (nil? with)
(raise "`with` not supported.")))
(raise-str "`with` not supported.")))
(defn- validate-in [in]
(when-not (and (== 1 (count in))
(= "$" (name (-> in first :variable :symbol))))
(raise (str "Complex `in` not supported: " (print-str in)))))
(raise-str "Complex `in` not supported: " in)))
(defn expand-find-into-context [context find]
;; There's some confusing use of 'where' and friends here. That's because
@ -252,10 +95,34 @@
(pattern->sql
(first
(:where
(datomish.query/parse
(datascript.parser/parse-query
'[:find (max ?timestampMicros) (pull ?page [:page/url :page/title]) ?page
:in $
:where
[?page :page/starred true ?t]
(not-join [?fo]
[(> ?fooo 5)]
[?xpage :page/starred false]
)
[?t :db/txInstant ?timestampMicros]])))
identity))
(cc->partial-subquery
(require 'datomish.clauses)
(in-ns 'datomish.clauses)
(patterns->cc (datomish.source/datoms-source nil)
(:where
(datascript.parser/parse-query
'[:find (max ?timestampMicros) (pull ?page [:page/url :page/title]) ?page
:in $
:where
[?page :page/starred true ?t]
(not-join [?page]
[?page :page/starred false]
)
[?t :db/txInstant ?timestampMicros]])))
(Not->NotJoinClause (datomish.source/datoms-source nil)
#object[datomish.clauses$Not__GT_NotJoinClause 0x6d8aa02d "datomish.clauses$Not__GT_NotJoinClause@6d8aa02d"]
datomish.clauses=> #datascript.parser.Not{:source #datascript.parser.DefaultSrc{}, :vars [#datascript.parser.Variable{:symbol ?fooo}], :clauses [#datascript.parser.Pattern{:source #datascript.parser.DefaultSrc{}, :pattern [#datascript.parser.Variable{:symbol ?xpage} #datascript.parser.Constant{:value :page/starred} #datascript.parser.Constant{:value false}]}]})

62
src/datomish/source.cljc Normal file
View file

@ -0,0 +1,62 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.source
(:require
[datomish.transforms :as transforms]))
;;;
;;; A source is something that can match patterns. For example:
;;;
;;; * The database itself.
;;; * The history of the database.
;;; * A filtered version of the database or the history.
;;;
;;; We model this in a SQL context as something that can:
;;;
;;; * Give us a table name.
;;; * Give us a new alias for the table name.
;;; * Provide us with a list of columns to match, positionally,
;;; against patterns.
;;; * Provide us with a set of WHERE fragments that, in combination
;;; with the table name, denote the source.
;;; * Transform constants and attributes into something usable
;;; by the source.
(defrecord
Source
[table ; e.g., :datoms
columns ; e.g., [:e :a :v :tx]
;; `attribute-transform` is a function from attribute to constant value. Used to
;; turn, e.g., :p/attribute into an interned integer.
;; `constant-transform` is a function from constant value to constant value. Used to
;; turn, e.g., the literal 'true' into 1.
attribute-transform
constant-transform
;; Not currently used.
make-constraints ; ?fn [source alias] => [where-clauses]
])
(defn datoms-source [db]
(->Source :datoms
[:e :a :v :tx :added]
transforms/attribute-transform-string
transforms/constant-transform-default
nil))
(defn source->from [source]
(let [table (:table source)]
[table (gensym (name table))]))
(defn source->constraints [source alias]
(when-let [f (:make-constraints source)]
(f alias)))
(defn attribute-in-source [source attribute]
((:attribute-transform source) attribute))
(defn constant-in-source [source constant]
((:constant-transform source) constant))

View file

@ -8,7 +8,7 @@
[datomish.pair-chan :refer [go-pair <?]]
[cljs.core.async.macros :refer [go]]))
(:require
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise-str cond-let]]
[datomish.sqlite :as s]
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]]
[clojure.core.async :refer [go <! >!]]])
@ -42,7 +42,7 @@
{:pre [(> from-version 0)]} ;; Or we'd create-current-version instead.
{:pre [(< from-version current-version)]} ;; Or we wouldn't need to update-from-version.
(go-pair
(raise "No migrations yet defined!")
(raise-str "No migrations yet defined!")
(<? (s/set-user-version db current-version))
(<? (s/get-user-version db))))

View file

@ -8,10 +8,20 @@
[clojure.string :as str]))
#?(:clj
(defmacro raise [& fragments]
(defmacro raise-str
"Like `raise`, but doesn't require a data argument."
[& fragments]
`(throw (ex-info (str ~@(map (fn [m#] (if (string? m#) m# (list 'pr-str m#))) fragments)) {}))))
#?(:clj
(defmacro raise
"The last argument must be a map."
[& fragments]
(let [msgs (butlast fragments)
data (last fragments)]
`(throw (ex-info (str ~@(map (fn [m#] (if (string? m#) m# (list 'pr-str m#))) msgs)) ~data)))))
`(throw
(ex-info
(str ~@(map (fn [m#] (if (string? m#) m# (list 'pr-str m#))) msgs)) ~data)))))
#?(:clj
(defmacro cond-let [& clauses]
@ -26,7 +36,7 @@
(if (and (symbol? x)
(str/starts-with? (name x) "?"))
(keyword (subs (name x) 1))
(raise (str x " is not a Datalog var."))))
(raise-str x " is not a Datalog var.")))
(defn conj-in
"Associates a value into a sequence in a nested associative structure, where
@ -40,6 +50,13 @@
(assoc m k (conj-in (get m k) ks v))
(assoc m k (conj (get m k) v))))
(defn concat-in
{:static true}
[m [k & ks] vs]
(if ks
(assoc m k (concat-in (get m k) ks vs))
(assoc m k (concat (get m k) vs))))
(defmacro while-let [binding & forms]
`(loop []
(when-let ~binding

View file

@ -14,7 +14,7 @@
(let [caught
(try
(do
(util/raise "succeed")
(util/raise "succeed" {})
"fail")
(catch :default e e))]
(is (= "succeed" (aget caught "data"))))))