Compare commits

...

7 commits

Author SHA1 Message Date
Richard Newman
696ae59570 Example query hacking. 2016-08-04 18:55:06 -07:00
Richard Newman
753fc859b1 Implement basic fulltext binding. 2016-08-04 18:54:50 -07:00
Richard Newman
5f04a48c2a Add explicit fts-table to source. 2016-08-04 18:50:34 -07:00
Richard Newman
3058c43c70 Split out datomish.query.cc so we can implement clause resolution in more than one namespace. 2016-08-04 18:49:31 -07:00
Richard Newman
dc3cef985b Part 1: process Function instances. 2016-08-04 16:59:37 -07:00
Richard Newman
2faa0b1ac6 Notes on parsing functions. 2016-08-04 16:58:59 -07:00
Richard Newman
d5e3716eba Turn Source into a protocol. Allow source->from to switch on attribute. 2016-08-04 16:40:03 -07:00
7 changed files with 433 additions and 153 deletions

View file

@ -95,3 +95,9 @@
'[:find ?e ?v :in $ :where
[?e :x ?v]
#_[(> ?v 1000)]] {}))))))
(dq/parse '[:find ?entity ?tx ?score
:in $ ?search
:where [(foobar $ :artist/name ?search) [[?entity _ ?tx ?score]]]])
(honeysql.core/format {:select [:?foo] :from [:foo] :where [:match :foo.x "Bar"]})

View file

@ -134,7 +134,7 @@
(datomish.query/parse
'[:find ?page :in $ ?latest :where
[?page :page/url "http://example.com/"]
[?page :page/title ?title]
[(fulltext $ :page/title "Some title") [[?page ?title _ _]]]
(or
[?entity :page/likes ?page]
[?entity :page/loves ?page])

143
src/datomish/query/cc.cljc Normal file
View file

@ -0,0 +1,143 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.query.cc
  (:require
   [clojure.set]
   [datomish.query.source
    :refer [attribute-in-source
            constant-in-source]]
   [datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]]
   [datascript.parser :as dp
    #?@(:cljs
        [:refer
         [
          Constant
          Placeholder
          Variable
          ]])])
  #?(:clj
     (:import
      [datascript.parser
       Constant
       Placeholder
       Variable
       ])))
;; A ConjoiningClauses (CC) is a collection of clauses that are combined with JOIN.
;; The topmost form in a query is a ConjoiningClauses.
;;
;;---------------------------------------------------------------------------------------
;; Done:
;; - Ordinary pattern clauses turn into FROM parts and WHERE parts using :=.
;; - Predicate clauses turn into the same, but with other functions.
;; - `not` turns into NOT EXISTS with WHERE clauses inside the subquery to
;; bind it to the outer variables, or adds simple WHERE clauses to the outer
;; clause.
;; - `not-join` is similar, but with explicit binding.
;;
;; Not yet done:
;; - Function clauses with bindings turn into:
;; * Subqueries. Perhaps less efficient? Certainly clearer.
;; * Projection expressions, if only used for output.
;; * Inline expressions?
;; - `or` turns into a collection of UNIONs inside a subquery.
;; `or`'s documentation states that all clauses must include the same vars,
;; but that's an over-simplification: all clauses must refer to the external
;; unification vars.
;; The entire UNION-set is JOINed to any surrounding expressions per the `rule-vars`
;; clause, or the intersection of the vars in the two sides of the JOIN.
;;---------------------------------------------------------------------------------------
;;
;; Fields of a ConjoiningClauses:
;; `source` is the source passed to `attribute-in-source`/`constant-in-source`
;;   when constants are turned into SQL values.
;; `from` is a list of [source alias] pairs, suitable for passing to honeysql.
;; `external-bindings` is a map from var to qualified columns that were bound
;;   outside this CC; they are equated with our own bindings when the CC is
;;   expanded.
;; `bindings` is a map from var to qualified columns.
;; `wheres` is a list of fragments that can be joined by `:and`.
(defrecord ConjoiningClauses [source from external-bindings bindings wheres])
(defn bind-column-to-var
  "Add `col` to the CC's bindings under the symbol carried by `variable`
  (its `:symbol`), via `util/conj-in`. Returns the updated CC."
  [cc variable col]
  (util/conj-in cc [:bindings (:symbol variable)] col))
(defn constrain-column-to-constant
  "Add a `[:= col value']` fragment to the CC's `:wheres`, where `value'` is
  `value` translated by the CC's source. Values in the attribute position
  (`:a`) go through `attribute-in-source`; every other position goes through
  `constant-in-source`."
  [cc col position value]
  (let [source    (:source cc)
        translate (if (= :a position)
                    attribute-in-source
                    constant-in-source)]
    (util/conj-in cc [:wheres] [:= col (translate source value)])))
(defn augment-cc
  "Return `cc` extended with additional parts:
  - `from` is appended to the existing `:from` pairs;
  - `bindings` is merged into `:bindings`, concatenating the column lists of
    vars present on both sides;
  - `wheres` is appended to the existing `:wheres` fragments."
  [cc from bindings wheres]
  (-> cc
      (update :from concat from)
      (update :bindings #(merge-with concat % bindings))
      (update :wheres concat wheres)))
(defn merge-ccs
  "Fold the `:from`, `:bindings` and `:wheres` of `right` into `left` using
  `augment-cc`. All other fields of the result come from `left`."
  [left right]
  (let [{:keys [from bindings wheres]} right]
    (augment-cc left from bindings wheres)))
(defn- bindings->where
  "Take a bindings map like

     {?foo [:datoms12.e :datoms13.v :datoms14.e]}

  and produce a list of constraint expressions like

     [[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]]

  Each var's first column is equated with every other column bound to the
  same var; vars bound to a single column contribute nothing.

  TODO: experiment; producing more pairwise equalities might give better or
  worse performance."
  [bindings]
  (for [[_ columns] bindings
        :let [[root & others] columns]
        other others]
    [:= root other]))
;; This is so we can link clauses to the outside world.
(defn- impose-external-bindings
  "For every var bound both externally (`:external-bindings`) and inside this
  CC (`:bindings`), append a `[:= external internal]` fragment to `:wheres`,
  using the first column on each side. Returns `cc` unchanged when there are
  no external bindings."
  [cc]
  (let [theirs (:external-bindings cc)]
    (if (empty? theirs)
      cc
      (let [ours        (:bindings cc)
            shared      (clojure.set/intersection (set (keys theirs))
                                                  (set (keys ours)))
            constraints (for [v shared
                              :let [external (first (v theirs))
                                    internal (first (v ours))]]
                          (do
                            ;; A shared var must have a column on both sides.
                            (assert external)
                            (assert internal)
                            [:= external internal]))]
        (util/concat-in cc [:wheres] constraints)))))
(defn expand-where-from-bindings
  "Turn the CC's bindings into additional where clauses, then impose any
  external bindings. This is not idempotent: calling it more than once
  produces duplicate clauses."
  [cc]
  ;; The cross-pattern var equalities are placed ahead of the existing
  ;; wheres. The ordering is harmless: the SQL engine considers the
  ;; conjunction as a whole.
  (let [unifications (bindings->where (:bindings cc))]
    (-> cc
        (assoc :wheres (concat unifications (:wheres cc)))
        impose-external-bindings)))
(defn binding-for-symbol-or-throw
  "Return the first column bound to `symbol`, preferring the CC's own
  `:bindings` over its `:external-bindings`. Raises if neither has one."
  [cc symbol]
  (if-let [column (some (fn [k] (first (symbol (k cc))))
                        [:bindings :external-bindings])]
    column
    (raise-str "No bindings yet for " symbol)))
(defn argument->value
  "Resolve one parsed argument against the CC:
  - a Variable resolves to the column it is bound to;
  - a Constant is translated by the CC's source;
  - a Placeholder, or anything else, raises.
  Also raises when a variable has no binding yet."
  [cc arg]
  (cond
    (instance? Placeholder arg)
    (raise-str "Can't use a placeholder in a predicate.")

    (instance? Variable arg)
    (binding-for-symbol-or-throw cc (:symbol arg))

    (instance? Constant arg)
    (constant-in-source (:source cc) (:value arg))

    :else
    (raise-str "Unknown predicate argument " arg)))

View file

@ -4,6 +4,8 @@
(ns datomish.query.clauses
(:require
[datomish.query.cc :as cc]
[datomish.query.functions :as functions]
[datomish.query.source
:refer [attribute-in-source
constant-in-source
@ -13,105 +15,35 @@
[datascript.parser :as dp
#?@(:cljs
[:refer
[PlainSymbol Predicate Not Or Pattern DefaultSrc Variable Constant Placeholder]])]
[
Constant
DefaultSrc
Function
Not
Or
Pattern
Placeholder
PlainSymbol
Predicate
Variable
]])]
[honeysql.core :as sql]
[clojure.string :as str]
)
#?(:clj
(:import
[datascript.parser
PlainSymbol Predicate Not Or Pattern DefaultSrc Variable Constant Placeholder])))
;; A ConjoiningClauses (CC) is a collection of clauses that are combined with JOIN.
;; The topmost form in a query is a ConjoiningClauses.
;;
;;---------------------------------------------------------------------------------------
;; Done:
;; - Ordinary pattern clauses turn into FROM parts and WHERE parts using :=.
;; - Predicate clauses turn into the same, but with other functions.
;; - `not` turns into NOT EXISTS with WHERE clauses inside the subquery to
;; bind it to the outer variables, or adds simple WHERE clauses to the outer
;; clause.
;; - `not-join` is similar, but with explicit binding.
;;
;; Not yet done:
;; - Function clauses with bindings turn into:
;; * Subqueries. Perhaps less efficient? Certainly clearer.
;; * Projection expressions, if only used for output.
;; * Inline expressions?
;; - `or` turns into a collection of UNIONs inside a subquery.
;; `or`'s documentation states that all clauses must include the same vars,
;; but that's an over-simplification: all clauses must refer to the external
;; unification vars.
;; The entire UNION-set is JOINed to any surrounding expressions per the `rule-vars`
;; clause, or the intersection of the vars in the two sides of the JOIN.
;;---------------------------------------------------------------------------------------
;;
;; `from` is a list of [source alias] pairs, suitable for passing to honeysql.
;; `bindings` is a map from var to qualified columns.
;; `wheres` is a list of fragments that can be joined by `:and`.
(defrecord ConjoiningClauses [source from external-bindings bindings wheres])
(defn bind-column-to-var [cc variable col]
(let [var (:symbol variable)]
(util/conj-in cc [:bindings var] col)))
(defn constrain-column-to-constant [cc col position value]
(util/conj-in cc [:wheres]
[:= col (if (= :a position)
(attribute-in-source (:source cc) value)
(constant-in-source (:source cc) value))]))
(defn merge-ccs [left right]
(assoc left
:from (concat (:from left) (:from right))
:bindings (merge-with concat (:bindings left) (:bindings right))
:wheres (concat (:wheres left) (:wheres right))))
(defn- bindings->where
"Take a bindings map like
{?foo [:datoms12.e :datoms13.v :datoms14.e]}
and produce a list of constraints expression like
[[:= :datoms12.e :datoms13.v] [:= :datoms12.e :datoms14.e]]
TODO: experiment; it might be the case that producing more
pairwise equalities we get better or worse performance."
[bindings]
(mapcat (fn [[_ vs]]
(when (> (count vs) 1)
(let [root (first vs)]
(map (fn [v] [:= root v]) (rest vs)))))
bindings))
;; This is so we can link clauses to the outside world.
(defn impose-external-bindings [cc]
(if (empty? (:external-bindings cc))
cc
(let [ours (:bindings cc)
theirs (:external-bindings cc)
vars (clojure.set/intersection (set (keys theirs)) (set (keys ours)))]
(util/concat-in
cc [:wheres]
(map
(fn [v]
(let [external (first (v theirs))
internal (first (v ours))]
(assert external)
(assert internal)
[:= external internal]))
vars)))))
(defn expand-where-from-bindings
"Take the bindings in the CC and contribute
additional where clauses. Calling this more than
once will result in duplicate clauses."
[cc]
(impose-external-bindings
(assoc cc :wheres
;; Note that the order of clauses here means that cross-pattern var bindings
;; come first. That's OK: the SQL engine considers these altogether.
(concat (bindings->where (:bindings cc))
(:wheres cc)))))
Constant
DefaultSrc
Function
Not
Or
Pattern
Placeholder
PlainSymbol
Predicate
Variable
])))
;; Pattern building is recursive, so we need forward declarations.
(declare
@ -138,16 +70,19 @@
cc
Variable
(bind-column-to-var cc pattern-part col)
(cc/bind-column-to-var cc pattern-part col)
Constant
(constrain-column-to-constant cc col position (:value pattern-part))
(cc/constrain-column-to-constant cc col position (:value pattern-part))
(raise-str "Unknown pattern part " pattern-part))))
(raise "Unknown pattern part." {:part pattern-part :clause pattern}))))
cc
places)))
(defn pattern->attribute
  "Return the second element of `pattern`'s `:pattern` sequence, i.e. the
  part in the attribute position."
  [pattern]
  (-> pattern :pattern second))
;; Accumulates a pattern into the CC. Returns a new CC.
(defn apply-pattern-clause
"Transform a DataScript Pattern instance into the parts needed
@ -162,7 +97,11 @@
(when-not (instance? DefaultSrc (:source pattern))
(raise-str "Non-default sources are not supported in patterns. Pattern: " pattern))
(let [[table alias] (source->from (:source cc))] ; e.g., [:datoms :datoms123]
;; TODO: look up the attribute in external bindings if it's a var. Perhaps we
;; already know what it is…
(let [[table alias] (source->from
(:source cc) ; e.g., [:datoms :datoms123]
(pattern->attribute pattern))]
(apply-pattern-clause-for-alias
;; Record the new table mapping.
@ -184,26 +123,7 @@
(when-not f
(raise-str "Unknown function " (:fn predicate)))
(let [args (map
(fn [arg]
(condp instance? arg
Placeholder
(raise-str "Can't use a placeholder in a predicate.")
Variable
(let [v (:symbol arg)
internal-bindings (v (:bindings cc))
external-bindings (v (:external-bindings cc))]
(or (first internal-bindings)
(first external-bindings)
(raise-str "No bindings yet for " v)))
Constant
(constant-in-source (:source cc) (:value arg))
(raise-str "Unknown predicate argument " arg)))
(:args predicate))]
(let [args (map (partial cc/argument->value cc) (:args predicate))]
(util/conj-in cc [:wheres] (cons f args)))))
(defn apply-not-clause [cc not]
@ -242,7 +162,7 @@
;; subquery.
(if (simple-or? orc)
(merge-ccs cc (simple-or->cc (:source cc)
(cc/merge-ccs cc (simple-or->cc (:source cc)
(merge-with concat
(:external-bindings cc)
(:bindings cc))
@ -251,6 +171,10 @@
;; TODO: handle And within the Or patterns.
(raise "Non-simple `or` clauses not yet supported." {:clause orc})))
(defn apply-function-clause
  "Apply a function clause to `cc` by delegating to
  `functions/apply-sql-function`. Raises when that returns nothing, meaning
  the function expression is not recognised."
  [cc function]
  (if-let [applied (functions/apply-sql-function cc function)]
    applied
    (raise "Unknown function expression." {:clause function})))
;; We're keeping this simple for now: a straightforward type switch.
(defn apply-clause [cc it]
(condp instance? it
@ -266,6 +190,9 @@
Pattern
(apply-pattern-clause cc it)
Function
(apply-function-clause cc it)
(raise "Unknown clause." {:clause it})))
(defn expand-pattern-clauses
@ -274,9 +201,9 @@
(reduce apply-clause cc patterns))
(defn patterns->cc [source patterns external-bindings]
(expand-where-from-bindings
(cc/expand-where-from-bindings
(expand-pattern-clauses
(map->ConjoiningClauses
(cc/map->ConjoiningClauses
{:source source
:from []
:external-bindings (or external-bindings {})
@ -367,7 +294,7 @@
;; We 'fork' a CC for each pattern, then union them together.
;; We need to build the first in order that the others use the same
;; column names.
(let [cc (map->ConjoiningClauses
(let [cc (cc/map->ConjoiningClauses
{:source source
:from []
:external-bindings (or external-bindings {})

View file

@ -0,0 +1,164 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.query.functions
(:require
[honeysql.format :as fmt]
[datomish.query.cc :as cc]
[datomish.query.source :as source]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]]
[datascript.parser :as dp
#?@(:cljs
[:refer
[
BindColl
BindScalar
BindTuple
BindIgnore
Constant
Function
PlainSymbol
SrcVar
Variable
]])]
[honeysql.core :as sql]
[clojure.string :as str]
)
#?(:clj
(:import
[datascript.parser
BindColl
BindScalar
BindTuple
BindIgnore
Constant
Function
PlainSymbol
SrcVar
Variable
])))
;; honeysql's MATCH handler doesn't work for sqlite. This does.
(defmethod fmt/fn-handler "match" [_ col val]
(str (fmt/to-sql col) " MATCH " (fmt/to-sql val)))
(defn fulltext-attribute?
  "Whether `attribute` is a fulltext-indexed attribute in `source`.
  Currently a stub: it ignores both arguments and always returns true."
  [source attribute]
  ;; TODO: schema lookup.
  true)
(defn- validate-fulltext-clause
  "Check that `function` is a fulltext clause of the shape we support, raising
  on the first violation found and returning nil otherwise. In order:
  - the first argument must be the default source var `$`;
  - the second argument must be a constant attribute;
  - that attribute must be fulltext-indexed in the CC's source;
  - the binding must be a collection of a tuple whose elements are each
    either ignored or a scalar variable."
  [cc function]
  (let [[src attr]  (:args function)
        shape       (:binding function)
        bindable?   (fn [element]
                      (or (instance? BindIgnore element)
                          (and (instance? BindScalar element)
                               (instance? Variable (:variable element)))))]
    (cond
      (not (and (instance? SrcVar src)
                (= "$" (name (:symbol src)))))
      (raise "Non-default sources not supported." {:arg src})

      (not (instance? Constant attr))
      (raise "Non-constant fulltext attributes not supported." {:arg attr})

      (not (fulltext-attribute? (:source cc) (:value attr)))
      (raise-str "Attribute " (:value attr) " is not a fulltext-indexed attribute.")

      (not (and (instance? BindColl shape)
                (instance? BindTuple (:binding shape))
                (every? bindable? (:bindings (:binding shape)))))
      (raise "Unexpected binding value." {:binding (:binding function)}))))
(defn apply-fulltext-clause
  "Validate a fulltext function clause and fold it into `cc`.

  The search argument is either a constant string or a bound variable. It and
  the attribute are used to generate a SQL MATCH expression

      table MATCH 'search string'

  which is joined against an ordinary datoms pattern to yield entity, value
  and tx. Scoring is not currently supported: the score is always bound to 0."
  [cc function]
  (validate-fulltext-clause cc function)
  (let [source (:source cc)
        [_ attr search] (:args function)

        ;; Symbols from the binding tuple; placeholders come out as nil.
        [entity value tx score]
        (map (comp :symbol :variable)
             (-> function :binding :binding :bindings))

        ;; FTS table and a fresh alias. A query may contain several fulltext
        ;; expressions, so each gets its own alias, giving SQL like
        ;;   SELECT ttt.a FROM t1 AS ttt WHERE ttt.t1 MATCH 'string'
        [fts-table fts-alias] (source/source->fts-from source) ; [:t1 :ttt]
        match-column (sql/qualify fts-alias fts-table)          ; :ttt.t1
        match-value  (cc/argument->value cc search)

        ;; The ordinary datoms table the FTS rows are joined to.
        [datom-table datom-alias] (source/source->non-fts-from source)
        datom-column (fn [column] (sql/qualify datom-alias column))

        ;; Everything below is added to the CC.
        from   [[fts-table fts-alias]
                [datom-table datom-alias]]
        wheres [;; The FTS match itself.
                [:match match-column match-value]
                ;; The FTS rowid to datom correspondence.
                [:= (datom-column :v) (sql/qualify fts-alias :rowid)]
                ;; The attribute itself must match.
                [:= (datom-column :a)
                 (source/attribute-in-source source (:value attr))]]

        ;; Bindings for entity, value, tx, and score; placeholder (nil)
        ;; entries are dropped.
        ;; TODO: do we need to examine existing bindings to capture
        ;; wheres for any of these? We shouldn't, because the CC will
        ;; be internally cross-where'd when everything is done...
        bindings (into {}
                       (remove (comp nil? first)
                               [[entity [(datom-column :e)]]
                                [value  [match-column]]
                                [tx     [(datom-column :tx)]]
                                ;; Future: use matchinfo to compute a score
                                ;; if this is a variable rather than a placeholder.
                                [score  [0]]]))]
    (cc/augment-cc cc from bindings wheres)))
;; Dispatch table from function name (a string) to the handler that applies
;; that function clause to a CC. Handlers are invoked as (handler cc function).
(def sql-functions
  ;; Future: versions of this that use snippet() or matchinfo().
  {"fulltext" apply-fulltext-clause})
(defn apply-sql-function
  "Apply `function` to `cc` when it is a Function whose `:fn` is a plain
  symbol naming an entry in `sql-functions`. Returns the handler's result,
  or nil so that the caller can try a different kind of application."
  [cc function]
  (let [f (:fn function)]
    (when (and (instance? Function function)
               (instance? PlainSymbol f))
      (when-let [handler (sql-functions (name (:symbol f)))]
        (handler cc function)))))
;; A fulltext expression parses to:
;;
;; Function ( :fn, :args )
;;
;; The args are a SrcVar, followed by the attribute and then the search term.
;;
;; This binds a relation of [?entity ?value ?tx ?score]:
;;
;; BindColl
;; :binding BindTuple
;; :bindings [BindScalar...]
;;
;; #datascript.parser.Function
;; {:fn #datascript.parser.PlainSymbol{:symbol fulltext},
;; :args [#datascript.parser.SrcVar{:symbol $}
;; #datascript.parser.Constant{:value :artist/name}
;; #datascript.parser.Variable{:symbol ?search}],
;; :binding #datascript.parser.BindColl
;; {:binding #datascript.parser.BindTuple
;; {:bindings [
;; #datascript.parser.BindScalar{:variable #datascript.parser.Variable{:symbol ?entity}}
;; #datascript.parser.BindScalar{:variable #datascript.parser.Variable{:symbol ?name}}
;; #datascript.parser.BindScalar{:variable #datascript.parser.Variable{:symbol ?tx}}
;; #datascript.parser.BindScalar{:variable #datascript.parser.Variable{:symbol ?score}}]}}}

View file

@ -4,7 +4,15 @@
(ns datomish.query.source
(:require
[datomish.query.transforms :as transforms]))
[datomish.query.transforms :as transforms]
[datascript.parser
#?@(:cljs
[:refer [Variable Constant Placeholder]])])
#?(:clj
(:import [datascript.parser Variable Constant Placeholder])))
(defn- gensym-table-alias
  "Generate a fresh, unique symbol prefixed with the name of `table`, for use
  as a table alias."
  [table]
  (-> table name gensym))
;;;
;;; A source is something that can match patterns. For example:
@ -24,9 +32,21 @@
;;; * Transform constants and attributes into something usable
;;; by the source.
(defprotocol Source
  (source->from [source attribute]
    "Returns a pair, `[table alias]` for a pattern with the provided attribute.")
  (source->non-fts-from [source]
    "Returns a pair, `[table alias]`, for querying the source's ordinary
    (non-fulltext) table.")
  (source->fts-from [source]
    "Returns a pair, `[table alias]` for querying the source's fulltext index.")
  (source->constraints [source alias]
    "Returns any extra where-clauses the source wants applied to `alias`.
    In DatomsSource this is nil when no `make-constraints` fn is set.")
  (attribute-in-source [source attribute]
    "Transforms `attribute` into something usable by the source.")
  (constant-in-source [source constant]
    "Transforms `constant` into something usable by the source."))
(defrecord
Source
[table ; e.g., :datoms
DatomsSource
[table ; Typically :datoms.
fts-table ; Typically :fulltext_values
fts-view ; Typically :fulltext_datoms.
columns ; e.g., [:e :a :v :tx]
;; `attribute-transform` is a function from attribute to constant value. Used to
@ -41,29 +61,47 @@
;; Not currently used.
make-constraints ; ?fn [source alias] => [where-clauses]
])
]
Source
(defn gensym-table-alias [table]
(gensym (name table)))
(source->from [source attribute]
(let [table
(if (and (instance? Constant attribute)
;; TODO: look in the DB schema to see if `attribute` is known to not be
;; a fulltext attribute.
true)
(:table source)
(defn datoms-source [db]
(->Source :datoms
[:e :a :v :tx :added]
transforms/attribute-transform-string
transforms/constant-transform-default
gensym-table-alias
nil))
;; It's variable. We must act as if it could be a fulltext datom.
(:fts-view source))]
[table ((:table-alias source) table)]))
(defn source->from [source]
(source->non-fts-from [source]
(let [table (:table source)]
[table ((:table-alias source) table)]))
(defn source->constraints [source alias]
(source->fts-from [source]
(let [table (:fts-table source)]
[table ((:table-alias source) table)]))
(source->constraints [source alias]
(when-let [f (:make-constraints source)]
(f alias)))
(defn attribute-in-source [source attribute]
(attribute-in-source [source attribute]
((:attribute-transform source) attribute))
(defn constant-in-source [source constant]
((:constant-transform source) constant))
(constant-in-source [source constant]
((:constant-transform source) constant)))
(defn datoms-source
  "Build the default DatomsSource: ordinary patterns query `:datoms`, the
  fulltext index is `:fulltext_values`, and `:fulltext_datoms` is the
  fulltext view. Table aliases are generated with `gensym-table-alias`, and
  no extra constraints are made. `db` is currently unused."
  [db]
  (map->DatomsSource
  {:table :datoms
  :fts-table :fulltext_values
  :fts-view :fulltext_datoms
  :columns [:e :a :v :tx :added]
  :attribute-transform transforms/attribute-transform-string
  :constant-transform transforms/constant-transform-default
  :table-alias gensym-table-alias
  :make-constraints nil}))

View file

@ -26,8 +26,10 @@
(fgensym s (dec (swap! counter inc)))))))
(defn mock-source [db]
(source/map->Source
(source/map->DatomsSource
{:table :datoms
:fts-table :fulltext_values
:fts-view :fulltext_datoms
:columns [:e :a :v :tx :added]
:attribute-transform transforms/attribute-transform-string
:constant-transform transforms/constant-transform-default