// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

//! This module defines some core types that support find expressions: sources,
//! variables, expressions, etc.
//! These are produced as 'fuel' by the query parser, consumed by the query
//! translator and executor.
//!
//! Many of these types are defined as simple structs that are little more than
//! a richer type alias: a variable, for example, is really just a fancy kind
//! of string.
//!
//! At some point in the future, we might consider reducing copying and memory
//! usage by recasting all of these string-holding structs and enums in terms
//! of string references, with those references being slices of some parsed
//! input query string, and valid for the lifetime of that string.
//!
//! For now, for the sake of simplicity, all of these strings are heap-allocated.
//!
//! Furthermore, we might cut out some of the chaff here: each time a 'tagged'
//! type is used within an enum, we have an opportunity to simplify and use the
//! inner type directly in conjunction with matching on the enum. Before diving
//! deeply into this it's worth recognizing that this loss of 'sovereignty' is
//! a tradeoff against well-typed function signatures and other such boundaries.
use std::collections::{BTreeSet, HashSet}; use std; use std::fmt; use std::rc::Rc; use crate::{BigInt, DateTime, OrderedFloat, Utc, Uuid}; use crate::value_rc::{FromRc, ValueRc}; pub use crate::{Keyword, PlainSymbol}; pub type SrcVarName = String; // Do not include the required syntactic '$'. #[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Variable(pub Rc); impl Variable { pub fn as_str(&self) -> &str { self.0.as_ref().0.as_str() } pub fn name(&self) -> PlainSymbol { self.0.as_ref().clone() } /// Return a new `Variable`, assuming that the provided string is a valid name. pub fn from_valid_name(name: &str) -> Variable { let s = PlainSymbol::plain(name); assert!(s.is_var_symbol()); Variable(Rc::new(s)) } } pub trait FromValue { fn from_value(v: &crate::ValueAndSpan) -> Option; } /// If the provided EDN value is a PlainSymbol beginning with '?', return /// it wrapped in a Variable. If not, return None. /// TODO: intern strings. #398. impl FromValue for Variable { fn from_value(v: &crate::ValueAndSpan) -> Option { if let crate::SpannedValue::PlainSymbol(ref s) = v.inner { Variable::from_symbol(s) } else { None } } } impl Variable { pub fn from_rc(sym: Rc) -> Option { if sym.is_var_symbol() { Some(Variable(sym)) } else { None } } /// TODO: intern strings. #398. 
pub fn from_symbol(sym: &PlainSymbol) -> Option { if sym.is_var_symbol() { Some(Variable(Rc::new(sym.clone()))) } else { None } } } impl fmt::Debug for Variable { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "var({})", self.0) } } impl std::fmt::Display for Variable { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.0) } } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct QueryFunction(pub PlainSymbol); impl FromValue for QueryFunction { fn from_value(v: &crate::ValueAndSpan) -> Option { if let crate::SpannedValue::PlainSymbol(ref s) = v.inner { QueryFunction::from_symbol(s) } else { None } } } impl QueryFunction { pub fn from_symbol(sym: &PlainSymbol) -> Option { // TODO: validate the acceptable set of function names. Some(QueryFunction(sym.clone())) } } impl std::fmt::Display for QueryFunction { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}", self.0) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Direction { Ascending, Descending, } /// An abstract declaration of ordering: direction and variable. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Order(pub Direction, pub Variable); // Future: Element instead of Variable? #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum SrcVar { DefaultSrc, NamedSrc(SrcVarName), } impl FromValue for SrcVar { fn from_value(v: &crate::ValueAndSpan) -> Option { if let crate::SpannedValue::PlainSymbol(ref s) = v.inner { SrcVar::from_symbol(s) } else { None } } } impl SrcVar { pub fn from_symbol(sym: &PlainSymbol) -> Option { if sym.is_src_symbol() { if sym.0 == "$" { Some(SrcVar::DefaultSrc) } else { Some(SrcVar::NamedSrc(sym.name().to_string())) } } else { None } } } /// These are the scalar values representable in EDN. 
#[derive(Clone, Debug, Eq, PartialEq)] pub enum NonIntegerConstant { Boolean(bool), BigInteger(BigInt), Float(OrderedFloat), Text(ValueRc), Instant(DateTime), Uuid(Uuid), } impl<'a> From<&'a str> for NonIntegerConstant { fn from(val: &'a str) -> NonIntegerConstant { NonIntegerConstant::Text(ValueRc::new(val.to_string())) } } impl From for NonIntegerConstant { fn from(val: String) -> NonIntegerConstant { NonIntegerConstant::Text(ValueRc::new(val)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum FnArg { Variable(Variable), SrcVar(SrcVar), EntidOrInteger(i64), IdentOrKeyword(Keyword), Constant(NonIntegerConstant), // The collection values representable in EDN. There's no advantage to destructuring up front, // since consumers will need to handle arbitrarily nested EDN themselves anyway. Vector(Vec), } impl FromValue for FnArg { fn from_value(v: &crate::ValueAndSpan) -> Option { use crate::SpannedValue::*; match v.inner { Integer(x) => Some(FnArg::EntidOrInteger(x)), PlainSymbol(ref x) if x.is_src_symbol() => SrcVar::from_symbol(x).map(FnArg::SrcVar), PlainSymbol(ref x) if x.is_var_symbol() => { Variable::from_symbol(x).map(FnArg::Variable) } PlainSymbol(_) => None, Keyword(ref x) => Some(FnArg::IdentOrKeyword(x.clone())), Instant(x) => Some(FnArg::Constant(NonIntegerConstant::Instant(x))), Uuid(x) => Some(FnArg::Constant(NonIntegerConstant::Uuid(x))), Boolean(x) => Some(FnArg::Constant(NonIntegerConstant::Boolean(x))), Float(x) => Some(FnArg::Constant(NonIntegerConstant::Float(x))), BigInteger(ref x) => Some(FnArg::Constant(NonIntegerConstant::BigInteger(x.clone()))), Text(ref x) => // TODO: intern strings. #398. { Some(FnArg::Constant(x.clone().into())) } Nil | NamespacedSymbol(_) | Vector(_) | List(_) | Set(_) | Map(_) | Bytes(_) => None, } } } // For display in column headings in the repl. 
impl std::fmt::Display for FnArg { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { FnArg::Variable(ref var) => write!(f, "{}", var), FnArg::SrcVar(ref var) => { if var == &SrcVar::DefaultSrc { write!(f, "$") } else { write!(f, "{:?}", var) } } FnArg::EntidOrInteger(entid) => write!(f, "{}", entid), FnArg::IdentOrKeyword(ref kw) => write!(f, "{}", kw), FnArg::Constant(ref constant) => write!(f, "{:?}", constant), FnArg::Vector(ref vec) => write!(f, "{:?}", vec), } } } impl FnArg { pub fn as_variable(&self) -> Option<&Variable> { match self { FnArg::Variable(ref v) => Some(v), _ => None, } } } /// e, a, tx can't be values -- no strings, no floats -- and so /// they can only be variables, entity IDs, ident keywords, or /// placeholders. /// This encoding allows us to represent integers that aren't /// entity IDs. That'll get filtered out in the context of the /// database. #[derive(Clone, Debug, Eq, PartialEq)] pub enum PatternNonValuePlace { Placeholder, Variable(Variable), Entid(i64), // Will always be +ve. See #190. Ident(ValueRc), } impl From> for PatternNonValuePlace { fn from(value: Rc) -> Self { PatternNonValuePlace::Ident(ValueRc::from_rc(value)) } } impl From for PatternNonValuePlace { fn from(value: Keyword) -> Self { PatternNonValuePlace::Ident(ValueRc::new(value)) } } impl PatternNonValuePlace { // I think we'll want move variants, so let's leave these here for now. 
#[allow(dead_code)] fn into_pattern_value_place(self) -> PatternValuePlace { match self { PatternNonValuePlace::Placeholder => PatternValuePlace::Placeholder, PatternNonValuePlace::Variable(x) => PatternValuePlace::Variable(x), PatternNonValuePlace::Entid(x) => PatternValuePlace::EntidOrInteger(x), PatternNonValuePlace::Ident(x) => PatternValuePlace::IdentOrKeyword(x), } } fn to_pattern_value_place(&self) -> PatternValuePlace { match *self { PatternNonValuePlace::Placeholder => PatternValuePlace::Placeholder, PatternNonValuePlace::Variable(ref x) => PatternValuePlace::Variable(x.clone()), PatternNonValuePlace::Entid(x) => PatternValuePlace::EntidOrInteger(x), PatternNonValuePlace::Ident(ref x) => PatternValuePlace::IdentOrKeyword(x.clone()), } } } impl FromValue for PatternNonValuePlace { fn from_value(v: &crate::ValueAndSpan) -> Option { match v.inner { crate::SpannedValue::Integer(x) => { if x >= 0 { Some(PatternNonValuePlace::Entid(x)) } else { None } } crate::SpannedValue::PlainSymbol(ref x) => { if x.0.as_str() == "_" { Some(PatternNonValuePlace::Placeholder) } else if let Some(v) = Variable::from_symbol(x) { Some(PatternNonValuePlace::Variable(v)) } else { None } } crate::SpannedValue::Keyword(ref x) => Some(x.clone().into()), _ => None, } } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum IdentOrEntid { Ident(Keyword), Entid(i64), } /// The `v` part of a pattern can be much broader: it can represent /// integers that aren't entity IDs (particularly negative integers), /// strings, and all the rest. We group those under `Constant`. 
#[derive(Clone, Debug, Eq, PartialEq)] pub enum PatternValuePlace { Placeholder, Variable(Variable), EntidOrInteger(i64), IdentOrKeyword(ValueRc), Constant(NonIntegerConstant), } impl From> for PatternValuePlace { fn from(value: Rc) -> Self { PatternValuePlace::IdentOrKeyword(ValueRc::from_rc(value)) } } impl From for PatternValuePlace { fn from(value: Keyword) -> Self { PatternValuePlace::IdentOrKeyword(ValueRc::new(value)) } } impl FromValue for PatternValuePlace { fn from_value(v: &crate::ValueAndSpan) -> Option { match v.inner { crate::SpannedValue::Integer(x) => Some(PatternValuePlace::EntidOrInteger(x)), crate::SpannedValue::PlainSymbol(ref x) if x.0.as_str() == "_" => { Some(PatternValuePlace::Placeholder) } crate::SpannedValue::PlainSymbol(ref x) => { Variable::from_symbol(x).map(PatternValuePlace::Variable) } crate::SpannedValue::Keyword(ref x) if x.is_namespaced() => Some(x.clone().into()), crate::SpannedValue::Boolean(x) => { Some(PatternValuePlace::Constant(NonIntegerConstant::Boolean(x))) } crate::SpannedValue::Float(x) => { Some(PatternValuePlace::Constant(NonIntegerConstant::Float(x))) } crate::SpannedValue::BigInteger(ref x) => Some(PatternValuePlace::Constant( NonIntegerConstant::BigInteger(x.clone()), )), crate::SpannedValue::Instant(x) => { Some(PatternValuePlace::Constant(NonIntegerConstant::Instant(x))) } crate::SpannedValue::Text(ref x) => // TODO: intern strings. #398. { Some(PatternValuePlace::Constant(x.clone().into())) } crate::SpannedValue::Uuid(ref u) => { Some(PatternValuePlace::Constant(NonIntegerConstant::Uuid(*u))) } // These don't appear in queries. crate::SpannedValue::Nil => None, crate::SpannedValue::NamespacedSymbol(_) => None, crate::SpannedValue::Keyword(_) => None, // … yet. 
crate::SpannedValue::Map(_) => None, crate::SpannedValue::List(_) => None, crate::SpannedValue::Set(_) => None, crate::SpannedValue::Vector(_) => None, crate::SpannedValue::Bytes(_) => None, } } } impl PatternValuePlace { // I think we'll want move variants, so let's leave these here for now. #[allow(dead_code)] fn into_pattern_non_value_place(self) -> Option { match self { PatternValuePlace::Placeholder => Some(PatternNonValuePlace::Placeholder), PatternValuePlace::Variable(x) => Some(PatternNonValuePlace::Variable(x)), PatternValuePlace::EntidOrInteger(x) => { if x >= 0 { Some(PatternNonValuePlace::Entid(x)) } else { None } } PatternValuePlace::IdentOrKeyword(x) => Some(PatternNonValuePlace::Ident(x)), PatternValuePlace::Constant(_) => None, } } fn to_pattern_non_value_place(&self) -> Option { match *self { PatternValuePlace::Placeholder => Some(PatternNonValuePlace::Placeholder), PatternValuePlace::Variable(ref x) => Some(PatternNonValuePlace::Variable(x.clone())), PatternValuePlace::EntidOrInteger(x) => { if x >= 0 { Some(PatternNonValuePlace::Entid(x)) } else { None } } PatternValuePlace::IdentOrKeyword(ref x) => { Some(PatternNonValuePlace::Ident(x.clone())) } PatternValuePlace::Constant(_) => None, } } } // Not yet used. // pub enum PullDefaultValue { // EntidOrInteger(i64), // IdentOrKeyword(Rc), // Constant(NonIntegerConstant), // } #[derive(Clone, Debug, Eq, PartialEq)] pub enum PullConcreteAttribute { Ident(Rc), Entid(i64), } #[derive(Clone, Debug, Eq, PartialEq)] pub struct NamedPullAttribute { pub attribute: PullConcreteAttribute, pub alias: Option>, } impl From for NamedPullAttribute { fn from(a: PullConcreteAttribute) -> Self { NamedPullAttribute { attribute: a, alias: None, } } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum PullAttributeSpec { Wildcard, Attribute(NamedPullAttribute), // PullMapSpec(Vec<…>), // LimitedAttribute(NamedPullAttribute, u64), // Limit nil => Attribute instead. 
// DefaultedAttribute(NamedPullAttribute, PullDefaultValue), } impl std::fmt::Display for PullConcreteAttribute { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { PullConcreteAttribute::Ident(ref k) => write!(f, "{}", k), PullConcreteAttribute::Entid(i) => write!(f, "{}", i), } } } impl std::fmt::Display for NamedPullAttribute { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { if let Some(ref alias) = self.alias { write!(f, "{} :as {}", self.attribute, alias) } else { write!(f, "{}", self.attribute) } } } impl std::fmt::Display for PullAttributeSpec { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { PullAttributeSpec::Wildcard => write!(f, "*"), PullAttributeSpec::Attribute(ref attr) => write!(f, "{}", attr), } } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Pull { pub var: Variable, pub patterns: Vec, } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Aggregate { pub func: QueryFunction, pub args: Vec, } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Element { Variable(Variable), Aggregate(Aggregate), /// In a query with a `max` or `min` aggregate, a corresponding variable /// (indicated in the query with `(the ?var)`, is guaranteed to come from /// the row that provided the max or min value. Queries with more than one /// `max` or `min` cannot yield predictable behavior, and will err during /// algebrizing. Corresponding(Variable), Pull(Pull), } impl Element { /// Returns true if the element must yield only one value. 
pub fn is_unit(&self) -> bool { match self { Element::Variable(_) => false, Element::Pull(_) => false, Element::Aggregate(_) => true, Element::Corresponding(_) => true, } } } impl From for Element { fn from(x: Variable) -> Element { Element::Variable(x) } } impl std::fmt::Display for Element { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Element::Variable(ref var) => write!(f, "{}", var), Element::Pull(Pull { ref var, ref patterns, }) => { write!(f, "(pull {} [ ", var)?; for p in patterns.iter() { write!(f, "{} ", p)?; } write!(f, "])") } Element::Aggregate(ref agg) => match agg.args.len() { 0 => write!(f, "({})", agg.func), 1 => write!(f, "({} {})", agg.func, agg.args[0]), _ => write!(f, "({} {:?})", agg.func, agg.args), }, Element::Corresponding(ref var) => write!(f, "(the {})", var), } } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Limit { None, Fixed(u64), Variable(Variable), } /// A definition of the first part of a find query: the /// `[:find ?foo ?bar…]` bit. /// /// There are four different kinds of find specs, allowing you to query for /// a single value, a collection of values from different entities, a single /// tuple (relation), or a collection of tuples. /// /// Examples: /// /// ```rust /// # use edn::query::{Element, FindSpec, Variable}; /// let elements = vec![ /// Element::Variable(Variable::from_valid_name("?foo")), /// Element::Variable(Variable::from_valid_name("?bar")), /// ]; /// let rel = FindSpec::FindRel(elements); /// /// if let FindSpec::FindRel(elements) = rel { /// assert_eq!(2, elements.len()); /// } /// ``` /// #[derive(Clone, Debug, Eq, PartialEq)] pub enum FindSpec { /// Returns an array of arrays, represented as a single array with length a multiple of width. FindRel(Vec), /// Returns an array of scalars, usually homogeneous. /// This is equivalent to mapping over the results of a `FindRel`, /// returning the first value of each. 
FindColl(Element), /// Returns a single tuple: a heterogeneous array of scalars. Equivalent to /// taking the first result from a `FindRel`. FindTuple(Vec), /// Returns a single scalar value. Equivalent to taking the first result /// from a `FindColl`. FindScalar(Element), } /// Returns true if the provided `FindSpec` returns at most one result. impl FindSpec { pub fn is_unit_limited(&self) -> bool { use self::FindSpec::*; match self { FindScalar(..) => true, FindTuple(..) => true, FindRel(..) => false, FindColl(..) => false, } } pub fn expected_column_count(&self) -> usize { use self::FindSpec::*; match self { FindScalar(..) => 1, FindColl(..) => 1, FindTuple(ref elems) | &FindRel(ref elems) => elems.len(), } } /// Returns true if the provided `FindSpec` cares about distinct results. /// /// I use the words "cares about" because find is generally defined in terms of producing distinct /// results at the Datalog level. /// /// Two of the find specs (scalar and tuple) produce only a single result. Those don't need to be /// run with `SELECT DISTINCT`, because we're only consuming a single result. Those queries will be /// run with `LIMIT 1`. /// /// Additionally, some projections cannot produce duplicate results: `[:find (max ?x) …]`, for /// example. /// /// This function gives us the hook to add that logic when we're ready. /// /// Beyond this, `DISTINCT` is not always needed. For example, in some kinds of accumulation or /// sampling projections we might not need to do it at the SQL level because we're consuming into /// a dupe-eliminating data structure like a Set, or we know that a particular query cannot produce /// duplicate results. 
pub fn requires_distinct(&self) -> bool { !self.is_unit_limited() } pub fn columns<'s>(&'s self) -> Box + 's> { use self::FindSpec::*; match self { FindScalar(ref e) => Box::new(std::iter::once(e)), FindColl(ref e) => Box::new(std::iter::once(e)), FindTuple(ref v) => Box::new(v.iter()), FindRel(ref v) => Box::new(v.iter()), } } } // Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks // to the non-recursive form Datomic accepts, which is much simpler to process. #[derive(Clone, Debug, Eq, Hash, PartialEq)] pub enum VariableOrPlaceholder { Placeholder, Variable(Variable), } impl VariableOrPlaceholder { pub fn into_var(self) -> Option { match self { VariableOrPlaceholder::Placeholder => None, VariableOrPlaceholder::Variable(var) => Some(var), } } pub fn var(&self) -> Option<&Variable> { match self { VariableOrPlaceholder::Placeholder => None, VariableOrPlaceholder::Variable(ref var) => Some(var), } } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum Binding { BindScalar(Variable), BindColl(Variable), BindRel(Vec), BindTuple(Vec), } impl Binding { /// Return each variable or `None`, in order. pub fn variables(&self) -> Vec> { match self { &Binding::BindScalar(ref var) | &Binding::BindColl(ref var) => vec![Some(var.clone())], &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => { vars.iter().map(|x| x.var().cloned()).collect() } } } /// Return `true` if no variables are bound, i.e., all binding entries are placeholders. pub fn is_empty(&self) -> bool { match self { &Binding::BindScalar(_) | &Binding::BindColl(_) => false, &Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => { vars.iter().all(|x| x.var().is_none()) } } } /// Return `true` if no variable is bound twice, i.e., each binding entry is either a /// placeholder or unique. 
/// /// ``` /// use edn::query::{Binding,Variable,VariableOrPlaceholder}; /// use std::rc::Rc; /// /// let v = Variable::from_valid_name("?foo"); /// let vv = VariableOrPlaceholder::Variable(v); /// let p = VariableOrPlaceholder::Placeholder; /// /// let e = Binding::BindTuple(vec![p.clone()]); /// let b = Binding::BindTuple(vec![p.clone(), vv.clone()]); /// let d = Binding::BindTuple(vec![vv.clone(), p, vv]); /// assert!(b.is_valid()); // One var, one placeholder: OK. /// assert!(!e.is_valid()); // Empty: not OK. /// assert!(!d.is_valid()); // Duplicate var: not OK. /// ``` pub fn is_valid(&self) -> bool { match self { Binding::BindScalar(_) | &Binding::BindColl(_) => true, Binding::BindRel(ref vars) | &Binding::BindTuple(ref vars) => { let mut acc = HashSet::::new(); for var in vars { if let VariableOrPlaceholder::Variable(ref var) = *var { if !acc.insert(var.clone()) { // It's invalid if there was an equal var already present in the set -- // i.e., we have a duplicate var. return false; } } } // We're not valid if every place is a placeholder! !acc.is_empty() } } } } // Note that the "implicit blank" rule applies. // A pattern with a reversed attribute — :foo/_bar — is reversed // at the point of parsing. These `Pattern` instances only represent // one direction. #[derive(Clone, Debug, Eq, PartialEq)] pub struct Pattern { pub source: Option, pub entity: PatternNonValuePlace, pub attribute: PatternNonValuePlace, pub value: PatternValuePlace, pub tx: PatternNonValuePlace, } impl Pattern { pub fn simple( e: PatternNonValuePlace, a: PatternNonValuePlace, v: PatternValuePlace, ) -> Option { Pattern::new(None, e, a, v, PatternNonValuePlace::Placeholder) } pub fn new( src: Option, e: PatternNonValuePlace, a: PatternNonValuePlace, v: PatternValuePlace, tx: PatternNonValuePlace, ) -> Option { let aa = a.clone(); // Too tired of fighting borrow scope for now. 
if let PatternNonValuePlace::Ident(ref k) = aa { if k.is_backward() { // e and v have different types; we must convert them. // Not every parseable value is suitable for the entity field! // As such, this is a failable constructor. let e_v = e.to_pattern_value_place(); if let Some(v_e) = v.to_pattern_non_value_place() { return Some(Pattern { source: src, entity: v_e, attribute: k.to_reversed().into(), value: e_v, tx, }); } else { return None; } } } Some(Pattern { source: src, entity: e, attribute: a, value: v, tx, }) } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct Predicate { pub operator: PlainSymbol, pub args: Vec, } #[derive(Clone, Debug, Eq, PartialEq)] pub struct WhereFn { pub operator: PlainSymbol, pub args: Vec, pub binding: Binding, } #[derive(Clone, Debug, Eq, PartialEq)] pub enum UnifyVars { /// `Implicit` means the variables in an `or` or `not` are derived from the enclosed pattern. /// DataScript regards these vars as 'free': these variables don't need to be bound by the /// enclosing environment. /// /// Datomic's documentation implies that all implicit variables are required: /// /// > Datomic will attempt to push the or clause down until all necessary variables are bound, /// > and will throw an exception if that is not possible. /// /// but that would render top-level `or` expressions (as used in Datomic's own examples!) /// impossible, so we assume that this is an error in the documentation. /// /// All contained 'arms' in an `or` with implicit variables must bind the same vars. Implicit, /// `Explicit` means the variables in an `or-join` or `not-join` are explicitly listed, /// specified with `required-vars` syntax. /// /// DataScript parses these as free, but allows (incorrectly) the use of more complicated /// `rule-vars` syntax. /// /// Only the named variables will be unified with the enclosing query. /// /// Every 'arm' in an `or-join` must mention the entire set of explicit vars. 
Explicit(BTreeSet), } impl WhereClause { pub fn is_pattern(&self) -> bool { matches!(self, WhereClause::Pattern(_)) } } #[derive(Clone, Debug, Eq, PartialEq)] pub enum OrWhereClause { Clause(WhereClause), And(Vec), } impl OrWhereClause { pub fn is_pattern_or_patterns(&self) -> bool { match self { OrWhereClause::Clause(WhereClause::Pattern(_)) => true, OrWhereClause::And(ref clauses) => clauses.iter().all(|clause| clause.is_pattern()), _ => false, } } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct OrJoin { pub unify_vars: UnifyVars, pub clauses: Vec, /// Caches the result of `collect_mentioned_variables`. mentioned_vars: Option>, } #[derive(Clone, Debug, Eq, PartialEq)] pub struct NotJoin { pub unify_vars: UnifyVars, pub clauses: Vec, } impl NotJoin { pub fn new(unify_vars: UnifyVars, clauses: Vec) -> NotJoin { NotJoin { unify_vars, clauses, } } } #[derive(Clone, Debug, Eq, PartialEq)] pub struct TypeAnnotation { pub value_type: Keyword, pub variable: Variable, } #[allow(dead_code)] #[derive(Clone, Debug, Eq, PartialEq)] pub enum WhereClause { NotJoin(NotJoin), OrJoin(OrJoin), Pred(Predicate), WhereFn(WhereFn), RuleExpr, Pattern(Pattern), TypeAnnotation(TypeAnnotation), } #[allow(dead_code)] #[derive(Debug, Eq, PartialEq)] pub struct ParsedQuery { pub find_spec: FindSpec, pub default_source: SrcVar, pub with: Vec, pub in_vars: Vec, pub in_sources: BTreeSet, pub limit: Limit, pub where_clauses: Vec, pub order: Option>, } pub(crate) enum QueryPart { FindSpec(FindSpec), WithVars(Vec), InVars(Vec), Limit(Limit), WhereClauses(Vec), Order(Vec), } /// A `ParsedQuery` represents a parsed but potentially invalid query to the query algebrizer. /// Such a query is syntactically valid but might be semantically invalid, for example because /// constraints on the set of variables are not respected. /// /// We split `ParsedQuery` from `FindQuery` because it's not easy to generalize over containers /// (here, `Vec` and `BTreeSet`) in Rust. 
impl ParsedQuery { pub(crate) fn from_parts( parts: Vec, ) -> std::result::Result { let mut find_spec: Option = None; let mut with: Option> = None; let mut in_vars: Option> = None; let mut limit: Option = None; let mut where_clauses: Option> = None; let mut order: Option> = None; for part in parts.into_iter() { match part { QueryPart::FindSpec(x) => { if find_spec.is_some() { return Err("find query has repeated :find"); } find_spec = Some(x) } QueryPart::WithVars(x) => { if with.is_some() { return Err("find query has repeated :with"); } with = Some(x) } QueryPart::InVars(x) => { if in_vars.is_some() { return Err("find query has repeated :in"); } in_vars = Some(x) } QueryPart::Limit(x) => { if limit.is_some() { return Err("find query has repeated :limit"); } limit = Some(x) } QueryPart::WhereClauses(x) => { if where_clauses.is_some() { return Err("find query has repeated :where"); } where_clauses = Some(x) } QueryPart::Order(x) => { if order.is_some() { return Err("find query has repeated :order"); } order = Some(x) } } } Ok(ParsedQuery { find_spec: find_spec.ok_or("expected :find")?, default_source: SrcVar::DefaultSrc, with: with.unwrap_or_else(Vec::new), // in_vars: in_vars.unwrap_or_else(Vec::new), in_sources: BTreeSet::default(), limit: limit.unwrap_or(Limit::None), where_clauses: where_clauses.ok_or("expected :where")?, order, }) } } impl OrJoin { pub fn new(unify_vars: UnifyVars, clauses: Vec) -> OrJoin { OrJoin { unify_vars, clauses, mentioned_vars: None, } } /// Return true if either the `OrJoin` is `UnifyVars::Implicit`, or if /// every variable mentioned inside the join is also mentioned in the `UnifyVars` list. pub fn is_fully_unified(&self) -> bool { match &self.unify_vars { UnifyVars::Implicit => true, UnifyVars::Explicit(ref vars) => { // We know that the join list must be a subset of the vars in the pattern, or // it would have failed validation. That allows us to simply compare counts here. 
// TODO: in debug mode, do a full intersection, and verify that our count check // returns the same results. // Use the cached list if we have one. if let Some(ref mentioned) = self.mentioned_vars { vars.len() == mentioned.len() } else { vars.len() == self.collect_mentioned_variables().len() } } } } } pub trait ContainsVariables { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet); fn collect_mentioned_variables(&self) -> BTreeSet { let mut out = BTreeSet::new(); self.accumulate_mentioned_variables(&mut out); out } } impl ContainsVariables for WhereClause { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { use self::WhereClause::*; match self { OrJoin(ref o) => o.accumulate_mentioned_variables(acc), Pred(ref p) => p.accumulate_mentioned_variables(acc), Pattern(ref p) => p.accumulate_mentioned_variables(acc), NotJoin(ref n) => n.accumulate_mentioned_variables(acc), WhereFn(ref f) => f.accumulate_mentioned_variables(acc), TypeAnnotation(ref a) => a.accumulate_mentioned_variables(acc), RuleExpr => (), } } } impl ContainsVariables for OrWhereClause { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { use self::OrWhereClause::*; match self { And(ref clauses) => { for clause in clauses { clause.accumulate_mentioned_variables(acc) } } Clause(ref clause) => clause.accumulate_mentioned_variables(acc), } } } impl ContainsVariables for OrJoin { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for clause in &self.clauses { clause.accumulate_mentioned_variables(acc); } } } impl OrJoin { pub fn dismember(self) -> (Vec, UnifyVars, BTreeSet) { let vars = match self.mentioned_vars { Some(m) => m, None => self.collect_mentioned_variables(), }; (self.clauses, self.unify_vars, vars) } pub fn mentioned_variables<'a>(&'a mut self) -> &'a BTreeSet { if self.mentioned_vars.is_none() { let m = self.collect_mentioned_variables(); self.mentioned_vars = Some(m); } if let Some(ref mentioned) = self.mentioned_vars { mentioned } else { 
unreachable!() } } } impl ContainsVariables for NotJoin { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for clause in &self.clauses { clause.accumulate_mentioned_variables(acc); } } } impl ContainsVariables for Predicate { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for arg in &self.args { if let FnArg::Variable(ref v) = *arg { acc_ref(acc, v) } } } } impl ContainsVariables for TypeAnnotation { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { acc_ref(acc, &self.variable); } } impl ContainsVariables for Binding { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { match self { Binding::BindScalar(ref v) | &Binding::BindColl(ref v) => acc_ref(acc, v), Binding::BindRel(ref vs) | &Binding::BindTuple(ref vs) => { for v in vs { if let VariableOrPlaceholder::Variable(ref v) = *v { acc_ref(acc, v); } } } } } } impl ContainsVariables for WhereFn { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { for arg in &self.args { if let FnArg::Variable(ref v) = *arg { acc_ref(acc, v) } } self.binding.accumulate_mentioned_variables(acc); } } fn acc_ref(acc: &mut BTreeSet, v: &T) { // Roll on, reference entries! if !acc.contains(v) { acc.insert(v.clone()); } } impl ContainsVariables for Pattern { fn accumulate_mentioned_variables(&self, acc: &mut BTreeSet) { if let PatternNonValuePlace::Variable(ref v) = self.entity { acc_ref(acc, v) } if let PatternNonValuePlace::Variable(ref v) = self.attribute { acc_ref(acc, v) } if let PatternValuePlace::Variable(ref v) = self.value { acc_ref(acc, v) } if let PatternNonValuePlace::Variable(ref v) = self.tx { acc_ref(acc, v) } } }