diff --git a/parser-utils/src/lib.rs b/parser-utils/src/lib.rs index 5e8cf923..c41f820b 100644 --- a/parser-utils/src/lib.rs +++ b/parser-utils/src/lib.rs @@ -38,8 +38,20 @@ pub type ResultParser = Expected ParseResult>>; #[macro_export] macro_rules! assert_parses_to { ( $parser: expr, $input: expr, $expected: expr ) => {{ - let mut par = $parser(); - let result = par.parse($input.with_spans().into_atom_stream()).map(|x| x.0); // TODO: check remainder of stream. + let par = $parser(); + let result = par.skip(eof()).parse($input.with_spans().into_atom_stream()).map(|x| x.0); + assert_eq!(result, Ok($expected)); + }} +} + +/// `assert_edn_parses_to!` simplifies some of the boilerplate around running a parser function +/// against string input and expecting a certain result. +#[macro_export] +macro_rules! assert_edn_parses_to { + ( $parser: expr, $input: expr, $expected: expr ) => {{ + let par = $parser(); + let input = edn::parse::value($input).expect("to be able to parse input as EDN"); + let result = par.skip(eof()).parse(input.into_atom_stream()).map(|x| x.0); assert_eq!(result, Ok($expected)); }} } diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index 1872b8f9..eaa3f869 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -96,6 +96,7 @@ fn unit_type_set(t: ValueType) -> HashSet { /// /// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`. /// - Predicate clauses turn into the same, but with other functions. +/// - Function clauses turn into `WHERE` parts using function-specific comparisons. /// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to /// bind it to the outer variables, or adds simple `WHERE` clauses to the outer /// clause. 
@@ -581,6 +582,9 @@ impl ConjoiningClauses { WhereClause::Pred(p) => { self.apply_predicate(schema, p) }, + WhereClause::WhereFn(f) => { + self.apply_where_fn(schema, f) + }, WhereClause::OrJoin(o) => { validate_or_join(&o) //?; @@ -606,4 +610,4 @@ fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { #[cfg(test)] pub fn ident(ns: &str, name: &str) -> PatternNonValuePlace { PatternNonValuePlace::Ident(::std::rc::Rc::new(NamespacedKeyword::new(ns, name))) -} \ No newline at end of file +} diff --git a/query-algebrizer/src/clauses/pattern.rs b/query-algebrizer/src/clauses/pattern.rs index cede4ef1..fcdd480e 100644 --- a/query-algebrizer/src/clauses/pattern.rs +++ b/query-algebrizer/src/clauses/pattern.rs @@ -8,8 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::rc::Rc; - use mentat_core::{ Schema, TypedValue, @@ -268,6 +266,7 @@ mod testing { use super::*; use std::collections::BTreeMap; + use std::rc::Rc; use mentat_core::attribute::Unique; use mentat_core::{ diff --git a/query-algebrizer/src/clauses/predicate.rs b/query-algebrizer/src/clauses/predicate.rs index 4242d9a0..9beaf1d0 100644 --- a/query-algebrizer/src/clauses/predicate.rs +++ b/query-algebrizer/src/clauses/predicate.rs @@ -10,35 +10,48 @@ use mentat_core::{ Schema, + TypedValue, + ValueType, }; use mentat_query::{ + Binding, + FnArg, + NonIntegerConstant, Predicate, + SrcVar, + VariableOrPlaceholder, + WhereFn, }; use clauses::ConjoiningClauses; use errors::{ - Result, + Error, ErrorKind, + Result, }; use types::{ ColumnConstraint, + DatomsColumn, + DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, NumericComparison, + QualifiedAlias, + QueryValue, }; /// Application of predicates. 
impl ConjoiningClauses { - /// There are several kinds of predicates/functions in our Datalog: + /// There are several kinds of predicates in our Datalog: /// - A limited set of binary comparison operators: < > <= >= !=. /// These are converted into SQLite binary comparisons and some type constraints. - /// - A set of predicates like `fulltext` and `get-else` that are translated into - /// SQL `MATCH`es or joins, yielding bindings. /// - In the future, some predicates that are implemented via function calls in SQLite. /// /// At present we have implemented only the five built-in comparison binary operators. - pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { + pub fn apply_predicate<'s>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> { // Because we'll be growing the set of built-in predicates, handling each differently, // and ultimately allowing user-specified predicates, we match on the predicate name first. if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) { @@ -53,7 +66,7 @@ impl ConjoiningClauses { /// - Ensures that the predicate functions name a known operator. /// - Accumulates a `NumericInequality` constraint into the `wheres` list. 
#[allow(unused_variables)] - pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { + pub fn apply_numeric_predicate<'s>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> { if predicate.args.len() != 2 { bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2)); } @@ -81,6 +94,161 @@ impl ConjoiningClauses { self.wheres.add_intersection(constraint); Ok(()) } + + + /// There are several kinds of functions binding variables in our Datalog: + /// - A set of functions like `fulltext` and `get-else` that are translated into + /// SQL `MATCH`es or joins, yielding bindings. + /// - In the future, some functions that are implemented via function calls in SQLite. + /// + /// At present we have implemented only the `fulltext` operator. + pub fn apply_where_fn<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + // Because we'll be growing the set of built-in functions, handling each differently, and + // ultimately allowing user-specified functions, we match on the function name first. + match where_fn.operator.0.as_str() { + "fulltext" => self.apply_fulltext(schema, where_fn), + _ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())), + } + } + + /// This function: + /// - Checks the argument count and resolves the source, attribute, and search-term arguments. + /// - Accumulates `Equals` (datoms value = fulltext rowid) and `Matches` constraints into the `wheres` list. + /// - Type-constrains and, where possible, binds up to four relation variables (entity, value, tx, score). + #[allow(unused_variables)] + pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 3 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3)); + } + + // TODO: binding-specific error messages. 
+ let mut bindings = match where_fn.binding { + Binding::BindRel(bindings) => { + if bindings.len() > 4 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), bindings.len(), 4)); + } + bindings.into_iter() + }, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "bindings".into(), 999)), + }; + + // Go from arguments -- parser output -- to columns or values. + // Any variables that aren't bound by this point in the linear processing of clauses will + // cause the application of the predicate to fail. + let mut args = where_fn.args.into_iter(); + + // TODO: process source variables. + match args.next().unwrap() { + FnArg::SrcVar(SrcVar::DefaultSrc) => {}, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)), + } + + // TODO: accept placeholder and set of attributes. Alternately, consider putting the search + // term before the attribute arguments and collect the (variadic) attributes into a set. + // let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?; + // + // TODO: allow non-constant attributes. + // TODO: improve the expression of this matching, possibly by using attribute_for_* uniformly. + let a = match args.next().unwrap() { + FnArg::Ident(i) => schema.get_entid(&i), + // Must be an entid. + FnArg::EntidOrInteger(e) => Some(e), + _ => None, + }; + + let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + + let fulltext_values = DatomsTable::FulltextValues; + let datoms_table = DatomsTable::Datoms; + + let fulltext_values_alias = (self.aliaser)(fulltext_values); + let datoms_table_alias = (self.aliaser)(datoms_table); + + // TODO: constrain types in more general cases? 
+ self.constrain_attribute(datoms_table_alias.clone(), a); + + self.wheres.add_intersection(ColumnConstraint::Equals( + QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Value), + QueryValue::FulltextColumn(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Rowid)))); + + // search is either text or a variable. + // TODO: should this just use `resolve_argument`? Should it add a new `resolve_*` function? + let search = match args.next().unwrap() { + FnArg::Variable(var) => { + self.column_bindings + .get(&var) + .and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone()))) + .ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))? + }, + FnArg::Constant(NonIntegerConstant::Text(s)) => { + QueryValue::TypedValue(TypedValue::typed_string(s.as_str())) + }, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + }; + + // TODO: should we build the FQA in ::Matches, preventing nonsense like matching on ::Rowid? + let constraint = ColumnConstraint::Matches(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text), search); + self.wheres.add_intersection(constraint); + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Ref); + + let entity_alias = QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Entity); + self.column_bindings.entry(var).or_insert(vec![]).push(entity_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? 
+ // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::String); + + // TODO: figure out how to represent a FulltextQualifiedAlias. + // let value_alias = FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text); + // self.column_bindings.entry(var).or_insert(vec![]).push(value_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Ref); + + let tx_alias = QualifiedAlias(datoms_table_alias.clone(), DatomsColumn::Tx); + self.column_bindings.entry(var).or_insert(vec![]).push(tx_alias); + } + + if let Some(VariableOrPlaceholder::Variable(var)) = bindings.next() { + // TODO: can we just check for late binding here? + // Do we have, or will we have, an external binding for this variable? + if self.bound_value(&var).is_some() || self.input_variables.contains(&var) { + // That's a paddlin'! + bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "illegal bound variable".into(), 999)) + } + self.constrain_var_to_type(var.clone(), ValueType::Double); + + // TODO: produce this using SQLite's matchinfo. + self.value_bindings.insert(var.clone(), TypedValue::Double(0.0.into())); + + // TODO: figure out how to represent a constant binding. 
+ // self.column_bindings.entry(var).or_insert(vec![]).push(score_alias); + } + + Ok(()) + } } #[cfg(test)] @@ -88,6 +256,8 @@ mod testing { use super::*; use std::collections::HashSet; + use std::rc::Rc; + use mentat_core::attribute::Unique; use mentat_core::{ Attribute, @@ -96,13 +266,16 @@ mod testing { }; use mentat_query::{ + Binding, FnArg, NamespacedKeyword, Pattern, PatternNonValuePlace, PatternValuePlace, PlainSymbol, + SrcVar, Variable, + VariableOrPlaceholder, }; use clauses::{ @@ -229,4 +402,68 @@ mod testing { .collect(), ValueType::String)); } -} \ No newline at end of file + + #[test] + fn test_apply_fulltext() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + let op = PlainSymbol::new("fulltext"); + cc.apply_fulltext(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::SrcVar(SrcVar::DefaultSrc), + FnArg::Ident(NamespacedKeyword::new("foo", "fts")), + FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))), + ], + binding: Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?entity")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?value")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?tx")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?score"))]), + }).expect("to be able to apply_fulltext"); + + assert!(!cc.is_known_empty); + + // Finally, expand column bindings. 
+ cc.expand_column_bindings(); + assert!(!cc.is_known_empty); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 3); + + assert_eq!(clauses.0[0], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute), + QueryValue::Entid(100)).into()); + assert_eq!(clauses.0[1], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), DatomsColumn::Value), + QueryValue::FulltextColumn(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Rowid))).into()); + assert_eq!(clauses.0[2], ColumnConstraint::Matches(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Text), + QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into())))).into()); + + let bindings = cc.column_bindings; + assert_eq!(bindings.len(), 2); + + assert_eq!(bindings.get(&Variable::from_valid_name("?entity")).expect("column binding for ?entity").clone(), + vec![QualifiedAlias("datoms01".to_string(), DatomsColumn::Entity)]); + assert_eq!(bindings.get(&Variable::from_valid_name("?tx")).expect("column binding for ?tx").clone(), + vec![QualifiedAlias("datoms01".to_string(), DatomsColumn::Tx)]); + + let known_types = cc.known_types; + assert_eq!(known_types.len(), 4); + + assert_eq!(known_types.get(&Variable::from_valid_name("?entity")).expect("known types for ?entity").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?value")).expect("known types for ?value").clone(), + vec![ValueType::String].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?tx")).expect("known types for ?tx").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?score")).expect("known types for ?score").clone(), + vec![ValueType::Double].into_iter().collect()); + } +} diff --git a/query-algebrizer/src/clauses/resolve.rs b/query-algebrizer/src/clauses/resolve.rs index aba0ba35..857e5af1 100644 --- 
a/query-algebrizer/src/clauses/resolve.rs +++ b/query-algebrizer/src/clauses/resolve.rs @@ -56,7 +56,7 @@ impl ConjoiningClauses { Constant(NonIntegerConstant::Text(_)) | Constant(NonIntegerConstant::BigInteger(_)) => { self.mark_known_empty(EmptyBecause::NonNumericArgument); - bail!(ErrorKind::NonNumericArgument(function.clone(), position)); + bail!(ErrorKind::InvalidArgument(function.clone(), "numeric".into(), position)); }, Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))), } diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index dff08556..d6b4b333 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -35,9 +35,9 @@ error_chain! { display("unbound variable: {}", name) } - NonNumericArgument(function: PlainSymbol, position: usize) { + InvalidArgument(function: PlainSymbol, expected_type: String, position: usize) { description("invalid argument") - display("invalid argument to {}: expected numeric in position {}.", function, position) + display("invalid argument to {}: expected {} in position {}.", function, expected_type, position) } NonMatchingVariablesInOrClause { diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index a0e90db3..e386e108 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -99,6 +99,8 @@ pub use types::{ ColumnIntersection, DatomsColumn, DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index 121e5f85..a640c647 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -71,6 +71,23 @@ impl DatomsColumn { } } +/// One of the named columns of our fulltext values table. 
+#[derive(PartialEq, Eq, Clone, Debug)] +pub enum FulltextColumn { + Rowid, + Text, +} + +impl FulltextColumn { + pub fn as_str(&self) -> &'static str { + use self::FulltextColumn::*; + match *self { + Rowid => "rowid", + Text => "text", + } + } +} + /// A specific instance of a table within a query. E.g., "datoms123". pub type TableAlias = String; @@ -94,6 +111,16 @@ impl Debug for QualifiedAlias { } } +/// A particular column of a particular aliased fulltext table. E.g., "fulltext_values123", Rowid. +#[derive(PartialEq, Eq, Clone)] +pub struct FulltextQualifiedAlias(pub TableAlias, pub FulltextColumn); + +impl Debug for FulltextQualifiedAlias { + fn fmt(&self, f: &mut Formatter) -> Result { + write!(f, "{}.{}", self.0, self.1.as_str()) + } +} + impl QualifiedAlias { pub fn for_type_tag(&self) -> QualifiedAlias { QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag) @@ -103,6 +130,7 @@ impl QualifiedAlias { #[derive(PartialEq, Eq)] pub enum QueryValue { Column(QualifiedAlias), + FulltextColumn(FulltextQualifiedAlias), Entid(Entid), TypedValue(TypedValue), @@ -120,6 +148,9 @@ impl Debug for QueryValue { &Column(ref qa) => { write!(f, "{:?}", qa) }, + &FulltextColumn(ref qa) => { + write!(f, "{:?}", qa) + }, &Entid(ref entid) => { write!(f, "entity({:?})", entid) }, @@ -192,6 +223,9 @@ pub enum ColumnConstraint { right: QueryValue, }, HasType(TableAlias, ValueType), + // TODO: Merge this with NumericInequality? I expect the fine-grained information to be + // valuable when optimizing. + Matches(FulltextQualifiedAlias, QueryValue), } #[derive(PartialEq, Eq, Debug)] @@ -290,6 +324,10 @@ impl Debug for ColumnConstraint { write!(f, "{:?} {:?} {:?}", left, operator, right) }, + &Matches(ref qa, ref thing) => { + write!(f, "{:?} MATCHES {:?}", qa, thing) + }, + &HasType(ref qa, value_type) => { write!(f, "{:?}.value_type_tag = {:?}", qa, value_type) }, @@ -301,6 +339,7 @@ impl Debug for ColumnConstraint { pub enum EmptyBecause { // Var, existing, desired. 
TypeMismatch(Variable, HashSet, ValueType), + NonAttributeArgument, NonNumericArgument, NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), @@ -319,6 +358,9 @@ impl Debug for EmptyBecause { write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}", var, desired, existing) }, + &NonAttributeArgument => { + write!(f, "Non-attribute argument in attribute place") + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, @@ -346,4 +388,4 @@ impl Debug for EmptyBecause { }, } } -} \ No newline at end of file +} diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs index f5b6a8d9..1a5870ef 100644 --- a/query-parser/src/parse.rs +++ b/query-parser/src/parse.rs @@ -35,6 +35,7 @@ use self::mentat_parser_utils::value_and_span::{ }; use self::mentat_query::{ + Binding, Element, FindQuery, FindSpec, @@ -50,7 +51,9 @@ use self::mentat_query::{ SrcVar, UnifyVars, Variable, + VariableOrPlaceholder, WhereClause, + WhereFn, }; error_chain! { @@ -129,11 +132,36 @@ def_parser!(Where, pattern_non_value_place, PatternNonValuePlace, { satisfy_map(PatternNonValuePlace::from_value) }); -def_matches_plain_symbol!(Where, and, "and"); -def_matches_plain_symbol!(Where, or, "or"); +def_parser!(Where, and, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "and" + } else { + false + } + }) +}); -def_matches_plain_symbol!(Where, or_join, "or-join"); +def_parser!(Where, or, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "or" + } else { + false + } + }) +}); + +def_parser!(Where, or_join, edn::ValueAndSpan, { + satisfy(|v: edn::ValueAndSpan| { + if let edn::SpannedValue::PlainSymbol(ref s) = v.inner { + s.0.as_str() == "or-join" + } else { + false + } + }) +}); def_parser!(Where, rule_vars, Vec, { seq() @@ -198,6 +226,25 @@ def_parser!(Where, pred, WhereClause, { }))) }); 
+/// A vector containing a parenthesized function expression and a binding. +def_parser!(Where, where_fn, WhereClause, { + // Accept either a nested list or a nested vector here: + // `[(foo ?x ?y) binding]` or `[[foo ?x ?y] binding]` + vector() + .of_exactly( + (seq().of_exactly( + (Query::predicate_fn(), Query::arguments())), + Bind::binding()) + .map(|((f, args), binding)| { + WhereClause::WhereFn( + WhereFn { + operator: f.0, + args: args, + binding: binding, + }) + })) +}); + def_parser!(Where, pattern, WhereClause, { vector() .of_exactly( @@ -248,6 +295,7 @@ def_parser!(Where, clause, WhereClause, { try(Where::or_clause()), try(Where::pred()), + try(Where::where_fn()), ]) }); @@ -262,6 +310,8 @@ def_matches_plain_symbol!(Find, period, "."); def_matches_plain_symbol!(Find, ellipsis, "..."); +def_matches_plain_symbol!(Find, placeholder, "_"); + def_parser!(Find, find_scalar, FindSpec, { Query::variable() .skip(Find::period()) @@ -366,6 +416,47 @@ def_parser!(Find, query, FindQuery, { }) }); +pub struct Bind; + +def_parser!(Bind, bind_scalar, Binding, { + Query::variable() + .skip(eof()) + .map(|var: Variable| -> Binding { Binding::BindScalar(var) }) +}); + +def_parser!(Bind, variable_or_placeholder, VariableOrPlaceholder, { + Query::variable().map(VariableOrPlaceholder::Variable) + .or(Find::placeholder().map(|_| VariableOrPlaceholder::Placeholder)) +}); + +def_parser!(Bind, bind_coll, Binding, { + vector() + .of_exactly(Query::variable() + .skip(Find::ellipsis())) + .map(Binding::BindColl) +}); + +def_parser!(Bind, bind_rel, Binding, { + vector().of_exactly( + many1::, _>(Bind::variable_or_placeholder()) + .map(Binding::BindRel)) +}); + +def_parser!(Bind, bind_tuple, Binding, { + many1::, _>(Bind::variable_or_placeholder()) + .skip(eof()) + .map(Binding::BindTuple) +}); + +def_parser!(Bind, binding, Binding, { + // Any one of the four binding types might apply, so we combine them with `choice`. 
Our parsers + // consume input, so we need to wrap them in `try` so that they operate independently. + choice([try(Bind::bind_scalar()), + try(Bind::bind_coll()), + try(Bind::bind_tuple()), + try(Bind::bind_rel())]) +}); + pub fn parse_find_string(string: &str) -> Result { let expr = edn::parse::value(string)?; Find::query() @@ -385,6 +476,7 @@ mod test { use self::combine::Parser; use self::edn::OrderedFloat; use self::mentat_query::{ + Binding, Element, FindSpec, NonIntegerConstant, @@ -393,6 +485,7 @@ mod test { PatternValuePlace, SrcVar, Variable, + VariableOrPlaceholder, }; use super::*; @@ -598,4 +691,86 @@ mod test { FindSpec::FindTuple(vec![Element::Variable(variable(vx)), Element::Variable(variable(vy))])); } + + #[test] + fn test_bind_scalar() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[?x]", + Binding::BindScalar(variable(vx))); + } + + #[test] + fn test_bind_coll() { + let vx = edn::PlainSymbol::new("?x"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[[?x ...]]", + Binding::BindColl(variable(vx))); + } + + #[test] + fn test_bind_rel() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[[?x ?y _ ?w]]", + Binding::BindRel(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_bind_tuple() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let vw = edn::PlainSymbol::new("?w"); + assert_edn_parses_to!(|| vector().of_exactly(Bind::binding()), + "[?x ?y _ ?w]", + Binding::BindTuple(vec![VariableOrPlaceholder::Variable(variable(vx)), + VariableOrPlaceholder::Variable(variable(vy)), + VariableOrPlaceholder::Placeholder, + 
VariableOrPlaceholder::Variable(variable(vw)), + ])); + } + + #[test] + fn test_where_fn() { + assert_edn_parses_to!(Where::where_fn, + "[(f ?x 1) ?y]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x")), + FnArg::EntidOrInteger(1)], + binding: Binding::BindScalar(Variable::from_valid_name("?y")), + })); + + assert_edn_parses_to!(Where::where_fn, + "[(f ?x) [?y ...]]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![FnArg::Variable(Variable::from_valid_name("?x"))], + binding: Binding::BindColl(Variable::from_valid_name("?y")), + })); + + // assert_edn_parses_to!(Where::where_fn, + // "[(f) [?y _]]", + // WhereClause::WhereFn(WhereFn { + // operator: edn::PlainSymbol::new("f"), + // args: vec![], + // binding: Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?y")), + // VariableOrPlaceholder::Placeholder]), + // })); + + assert_edn_parses_to!(Where::where_fn, + "[(f) _ ?y]", + WhereClause::WhereFn(WhereFn { + operator: edn::PlainSymbol::new("f"), + args: vec![], + binding: Binding::BindTuple(vec![VariableOrPlaceholder::Placeholder, + VariableOrPlaceholder::Variable(Variable::from_valid_name("?y"))]), + })); + } } diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index f1d94596..1df7a66e 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -210,7 +210,7 @@ fn project_elements<'a, I: IntoIterator>( let columns = query.cc .column_bindings .get(var) - .expect("Every variable has a binding"); + .expect(format!("Every variable should have a binding, but {} does not", var.as_str()).as_str()); let qa = columns[0].clone(); let name = column_name(var); @@ -490,4 +490,4 @@ pub fn query_projection(query: &AlgebraicQuery) -> CombinedProjection { }, } } -} \ No newline at end of file +} diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 1d3d7869..cc7c1e1f 100644 --- 
a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -20,6 +20,8 @@ use mentat_core::{ use mentat_query_algebrizer::{ DatomsColumn, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, @@ -44,6 +46,7 @@ use mentat_sql::{ /// implementation for each storage backend. Passing `TypedValue`s here allows for that. pub enum ColumnOrExpression { Column(QualifiedAlias), + FulltextColumn(FulltextQualifiedAlias), Entid(Entid), // Because it's so common. Integer(i32), // We use these for type codes etc. Long(i64), @@ -55,6 +58,7 @@ impl From for ColumnOrExpression { fn from(v: QueryValue) -> Self { match v { QueryValue::Column(c) => ColumnOrExpression::Column(c), + QueryValue::FulltextColumn(c) => ColumnOrExpression::FulltextColumn(c), QueryValue::Entid(e) => ColumnOrExpression::Entid(e), QueryValue::PrimitiveLong(v) => ColumnOrExpression::Long(v), QueryValue::TypedValue(v) => ColumnOrExpression::Value(v), @@ -109,6 +113,14 @@ impl Constraint { right: right, } } + + pub fn fulltext_match(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { + Constraint::Infix { + op: Op("MATCH"), // SQLite specific! + left: left, + right: right, + } + } } #[allow(dead_code)] @@ -157,6 +169,11 @@ fn push_column(qb: &mut QueryBuilder, col: &DatomsColumn) { qb.push_sql(col.as_str()); } +// We know that FulltextColumns are safe to serialize. +fn push_fulltext_column(qb: &mut QueryBuilder, col: &FulltextColumn) { + qb.push_sql(col.as_str()); +} + //--------------------------------------------------------- // Turn that representation into SQL. 
@@ -199,6 +216,12 @@ impl QueryFragment for ColumnOrExpression { push_column(out, column); Ok(()) }, + &FulltextColumn(FulltextQualifiedAlias(ref table, ref column)) => { + out.push_identifier(table.as_str())?; + out.push_sql("."); + push_fulltext_column(out, column); + Ok(()) + }, &Entid(entid) => { out.push_sql(entid.to_string().as_str()); Ok(()) @@ -406,13 +429,20 @@ impl SelectQuery { #[cfg(test)] mod tests { use super::*; + + use std::rc::Rc; + use mentat_query_algebrizer::DatomsTable; - fn build_constraint(c: Constraint) -> String { + fn build_constraint_query(c: Constraint) -> SQLQuery { let mut builder = SQLiteQueryBuilder::new(); c.push_sql(&mut builder) .map(|_| builder.finish()) - .unwrap().sql + .expect("to produce a query for the given constraint") + } + + fn build_constraint(c: Constraint) -> String { + build_constraint_query(c).sql } #[test] @@ -469,6 +499,25 @@ mod tests { assert_eq!("((123 = 456 AND 789 = 246))", build_constraint(c)); } + #[test] + fn test_matches_constraint() { + let c = Constraint::fulltext_match( + ColumnOrExpression::FulltextColumn( + FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Text)), + ColumnOrExpression::Value(TypedValue::String(Rc::new("needle".to_string()))), + ); + let q = build_constraint_query(c); + assert_eq!("`fulltext01`.text MATCH $v0", q.sql); + assert_eq!(vec![("$v0".to_string(), Rc::new("needle".to_string()))], q.args); + + let c = Constraint::Infix { + op: Op("="), + left: ColumnOrExpression::FulltextColumn(FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Rowid)), + right: ColumnOrExpression::Column(QualifiedAlias("datoms02".to_string(), DatomsColumn::Value)), + }; + assert_eq!("`fulltext01`.rowid = `datoms02`.v", build_constraint(c)); + } + #[test] fn test_end_to_end() { // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index e6b4b375..aa17a3d8 100644 --- 
a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -32,6 +32,8 @@ use mentat_query_algebrizer::{ ConjoiningClauses, DatomsColumn, DatomsTable, + FulltextColumn, + FulltextQualifiedAlias, QualifiedAlias, QueryValue, SourceAlias, @@ -69,6 +71,12 @@ impl ToColumn for QualifiedAlias { } } +impl ToColumn for FulltextQualifiedAlias { + fn to_column(self) -> ColumnOrExpression { + ColumnOrExpression::FulltextColumn(self) + } +} + impl ToConstraint for ColumnIntersection { fn to_constraint(self) -> Constraint { Constraint::And { @@ -108,6 +116,11 @@ impl ToConstraint for ColumnConstraint { Equals(left, QueryValue::Column(right)) => Constraint::equal(left.to_column(), right.to_column()), + Equals(left, QueryValue::FulltextColumn(right)) => + // TODO: figure out if this is the correct abstraction. Can we make it so that + // FulltextColumns::Text is not accepted here? + Constraint::equal(left.to_column(), right.to_column()), + Equals(qa, QueryValue::PrimitiveLong(value)) => { let tag_column = qa.for_type_tag().to_column(); let value_column = qa.to_column(); @@ -148,6 +161,14 @@ impl ToConstraint for ColumnConstraint { } }, + Matches(left, right) => { + // Reuse the shared helper so the SQLite-specific operator is spelled in one place. + Constraint::fulltext_match( + left.to_column(), + right.into(), + ) + }, + HasType(table, value_type) => { let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column(); Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag())) diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 29ad6c75..d4786c50 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -58,6 +58,13 @@ fn prepopulated_schema() -> Schema { value_type: ValueType::String, ..Default::default() }); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: 
true, + ..Default::default() + }); schema } @@ -241,4 +248,14 @@ fn test_numeric_not_equals_known_attribute() { let SQLQuery { sql, args } = translate(&schema, input, None); assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1"); assert_eq!(args, vec![]); -} \ No newline at end of file +} + +#[test] +fn test_fulltext() { + let schema = prepopulated_schema(); + + let input = r#"[:find ?entity ?value ?tx ?score :where [(fulltext $ :foo/fts "needle") [?entity ?value ?tx ?score]]]"#; + let SQLQuery { sql, args } = translate(&schema, input, None); + assert_eq!(sql, "SELECT `datoms00`.e AS `?entity`, `datoms00`.v AS `?value`, `datoms00`.tx AS `?tx`, 0.0 AS `?score` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "needle")]); +} diff --git a/query/src/lib.rs b/query/src/lib.rs index b9037117..18edfc3d 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -147,7 +147,11 @@ impl FromValue for SrcVar { impl SrcVar { pub fn from_symbol(sym: &PlainSymbol) -> Option { if sym.is_src_symbol() { - Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + if sym.0 == "$" { + Some(SrcVar::DefaultSrc) + } else { + Some(SrcVar::NamedSrc(sym.plain_name().to_string())) + } } else { None } @@ -185,6 +189,7 @@ pub enum FnArg { impl FromValue for FnArg { fn from_value(v: edn::ValueAndSpan) -> Option { +<<<<<<< HEAD // TODO: support SrcVars. Variable::from_value(v.clone()) // TODO: don't clone! 
.and_then(|v| Some(FnArg::Variable(v))) @@ -195,6 +200,36 @@ impl FromValue for FnArg { edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))), _ => unimplemented!(), }}) +======= + use edn::SpannedValue::*; + match v.inner { + Integer(x) => + Some(FnArg::EntidOrInteger(x)), + PlainSymbol(ref x) if x.is_src_symbol() => + SrcVar::from_symbol(x).map(FnArg::SrcVar), + PlainSymbol(ref x) if x.is_var_symbol() => + Variable::from_symbol(x).map(FnArg::Variable), + PlainSymbol(_) => None, + NamespacedKeyword(ref x) => + Some(FnArg::Ident(x.clone())), + Boolean(x) => + Some(FnArg::Constant(NonIntegerConstant::Boolean(x))), + Float(x) => + Some(FnArg::Constant(NonIntegerConstant::Float(x))), + BigInteger(ref x) => + Some(FnArg::Constant(NonIntegerConstant::BigInteger(x.clone()))), + Text(ref x) => + // TODO: intern strings. #398. + Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(x.clone())))), + Nil | + NamespacedSymbol(_) | + Keyword(_) | + Vector(_) | + List(_) | + Set(_) | + Map(_) => None, + } +>>>>>>> 71d3aa29ed3b383f030e9b3d13eeef5a12820be1 } } @@ -455,6 +490,25 @@ impl FindSpec { } } +// Datomic accepts variable or placeholder. DataScript accepts recursive bindings. Mentat sticks +// to the non-recursive form Datomic accepts, which is much simpler to process. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum VariableOrPlaceholder { + Placeholder, + Variable(Variable), +} + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum Binding { + BindRel(Vec), + + BindColl(Variable), + + BindTuple(Vec), + + BindScalar(Variable), +} + // Note that the "implicit blank" rule applies. // A pattern with a reversed attribute — :foo/_bar — is reversed // at the point of parsing. 
These `Pattern` instances only represent @@ -510,6 +564,13 @@ pub struct Predicate { pub args: Vec, } +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct WhereFn { + pub operator: PlainSymbol, + pub args: Vec, + pub binding: Binding, +} + #[derive(Clone, Debug, Eq, PartialEq)] pub enum UnifyVars { /// `Implicit` means the variables in an `or` or `not` are derived from the enclosed pattern. @@ -577,7 +638,7 @@ pub enum WhereClause { NotJoin, OrJoin(OrJoin), Pred(Predicate), - WhereFn, + WhereFn(WhereFn), RuleExpr, Pattern(Pattern), } @@ -630,7 +691,7 @@ impl ContainsVariables for WhereClause { &Pattern(ref p) => p.accumulate_mentioned_variables(acc), &Not => (), &NotJoin => (), - &WhereFn => (), + &WhereFn(_) => (), &RuleExpr => (), } }