diff --git a/core/src/util.rs b/core/src/util.rs index 5f3f7321..77d02735 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -56,4 +56,33 @@ impl OptionEffect for Option { } self } -} \ No newline at end of file +} + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] +pub enum Either { + Left(L), + Right(R), +} + +// Cribbed from https://github.com/bluss/either/blob/f793721f3fdeb694f009e731b23a2858286bc0d6/src/lib.rs#L219-L259. +impl Either { + pub fn map_left(self, f: F) -> Either + where F: FnOnce(L) -> M + { + use self::Either::*; + match self { + Left(l) => Left(f(l)), + Right(r) => Right(r), + } + } + + pub fn map_right(self, f: F) -> Either + where F: FnOnce(R) -> S + { + use self::Either::*; + match self { + Left(l) => Left(l), + Right(r) => Right(f(r)), + } + } +} diff --git a/db/src/internal_types.rs b/db/src/internal_types.rs index f97f1b0a..2955faf1 100644 --- a/db/src/internal_types.rs +++ b/db/src/internal_types.rs @@ -15,6 +15,8 @@ use std::collections::HashMap; use std::rc::Rc; +use mentat_core::util::Either; + use errors; use errors::ErrorKind; use types::{ @@ -33,33 +35,6 @@ pub enum Term { AddOrRetract(OpType, E, Entid, V), } -#[derive(Clone, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] -pub enum Either { - Left(L), - Right(R), -} - -// Cribbed from https://github.com/bluss/either/blob/f793721f3fdeb694f009e731b23a2858286bc0d6/src/lib.rs#L219-L259. -impl Either { - pub fn map_left(self, f: F) -> Either - where F: FnOnce(L) -> M - { - match self { - Left(l) => Left(f(l)), - Right(r) => Right(r), - } - } - - pub fn map_right(self, f: F) -> Either - where F: FnOnce(R) -> S - { - match self { - Left(l) => Left(l), - Right(r) => Right(f(r)), - } - } -} - use self::Either::*; /// An entid that's either already in the store, or newly allocated to a tempid. 
diff --git a/db/src/tx.rs b/db/src/tx.rs index 9ad0f496..50cb2c56 100644 --- a/db/src/tx.rs +++ b/db/src/tx.rs @@ -64,7 +64,6 @@ use edn::{ use entids; use errors::{ErrorKind, Result}; use internal_types::{ - Either, KnownEntid, KnownEntidOr, LookupRef, @@ -76,7 +75,10 @@ use internal_types::{ TermWithTempIdsAndLookupRefs, TermWithoutTempIds, TypedValueOr, - replace_lookup_ref}; + replace_lookup_ref, +}; + +use mentat_core::util::Either; use mentat_core::{ DateTime, diff --git a/db/src/upsert_resolution.rs b/db/src/upsert_resolution.rs index 03cbf971..3c74562f 100644 --- a/db/src/upsert_resolution.rs +++ b/db/src/upsert_resolution.rs @@ -28,7 +28,9 @@ use internal_types::{ TermWithoutTempIds, TermWithTempIds, }; -use internal_types::Either::*; + +use mentat_core::util::Either::*; + use mentat_core::{ attribute, Attribute, diff --git a/query-algebrizer/src/clauses/convert.rs b/query-algebrizer/src/clauses/convert.rs new file mode 100644 index 00000000..a9975699 --- /dev/null +++ b/query-algebrizer/src/clauses/convert.rs @@ -0,0 +1,185 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::rc::Rc; + +use mentat_core::{ + Schema, + SQLValueType, + TypedValue, + ValueType, +}; + +use mentat_query::{ + FnArg, + NonIntegerConstant, + Variable, +}; + +use clauses::{ + ConjoiningClauses, +}; + +use errors::{ + ErrorKind, + Result, +}; + +use types::{ + EmptyBecause, + ValueTypeSet, +}; + +macro_rules! 
coerce_to_typed_value { + ($var: ident, $val: ident, $types: expr, $type: path, $constructor: path) => { { + Ok(if !$types.contains($type) { + Impossible(EmptyBecause::TypeMismatch { + var: $var.clone(), + existing: $types, + desired: ValueTypeSet::of_one($type), + }) + } else { + Val($constructor($val).into()) + }) + } } +} + +pub enum ValueConversion { + Val(TypedValue), + Impossible(EmptyBecause), +} + +/// Conversion of FnArgs to TypedValues. +impl ConjoiningClauses { + /// Convert the provided `FnArg` to a `TypedValue`. + /// The conversion depends on, and can fail because of: + /// - Existing known types of a variable to which this arg will be bound. + /// - Existing bindings of a variable `FnArg`. + pub fn typed_value_from_arg<'s>(&self, schema: &'s Schema, var: &Variable, arg: FnArg, known_types: ValueTypeSet) -> Result { + use self::ValueConversion::*; + if known_types.is_empty() { + // If this happens, it likely means the pattern has already failed! + return Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::any(), + })); + } + + match arg { + // Longs are potentially ambiguous: they might be longs or entids. + FnArg::EntidOrInteger(x) => { + match (ValueType::Ref.accommodates_integer(x), + known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Long)) { + (true, true, true) => { + // Ambiguous: this arg could be an entid or a long. + // We default to long. + Ok(Val(TypedValue::Long(x))) + }, + (true, true, false) => { + // This can only be a ref. + Ok(Val(TypedValue::Ref(x))) + }, + (_, false, true) => { + // This can only be a long. + Ok(Val(TypedValue::Long(x))) + }, + (false, true, _) => { + // This isn't a valid ref, but that's the type to which this must conform! + // NOTE(review): because of the wildcard, this arm also matches (false, true, true), + // i.e. the known types include Long as well as Ref. Confirm that yielding + // `Impossible` (rather than a `Long`) is intended in that case. + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + (_, false, false) => { + // Non-overlapping type sets. 
+ Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_longs(), + })) + }, + } + }, + + // If you definitely want to look up an ident, do it before running the query. + FnArg::IdentOrKeyword(x) => { + match (known_types.contains(ValueType::Ref), + known_types.contains(ValueType::Keyword)) { + (true, true) => { + // Ambiguous: this could be a keyword or an ident. + // Default to keyword. + Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (true, false) => { + // This can only be an ident. Look it up! + match schema.get_entid(&x).map(TypedValue::Ref) { + Some(e) => Ok(Val(e)), + None => Ok(Impossible(EmptyBecause::UnresolvedIdent(x.clone()))), + } + }, + (false, true) => { + Ok(Val(TypedValue::Keyword(Rc::new(x)))) + }, + (false, false) => { + Ok(Impossible(EmptyBecause::TypeMismatch { + var: var.clone(), + existing: known_types, + desired: ValueTypeSet::of_keywords(), + })) + }, + } + }, + + FnArg::Variable(in_var) => { + // TODO: technically you could ground an existing variable inside the query…. + if !self.input_variables.contains(&in_var) { + bail!(ErrorKind::UnboundVariable((*in_var.0).clone())); + } + match self.bound_value(&in_var) { + // The type is already known if it's a bound variable…. + Some(ref in_value) => Ok(Val(in_value.clone())), + None => { + // The variable is present in `:in`, but it hasn't yet been provided. + // This is a restriction we will eventually relax: we don't yet have a way + // to collect variables as part of a computed table or substitution. + bail!(ErrorKind::UnboundVariable((*in_var.0).clone())) + }, + } + }, + + // This isn't implemented yet. + FnArg::Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), + + // These don't make sense here. + FnArg::Vector(_) | + FnArg::SrcVar(_) => bail!(ErrorKind::InvalidGroundConstant), + + // These are all straightforward. 
+ FnArg::Constant(NonIntegerConstant::Boolean(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Boolean, TypedValue::Boolean) + }, + FnArg::Constant(NonIntegerConstant::Instant(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Instant, TypedValue::Instant) + }, + FnArg::Constant(NonIntegerConstant::Uuid(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Uuid, TypedValue::Uuid) + }, + FnArg::Constant(NonIntegerConstant::Float(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::Double, TypedValue::Double) + }, + FnArg::Constant(NonIntegerConstant::Text(x)) => { + coerce_to_typed_value!(var, x, known_types, ValueType::String, TypedValue::String) + }, + } + } +} diff --git a/query-algebrizer/src/clauses/fulltext.rs b/query-algebrizer/src/clauses/fulltext.rs new file mode 100644 index 00000000..a0e077b7 --- /dev/null +++ b/query-algebrizer/src/clauses/fulltext.rs @@ -0,0 +1,348 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +use mentat_core::{ + Schema, + TypedValue, + ValueType, +}; + +use mentat_query::{ + Binding, + FnArg, + NonIntegerConstant, + SrcVar, + VariableOrPlaceholder, + WhereFn, +}; + +use clauses::{ + ConjoiningClauses, +}; + +use errors::{ + BindingError, + ErrorKind, + Result, +}; + +use types::{ + Column, + ColumnConstraint, + DatomsColumn, + DatomsTable, + EmptyBecause, + FulltextColumn, + QualifiedAlias, + QueryValue, + SourceAlias, +}; + +impl ConjoiningClauses { + #[allow(unused_variables)] + pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 3 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3)); + } + + if where_fn.binding.is_empty() { + // The binding must introduce at least one bound variable. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable)); + } + + if !where_fn.binding.is_valid() { + // The binding must not duplicate bound variables. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable)); + } + + // We accept a rel binding of between one and four places: entity, value, tx, score. + // Trailing places that are omitted are treated as placeholders. Destructure them now. 
+ let bindings = match where_fn.binding { + Binding::BindRel(bindings) => { + let bindings_count = bindings.len(); + if bindings_count < 1 || bindings_count > 4 { + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), + BindingError::InvalidNumberOfBindings { + number: bindings.len(), + expected: 4, + })); + } + bindings + }, + Binding::BindScalar(_) | + Binding::BindTuple(_) | + Binding::BindColl(_) => bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::ExpectedBindRel)), + }; + let mut bindings = bindings.into_iter(); + let b_entity = bindings.next().unwrap(); + let b_value = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder); + let b_tx = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder); + let b_score = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder); + + let mut args = where_fn.args.into_iter(); + + // TODO: process source variables. + match args.next().unwrap() { + FnArg::SrcVar(SrcVar::DefaultSrc) => {}, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)), + } + + // TODO: accept placeholder and set of attributes. Alternately, consider putting the search + // term before the attribute arguments and collect the (variadic) attributes into a set. + // let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?; + // + // TODO: improve the expression of this matching, possibly by using attribute_for_* uniformly. + let a = match args.next().unwrap() { + FnArg::IdentOrKeyword(i) => schema.get_entid(&i), + // Must be an entid. + FnArg::EntidOrInteger(e) => Some(e), + FnArg::Variable(v) => { + // If it's already bound, then let's expand the variable. + // TODO: allow non-constant attributes. 
+ match self.bound_value(&v) { + Some(TypedValue::Ref(entid)) => Some(entid), + Some(tv) => { + bail!(ErrorKind::InputTypeDisagreement(v.name().clone(), ValueType::Ref, tv.value_type())); + }, + None => { + bail!(ErrorKind::UnboundVariable((*v.0).clone())); + } + } + }, + _ => None, + }; + + // An unknown ident, or an entity that isn't present in the store, is likely enough to be + // a coding error that we choose to bail instead of marking the pattern as known-empty. + // A known attribute that isn't a fulltext attribute is handled differently just below: + // it marks the CC known-empty and returns successfully rather than bailing. + let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?; + + if !attribute.fulltext { + // We can never get results from a non-fulltext attribute! + println!("Can't run fulltext on non-fulltext attribute {}.", a); + self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(a)); + return Ok(()); + } + + let fulltext_values_alias = self.next_alias_for_table(DatomsTable::FulltextValues); + let datoms_table_alias = self.next_alias_for_table(DatomsTable::Datoms); + + // We do a fulltext lookup by joining the fulltext values table against datoms -- just + // like applying a pattern, but two tables contribute instead of one. + self.from.push(SourceAlias(DatomsTable::FulltextValues, fulltext_values_alias.clone())); + self.from.push(SourceAlias(DatomsTable::Datoms, datoms_table_alias.clone())); + + // TODO: constrain the type in the more general cases (e.g., `a` is a var). + self.constrain_attribute(datoms_table_alias.clone(), a); + + // Join the datoms table to the fulltext values table. + self.wheres.add_intersection(ColumnConstraint::Equals( + QualifiedAlias(datoms_table_alias.clone(), Column::Fixed(DatomsColumn::Value)), + QueryValue::Column(QualifiedAlias(fulltext_values_alias.clone(), Column::Fulltext(FulltextColumn::Rowid))))); + + // `search` is either text or a variable. 
+ // If it's simple text, great. + // If it's a variable, it'll be in one of three states: + // - It's already bound, either by input or by a previous pattern like `ground`. + // - It's not already bound, but it's a defined input of type Text. Not yet implemented: TODO. + // - It's not bound. The query cannot be algebrized. + let search: TypedValue = match args.next().unwrap() { + FnArg::Constant(NonIntegerConstant::Text(s)) => { + TypedValue::String(s) + }, + FnArg::Variable(in_var) => { + match self.bound_value(&in_var) { + Some(t @ TypedValue::String(_)) => t, + Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + None => { + if self.input_variables.contains(&in_var) && + self.known_type(&in_var) == Some(ValueType::String) { + // Sorry, we haven't implemented late binding. + } + bail!(ErrorKind::UnboundVariable((*in_var.0).clone())); + }, + } + }, + _ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)), + }; + + let constraint = ColumnConstraint::Matches(QualifiedAlias(fulltext_values_alias.clone(), + Column::Fulltext(FulltextColumn::Text)), + QueryValue::TypedValue(search)); + self.wheres.add_intersection(constraint); + + if let VariableOrPlaceholder::Variable(ref var) = b_entity { + // It must be a ref. + self.constrain_var_to_type(var.clone(), ValueType::Ref); + if self.is_known_empty() { + return Ok(()); + } + + self.bind_column_to_var(schema, datoms_table_alias.clone(), DatomsColumn::Entity, var.clone()); + } + + if let VariableOrPlaceholder::Variable(ref var) = b_value { + // This'll be bound to strings. + self.constrain_var_to_type(var.clone(), ValueType::String); + if self.is_known_empty() { + return Ok(()); + } + + self.bind_column_to_var(schema, fulltext_values_alias.clone(), Column::Fulltext(FulltextColumn::Text), var.clone()); + } + + if let VariableOrPlaceholder::Variable(ref var) = b_tx { + // Txs must be refs. 
+ self.constrain_var_to_type(var.clone(), ValueType::Ref); + if self.is_known_empty() { + return Ok(()); + } + + self.bind_column_to_var(schema, datoms_table_alias.clone(), DatomsColumn::Tx, var.clone()); + } + + if let VariableOrPlaceholder::Variable(ref var) = b_score { + // Scores are doubles. + self.constrain_var_to_type(var.clone(), ValueType::Double); + + // We do not allow the score to be bound. + if self.value_bindings.contains_key(var) || self.input_variables.contains(var) { + bail!(ErrorKind::InvalidBinding(var.name(), BindingError::UnexpectedBinding)); + } + + // We bind the value ourselves. This handily takes care of substituting into existing uses. + // TODO: produce real scores using SQLite's matchinfo. + self.bind_value(var, TypedValue::Double(0.0.into())); + } + + Ok(()) + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use std::rc::Rc; + + use mentat_core::{ + Attribute, + ValueType, + }; + + use mentat_query::{ + Binding, + FnArg, + NamespacedKeyword, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + #[test] + fn test_apply_fulltext() { + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 101); + add_attribute(&mut schema, 101, Attribute { + value_type: ValueType::String, + fulltext: false, + ..Default::default() + }); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + let op = PlainSymbol::new("fulltext"); + cc.apply_fulltext(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::SrcVar(SrcVar::DefaultSrc), + FnArg::IdentOrKeyword(NamespacedKeyword::new("foo", "fts")), + FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))), + ], + binding: 
Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?entity")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?value")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?tx")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?score"))]), + }).expect("to be able to apply_fulltext"); + + assert!(!cc.is_known_empty()); + + // Finally, expand column bindings. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty()); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 3); + + assert_eq!(clauses.0[0], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Attribute)), + QueryValue::Entid(100)).into()); + assert_eq!(clauses.0[1], ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Value)), + QueryValue::Column(QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Rowid)))).into()); + assert_eq!(clauses.0[2], ColumnConstraint::Matches(QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Text)), + QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into())))).into()); + + let bindings = cc.column_bindings; + assert_eq!(bindings.len(), 3); + + assert_eq!(bindings.get(&Variable::from_valid_name("?entity")).expect("column binding for ?entity").clone(), + vec![QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Entity))]); + assert_eq!(bindings.get(&Variable::from_valid_name("?value")).expect("column binding for ?value").clone(), + vec![QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Text))]); + assert_eq!(bindings.get(&Variable::from_valid_name("?tx")).expect("column binding for ?tx").clone(), + vec![QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Tx))]); + + // Score is a value binding. 
+ let values = cc.value_bindings; + assert_eq!(values.get(&Variable::from_valid_name("?score")).expect("column binding for ?score").clone(), + TypedValue::Double(0.0.into())); + + let known_types = cc.known_types; + assert_eq!(known_types.len(), 4); + + assert_eq!(known_types.get(&Variable::from_valid_name("?entity")).expect("known types for ?entity").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?value")).expect("known types for ?value").clone(), + vec![ValueType::String].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?tx")).expect("known types for ?tx").clone(), + vec![ValueType::Ref].into_iter().collect()); + assert_eq!(known_types.get(&Variable::from_valid_name("?score")).expect("known types for ?score").clone(), + vec![ValueType::Double].into_iter().collect()); + + let mut cc = ConjoiningClauses::default(); + let op = PlainSymbol::new("fulltext"); + cc.apply_fulltext(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::SrcVar(SrcVar::DefaultSrc), + FnArg::IdentOrKeyword(NamespacedKeyword::new("foo", "bar")), + FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))), + ], + binding: Binding::BindRel(vec![VariableOrPlaceholder::Variable(Variable::from_valid_name("?entity")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?value")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?tx")), + VariableOrPlaceholder::Variable(Variable::from_valid_name("?score"))]), + }).expect("to be able to apply_fulltext"); + + // It's not a fulltext attribute, so the CC cannot yield results. 
+ assert!(cc.is_known_empty()); + } +} diff --git a/query-algebrizer/src/clauses/ground.rs b/query-algebrizer/src/clauses/ground.rs new file mode 100644 index 00000000..d7ad7557 --- /dev/null +++ b/query-algebrizer/src/clauses/ground.rs @@ -0,0 +1,390 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use mentat_core::{ + Schema, + TypedValue, + ValueType, +}; + +use mentat_query::{ + Binding, + FnArg, + Variable, + VariableOrPlaceholder, + WhereFn, +}; + +use clauses::{ + ConjoiningClauses, + PushComputed, +}; + +use clauses::convert::ValueConversion; + +use errors::{ + BindingError, + ErrorKind, + Result, +}; + +use types::{ + ComputedTable, + EmptyBecause, + SourceAlias, + ValueTypeSet, + VariableColumn, +}; + +impl ConjoiningClauses { + /// Take a relation: a matrix of values which will successively bind to named variables of + /// the provided types. + /// Construct a computed table to yield this relation. + /// This function will panic if some invariants are not met. + fn collect_named_bindings<'s>(&mut self, schema: &'s Schema, names: Vec, types: Vec, values: Vec) { + if values.is_empty() { + return; + } + + assert!(!names.is_empty()); + assert_eq!(names.len(), types.len()); + assert!(values.len() >= names.len()); + assert_eq!(values.len() % names.len(), 0); // It's an exact multiple. 
+ + let named_values = ComputedTable::NamedValues { + names: names.clone(), + values: values, + }; + + let table = self.computed_tables.push_computed(named_values); + let alias = self.next_alias_for_table(table); + + // Stitch the computed table into column_bindings, so we get cross-linking. + for (name, ty) in names.iter().zip(types.into_iter()) { + self.constrain_var_to_type(name.clone(), ty); + self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(name.clone()), name.clone()); + } + + self.from.push(SourceAlias(table, alias)); + } + + fn apply_ground_place<'s>(&mut self, schema: &'s Schema, var: VariableOrPlaceholder, arg: FnArg) -> Result<()> { + match var { + VariableOrPlaceholder::Placeholder => Ok(()), + VariableOrPlaceholder::Variable(var) => self.apply_ground_var(schema, var, arg), + } + } + + /// Constrain the CC to associate the given var with the given ground argument. + /// Marks known-empty on failure. + fn apply_ground_var<'s>(&mut self, schema: &'s Schema, var: Variable, arg: FnArg) -> Result<()> { + let known_types = self.known_type_set(&var); + match self.typed_value_from_arg(schema, &var, arg, known_types)? { + ValueConversion::Val(value) => self.apply_ground_value(var, value), + ValueConversion::Impossible(because) => { + self.mark_known_empty(because); + Ok(()) + }, + } + } + + /// Marks known-empty on failure. 
+ fn apply_ground_value(&mut self, var: Variable, value: TypedValue) -> Result<()> { + if let Some(existing) = self.bound_value(&var) { + if existing != value { + self.mark_known_empty(EmptyBecause::ConflictingBindings { + var: var.clone(), + existing: existing.clone(), + desired: value, + }); + return Ok(()) + } + } else { + self.bind_value(&var, value.clone()); + } + + Ok(()) + } + + pub fn apply_ground<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { + if where_fn.args.len() != 1 { + bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 1)); + } + + let mut args = where_fn.args.into_iter(); + + if where_fn.binding.is_empty() { + // The binding must introduce at least one bound variable. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable)); + } + + if !where_fn.binding.is_valid() { + // The binding must not duplicate bound variables. + bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable)); + } + + // Scalar and tuple bindings are a little special: because there's only one value, + // we can immediately substitute the value as a known value in the CC, additionally + // generating a WHERE clause if columns have already been bound. + match (where_fn.binding, args.next().unwrap()) { + (Binding::BindScalar(var), constant) => + self.apply_ground_var(schema, var, constant), + + (Binding::BindTuple(places), FnArg::Vector(children)) => { + // Just the same, but we bind more than one column at a time. + if children.len() != places.len() { + // Number of arguments don't match the number of values. TODO: better error message. + bail!(ErrorKind::GroundBindingsMismatch); + } + for (place, arg) in places.into_iter().zip(children.into_iter()) { + self.apply_ground_place(schema, place, arg)? // TODO: short-circuit on impossible. 
+ } + Ok(()) + }, + + // Collection bindings and rel bindings are similar in that they are both + // implemented as a subquery with a projection list and a set of values. + // The difference is that BindColl has only a single variable, and its values + // are all in a single structure. That makes it substantially simpler! + (Binding::BindColl(var), FnArg::Vector(children)) => { + if children.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Turn a collection of arguments into a Vec of `TypedValue`s of the same type. + let known_types = self.known_type_set(&var); + // Check that every value has the same type. + let mut accumulated_types = ValueTypeSet::none(); + let mut skip: Option = None; + let values = children.into_iter() + .filter_map(|arg| -> Option> { + // We need to get conversion errors out. + // We also want to mark known-empty on impossibility, but + // still detect serious errors. + match self.typed_value_from_arg(schema, &var, arg, known_types) { + Ok(ValueConversion::Val(tv)) => { + if accumulated_types.insert(tv.value_type()) && + !accumulated_types.is_unit() { + // Values not all of the same type. + Some(Err(ErrorKind::InvalidGroundConstant.into())) + } else { + Some(Ok(tv)) + } + }, + Ok(ValueConversion::Impossible(because)) => { + // Skip this value, remembering why: if every value ends up skipped, + // the most recent reason is used to mark the CC known-empty below. + skip = Some(because); + None + }, + Err(e) => Some(Err(e.into())), + } + }) + .collect::>>()?; + + if values.is_empty() { + let because = skip.expect("we skipped all rows for a reason"); + self.mark_known_empty(because); + return Ok(()); + } + + // Otherwise, we now have the values and the type. 
+ let types = vec![accumulated_types.exemplar().unwrap()]; + let names = vec![var.clone()]; + + self.collect_named_bindings(schema, names, types, values); + Ok(()) + }, + + (Binding::BindRel(places), FnArg::Vector(rows)) => { + if rows.is_empty() { + bail!(ErrorKind::InvalidGroundConstant); + } + + // Grab the known types to which these args must conform, and track + // the places that won't be bound in the output. + let template: Vec> = + places.iter() + .map(|x| match x { + &VariableOrPlaceholder::Placeholder => None, + &VariableOrPlaceholder::Variable(ref v) => Some((v.clone(), self.known_type_set(v))), + }) + .collect(); + + // The expected 'width' of the matrix is the number of named variables. + let full_width = places.len(); + let names: Vec = places.into_iter().filter_map(|x| x.into_var()).collect(); + let expected_width = names.len(); + let expected_rows = rows.len(); + + if expected_width == 0 { + // They can't all be placeholders. + bail!(ErrorKind::InvalidGroundConstant); + } + + // Accumulate values into `matrix` and types into `a_t_f_c`. + // This representation of a rectangular matrix is more efficient than one composed + // of N separate vectors. + let mut matrix = Vec::with_capacity(expected_width * expected_rows); + let mut accumulated_types_for_columns = vec![ValueTypeSet::none(); expected_width]; + + // Loop so we can bail out. + let mut skipped_all: Option = None; + for row in rows.into_iter() { + match row { + FnArg::Vector(cols) => { + // Make sure that every row is the same length. + if cols.len() != full_width { + bail!(ErrorKind::InvalidGroundConstant); + } + + // TODO: don't accumulate twice. + let mut vals = Vec::with_capacity(expected_width); + let mut skip: Option = None; + for (col, pair) in cols.into_iter().zip(template.iter()) { + // Now we have (val, Option<(name, known_types)>). Silly, + // but this is how we iter! + // Convert each item in the row. + // If any value in the row is impossible, then skip the row. 
+ // If all rows are impossible, fail the entire CC. + if let &Some(ref pair) = pair { + match self.typed_value_from_arg(schema, &pair.0, col, pair.1)? { + ValueConversion::Val(tv) => vals.push(tv), + ValueConversion::Impossible(because) => { + // Skip this row. It cannot produce bindings. + skip = Some(because); + break; + }, + } + } + } + + if skip.is_some() { + // Skip this row and record why, in case we skip all. + skipped_all = skip; + continue; + } + + // Accumulate the values into the matrix and the types into the type set. + for (val, acc) in vals.into_iter().zip(accumulated_types_for_columns.iter_mut()) { + let inserted = acc.insert(val.value_type()); + if inserted && !acc.is_unit() { + // Heterogeneous types. + bail!(ErrorKind::InvalidGroundConstant); + } + matrix.push(val); + } + + }, + _ => bail!(ErrorKind::InvalidGroundConstant), + } + } + + // Do we have rows? If not, the CC cannot succeed. + if matrix.is_empty() { + // Every row either bailed, pushed values into the matrix, or was skipped with a + // recorded reason. The matrix is empty and we did not bail, so every row was + // skipped and `skipped_all` holds the most recent reason: we can safely unwrap here. + self.mark_known_empty(skipped_all.expect("we skipped for a reason")); + return Ok(()); + } + + // Take the single type from each set. We know there's only one: we got at least one + // type, 'cos we bailed out for zero rows, and we also bailed out each time we + // inserted a second type. + // By restricting to homogeneous columns, we greatly simplify projection. In the + // future, we could loosen this restriction, at the cost of projecting (some) value + // type tags. If and when we want to algebrize in two phases and allow for + // late-binding input variables, we'll probably be able to loosen this restriction + // with little penalty. 
+ let types = accumulated_types_for_columns.into_iter() + .map(|x| x.exemplar().unwrap()) + .collect(); + self.collect_named_bindings(schema, names, types, matrix); + Ok(()) + }, + (_, _) => bail!(ErrorKind::InvalidGroundConstant), + } + } +} + +#[cfg(test)] +mod testing { + use super::*; + + use mentat_core::{ + Attribute, + ValueType, + }; + + use mentat_query::{ + Binding, + FnArg, + NamespacedKeyword, + PlainSymbol, + Variable, + }; + + use clauses::{ + add_attribute, + associate_ident, + }; + + use types::{ + ValueTypeSet, + }; + + #[test] + fn test_apply_ground() { + let vz = Variable::from_valid_name("?z"); + + let mut cc = ConjoiningClauses::default(); + let mut schema = Schema::default(); + + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); + + // It's awkward enough to write these expansions that we give the details for the simplest + // case only. See the tests of the translator for more extensive (albeit looser) coverage. + let op = PlainSymbol::new("ground"); + cc.apply_ground(&schema, WhereFn { + operator: op, + args: vec![ + FnArg::EntidOrInteger(10), + ], + binding: Binding::BindScalar(vz.clone()), + }).expect("to be able to apply_ground"); + + assert!(!cc.is_known_empty()); + + // Finally, expand column bindings. + cc.expand_column_bindings(); + assert!(!cc.is_known_empty()); + + let clauses = cc.wheres; + assert_eq!(clauses.len(), 0); + + let column_bindings = cc.column_bindings; + assert_eq!(column_bindings.len(), 0); // Scalar doesn't need this. 
+ + let known_types = cc.known_types; + assert_eq!(known_types.len(), 1); + assert_eq!(known_types.get(&vz).expect("to know the type of ?z"), + &ValueTypeSet::of_one(ValueType::Long)); + + let value_bindings = cc.value_bindings; + assert_eq!(value_bindings.len(), 1); + assert_eq!(value_bindings.get(&vz).expect("to have a value for ?z"), + &TypedValue::Long(10)); // We default to Long instead of entid. + } +} diff --git a/query-algebrizer/src/clauses/mod.rs b/query-algebrizer/src/clauses/mod.rs index e94713da..e457987a 100644 --- a/query-algebrizer/src/clauses/mod.rs +++ b/query-algebrizer/src/clauses/mod.rs @@ -54,6 +54,7 @@ use types::{ DatomsColumn, DatomsTable, EmptyBecause, + FulltextColumn, QualifiedAlias, QueryValue, SourceAlias, @@ -61,12 +62,16 @@ use types::{ ValueTypeSet, }; +mod convert; // Converting args to values. mod inputs; mod or; mod not; mod pattern; mod predicate; mod resolve; + +mod ground; +mod fulltext; mod where_fn; use validate::{ @@ -335,7 +340,24 @@ impl ConjoiningClauses { impl ConjoiningClauses { /// Be careful with this. It'll overwrite existing bindings. pub fn bind_value(&mut self, var: &Variable, value: TypedValue) { - self.constrain_var_to_type(var.clone(), value.value_type()); + let vt = value.value_type(); + self.constrain_var_to_type(var.clone(), vt); + + // Are there any existing column bindings for this variable? + // If so, generate a constraint against the primary column. + if let Some(vec) = self.column_bindings.get(var) { + if let Some(col) = vec.first() { + self.wheres.add_intersection(ColumnConstraint::Equals(col.clone(), QueryValue::TypedValue(value.clone()))); + } + } + + // Are we also trying to figure out the type of the value when the query runs? + // If so, constrain that! + if let Some(qa) = self.extracted_types.get(&var) { + self.wheres.add_intersection(ColumnConstraint::HasType(qa.0.clone(), vt)); + } + + // Finally, store the binding for future use. 
self.value_bindings.insert(var.clone(), value); } @@ -377,6 +399,13 @@ impl ConjoiningClauses { self.constrain_column_to_constant(table, column, bound_val); }, + Column::Fulltext(FulltextColumn::Rowid) | + Column::Fulltext(FulltextColumn::Text) => { + // We never expose `rowid` via queries. We do expose `text`, but only + // indirectly, by joining against `datoms`. Therefore, these are meaningless. + unimplemented!() + }, + Column::Fixed(DatomsColumn::ValueTypeTag) => { // I'm pretty sure this is meaningless right now, because we will never bind // a type tag to a variable -- there's no syntax for doing so. @@ -450,8 +479,14 @@ impl ConjoiningClauses { } pub fn constrain_column_to_constant>(&mut self, table: TableAlias, column: C, constant: TypedValue) { - let column = column.into(); - self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) + match constant { + // Be a little more explicit. + TypedValue::Ref(entid) => self.constrain_column_to_entity(table, column, entid), + _ => { + let column = column.into(); + self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant))) + }, + } } pub fn constrain_column_to_entity>(&mut self, table: TableAlias, column: C, entity: Entid) { diff --git a/query-algebrizer/src/clauses/where_fn.rs b/query-algebrizer/src/clauses/where_fn.rs index 40c047fb..5a6aca5e 100644 --- a/query-algebrizer/src/clauses/where_fn.rs +++ b/query-algebrizer/src/clauses/where_fn.rs @@ -8,188 +8,23 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
-use std::rc::Rc; - use mentat_core::{ Schema, - SQLValueType, - TypedValue, - ValueType, }; use mentat_query::{ - Binding, - FnArg, - NonIntegerConstant, - Variable, - VariableOrPlaceholder, WhereFn, }; use clauses::{ ConjoiningClauses, - PushComputed, }; use errors::{ - BindingError, ErrorKind, Result, }; -use super::QualifiedAlias; - -use types::{ - ColumnConstraint, - ComputedTable, - EmptyBecause, - SourceAlias, - ValueTypeSet, - VariableColumn, -}; - -macro_rules! coerce_to_typed_value { - ($var: ident, $val: ident, $types: expr, $type: path, $constructor: path) => { { - Ok(if !$types.contains($type) { - Impossible(EmptyBecause::TypeMismatch { - var: $var.clone(), - existing: $types, - desired: ValueTypeSet::of_one($type), - }) - } else { - Val($constructor($val).into()) - }) - } } -} - -enum ValueConversion { - Val(TypedValue), - Impossible(EmptyBecause), -} - -/// Conversion of FnArgs to TypedValues. -impl ConjoiningClauses { - /// Convert the provided `FnArg` to a `TypedValue`. - /// The conversion depends on, and can fail because of: - /// - Existing known types of a variable to which this arg will be bound. - /// - Existing bindings of a variable `FnArg`. - fn typed_value_from_arg<'s>(&self, schema: &'s Schema, var: &Variable, arg: FnArg, known_types: ValueTypeSet) -> Result { - use self::ValueConversion::*; - if known_types.is_empty() { - // If this happens, it likely means the pattern has already failed! - return Ok(Impossible(EmptyBecause::TypeMismatch { - var: var.clone(), - existing: known_types, - desired: ValueTypeSet::any(), - })); - } - - match arg { - // Longs are potentially ambiguous: they might be longs or entids. - FnArg::EntidOrInteger(x) => { - match (ValueType::Ref.accommodates_integer(x), - known_types.contains(ValueType::Ref), - known_types.contains(ValueType::Long)) { - (true, true, true) => { - // Ambiguous: this arg could be an entid or a long. - // We default to long. 
- Ok(Val(TypedValue::Long(x))) - }, - (true, true, false) => { - // This can only be a ref. - Ok(Val(TypedValue::Ref(x))) - }, - (_, false, true) => { - // This can only be a long. - Ok(Val(TypedValue::Long(x))) - }, - (false, true, _) => { - // This isn't a valid ref, but that's the type to which this must conform! - Ok(Impossible(EmptyBecause::TypeMismatch { - var: var.clone(), - existing: known_types, - desired: ValueTypeSet::of_longs(), - })) - }, - (_, false, false) => { - // Non-overlapping type sets. - Ok(Impossible(EmptyBecause::TypeMismatch { - var: var.clone(), - existing: known_types, - desired: ValueTypeSet::of_longs(), - })) - }, - } - }, - - // If you definitely want to look up an ident, do it before running the query. - FnArg::IdentOrKeyword(x) => { - match (known_types.contains(ValueType::Ref), - known_types.contains(ValueType::Keyword)) { - (true, true) => { - // Ambiguous: this could be a keyword or an ident. - // Default to keyword. - Ok(Val(TypedValue::Keyword(Rc::new(x)))) - }, - (true, false) => { - // This can only be an ident. Look it up! - match schema.get_entid(&x).map(TypedValue::Ref) { - Some(e) => Ok(Val(e)), - None => Ok(Impossible(EmptyBecause::UnresolvedIdent(x.clone()))), - } - }, - (false, true) => { - Ok(Val(TypedValue::Keyword(Rc::new(x)))) - }, - (false, false) => { - Ok(Impossible(EmptyBecause::TypeMismatch { - var: var.clone(), - existing: known_types, - desired: ValueTypeSet::of_keywords(), - })) - }, - } - }, - - FnArg::Variable(in_var) => { - // TODO: technically you could ground an existing variable inside the query…. - if !self.input_variables.contains(&in_var) { - bail!(ErrorKind::UnboundVariable((*in_var.0).clone())); - } - match self.bound_value(&in_var) { - // The type is already known if it's a bound variable…. - Some(ref in_value) => Ok(Val(in_value.clone())), - None => bail!(ErrorKind::UnboundVariable((*in_var.0).clone())), - } - }, - - // This isn't implemented yet. 
- FnArg::Constant(NonIntegerConstant::BigInteger(_)) => unimplemented!(), - - // These don't make sense here. - FnArg::Vector(_) | - FnArg::SrcVar(_) => bail!(ErrorKind::InvalidGroundConstant), - - // These are all straightforward. - FnArg::Constant(NonIntegerConstant::Boolean(x)) => { - coerce_to_typed_value!(var, x, known_types, ValueType::Boolean, TypedValue::Boolean) - }, - FnArg::Constant(NonIntegerConstant::Instant(x)) => { - coerce_to_typed_value!(var, x, known_types, ValueType::Instant, TypedValue::Instant) - }, - FnArg::Constant(NonIntegerConstant::Uuid(x)) => { - coerce_to_typed_value!(var, x, known_types, ValueType::Uuid, TypedValue::Uuid) - }, - FnArg::Constant(NonIntegerConstant::Float(x)) => { - coerce_to_typed_value!(var, x, known_types, ValueType::Double, TypedValue::Double) - }, - FnArg::Constant(NonIntegerConstant::Text(x)) => { - coerce_to_typed_value!(var, x, known_types, ValueType::String, TypedValue::String) - }, - } - } -} - /// Application of `where` functions. impl ConjoiningClauses { /// There are several kinds of functions binding variables in our Datalog: @@ -202,369 +37,9 @@ impl ConjoiningClauses { // Because we'll be growing the set of built-in functions, handling each differently, and // ultimately allowing user-specified functions, we match on the function name first. match where_fn.operator.0.as_str() { + "fulltext" => self.apply_fulltext(schema, where_fn), "ground" => self.apply_ground(schema, where_fn), _ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())), } } - - fn apply_ground_place<'s>(&mut self, schema: &'s Schema, var: VariableOrPlaceholder, arg: FnArg) -> Result<()> { - match var { - VariableOrPlaceholder::Placeholder => Ok(()), - VariableOrPlaceholder::Variable(var) => self.apply_ground_var(schema, var, arg), - } - } - - /// Constrain the CC to associate the given var with the given ground argument. - /// Marks known-empty on failure. 
- fn apply_ground_var<'s>(&mut self, schema: &'s Schema, var: Variable, arg: FnArg) -> Result<()> { - let known_types = self.known_type_set(&var); - match self.typed_value_from_arg(schema, &var, arg, known_types)? { - ValueConversion::Val(value) => self.apply_ground_value(var, value), - ValueConversion::Impossible(because) => { - self.mark_known_empty(because); - Ok(()) - }, - } - } - - /// Marks known-empty on failure. - fn apply_ground_value(&mut self, var: Variable, value: TypedValue) -> Result<()> { - if let Some(existing) = self.bound_value(&var) { - if existing != value { - self.mark_known_empty(EmptyBecause::ConflictingBindings { - var: var.clone(), - existing: existing.clone(), - desired: value, - }); - return Ok(()) - } - } else { - self.bind_value(&var, value.clone()); - } - - let vt = value.value_type(); - - // Check to see whether this variable is already associated to a column. - // If so, we want to add an equality filter (or, in the future, redo the existing patterns). - if let Some(QualifiedAlias(table, column)) = self.column_bindings - .get(&var) - .and_then(|vec| vec.get(0).cloned()) { - self.constrain_column_to_constant(table, column, value); - } - - // Are we also trying to figure out the type of the value when the query runs? - // If so, constrain that! - if let Some(table) = self.extracted_types.get(&var) - .map(|qa| qa.0.clone()) { - self.wheres.add_intersection(ColumnConstraint::HasType(table, vt)); - } - - Ok(()) - } - - /// Take a relation: a matrix of values which will successively bind to named variables of - /// the provided types. - /// Construct a computed table to yield this relation. - /// This function will panic if some invariants are not met. 
- fn collect_named_bindings<'s>(&mut self, schema: &'s Schema, names: Vec, types: Vec, values: Vec) { - if values.is_empty() { - return; - } - - assert!(!names.is_empty()); - assert_eq!(names.len(), types.len()); - assert!(values.len() >= names.len()); - assert_eq!(values.len() % names.len(), 0); // It's an exact multiple. - - let named_values = ComputedTable::NamedValues { - names: names.clone(), - values: values, - }; - - let table = self.computed_tables.push_computed(named_values); - let alias = self.next_alias_for_table(table); - - // Stitch the computed table into column_bindings, so we get cross-linking. - for (name, ty) in names.iter().zip(types.into_iter()) { - self.constrain_var_to_type(name.clone(), ty); - self.bind_column_to_var(schema, alias.clone(), VariableColumn::Variable(name.clone()), name.clone()); - } - - self.from.push(SourceAlias(table, alias)); - } - - pub fn apply_ground<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> { - if where_fn.args.len() != 1 { - bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 1)); - } - - let mut args = where_fn.args.into_iter(); - - if where_fn.binding.is_empty() { - // The binding must introduce at least one bound variable. - bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable)); - } - - if !where_fn.binding.is_valid() { - // The binding must not duplicate bound variables. - bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable)); - } - - // Scalar and tuple bindings are a little special: because there's only one value, - // we can immediately substitute the value as a known value in the CC, additionally - // generating a WHERE clause if columns have already been bound. 
- match (where_fn.binding, args.next().unwrap()) { - (Binding::BindScalar(var), constant) => - self.apply_ground_var(schema, var, constant), - - (Binding::BindTuple(places), FnArg::Vector(children)) => { - // Just the same, but we bind more than one column at a time. - if children.len() != places.len() { - // Number of arguments don't match the number of values. TODO: better error message. - bail!(ErrorKind::GroundBindingsMismatch); - } - for (place, arg) in places.into_iter().zip(children.into_iter()) { - self.apply_ground_place(schema, place, arg)? // TODO: short-circuit on impossible. - } - Ok(()) - }, - - // Collection bindings and rel bindings are similar in that they are both - // implemented as a subquery with a projection list and a set of values. - // The difference is that BindColl has only a single variable, and its values - // are all in a single structure. That makes it substantially simpler! - (Binding::BindColl(var), FnArg::Vector(children)) => { - if children.is_empty() { - bail!(ErrorKind::InvalidGroundConstant); - } - - // Turn a collection of arguments into a Vec of `TypedValue`s of the same type. - let known_types = self.known_type_set(&var); - // Check that every value has the same type. - let mut accumulated_types = ValueTypeSet::none(); - let mut skip: Option = None; - let values = children.into_iter() - .filter_map(|arg| -> Option> { - // We need to get conversion errors out. - // We also want to mark known-empty on impossibilty, but - // still detect serious errors. - match self.typed_value_from_arg(schema, &var, arg, known_types) { - Ok(ValueConversion::Val(tv)) => { - if accumulated_types.insert(tv.value_type()) && - !accumulated_types.is_unit() { - // Values not all of the same type. - Some(Err(ErrorKind::InvalidGroundConstant.into())) - } else { - Some(Ok(tv)) - } - }, - Ok(ValueConversion::Impossible(because)) => { - // Skip this value. 
- skip = Some(because); - None - }, - Err(e) => Some(Err(e.into())), - } - }) - .collect::>>()?; - - if values.is_empty() { - let because = skip.expect("we skipped all rows for a reason"); - self.mark_known_empty(because); - return Ok(()); - } - - // Otherwise, we now have the values and the type. - let types = vec![accumulated_types.exemplar().unwrap()]; - let names = vec![var.clone()]; - - self.collect_named_bindings(schema, names, types, values); - Ok(()) - }, - - (Binding::BindRel(places), FnArg::Vector(rows)) => { - if rows.is_empty() { - bail!(ErrorKind::InvalidGroundConstant); - } - - // Grab the known types to which these args must conform, and track - // the places that won't be bound in the output. - let template: Vec> = - places.iter() - .map(|x| match x { - &VariableOrPlaceholder::Placeholder => None, - &VariableOrPlaceholder::Variable(ref v) => Some((v.clone(), self.known_type_set(v))), - }) - .collect(); - - // The expected 'width' of the matrix is the number of named variables. - let full_width = places.len(); - let names: Vec = places.into_iter().filter_map(|x| x.into_var()).collect(); - let expected_width = names.len(); - let expected_rows = rows.len(); - - if expected_width == 0 { - // They can't all be placeholders. - bail!(ErrorKind::InvalidGroundConstant); - } - - // Accumulate values into `matrix` and types into `a_t_f_c`. - // This representation of a rectangular matrix is more efficient than one composed - // of N separate vectors. - let mut matrix = Vec::with_capacity(expected_width * expected_rows); - let mut accumulated_types_for_columns = vec![ValueTypeSet::none(); expected_width]; - - // Loop so we can bail out. - let mut skipped_all: Option = None; - for row in rows.into_iter() { - match row { - FnArg::Vector(cols) => { - // Make sure that every row is the same length. - if cols.len() != full_width { - bail!(ErrorKind::InvalidGroundConstant); - } - - // TODO: don't accumulate twice. 
- let mut vals = Vec::with_capacity(expected_width); - let mut skip: Option = None; - for (col, pair) in cols.into_iter().zip(template.iter()) { - // Now we have (val, Option<(name, known_types)>). Silly, - // but this is how we iter! - // Convert each item in the row. - // If any value in the row is impossible, then skip the row. - // If all rows are impossible, fail the entire CC. - if let &Some(ref pair) = pair { - match self.typed_value_from_arg(schema, &pair.0, col, pair.1)? { - ValueConversion::Val(tv) => vals.push(tv), - ValueConversion::Impossible(because) => { - // Skip this row. It cannot produce bindings. - skip = Some(because); - break; - }, - } - } - } - - if skip.is_some() { - // Skip this row and record why, in case we skip all. - skipped_all = skip; - continue; - } - - // Accumulate the values into the matrix and the types into the type set. - for (val, acc) in vals.into_iter().zip(accumulated_types_for_columns.iter_mut()) { - let inserted = acc.insert(val.value_type()); - if inserted && !acc.is_unit() { - // Heterogeneous types. - bail!(ErrorKind::InvalidGroundConstant); - } - matrix.push(val); - } - - }, - _ => bail!(ErrorKind::InvalidGroundConstant), - } - } - - // Do we have rows? If not, the CC cannot succeed. - if matrix.is_empty() { - // We will either have bailed or will have accumulated *something* into the matrix, - // so we can safely unwrap here. - self.mark_known_empty(skipped_all.expect("we skipped for a reason")); - return Ok(()); - } - - // Take the single type from each set. We know there's only one: we got at least one - // type, 'cos we bailed out for zero rows, and we also bailed out each time we - // inserted a second type. - // By restricting to homogeneous columns, we greatly simplify projection. In the - // future, we could loosen this restriction, at the cost of projecting (some) value - // type tags. 
If and when we want to algebrize in two phases and allow for - // late-binding input variables, we'll probably be able to loosen this restriction - // with little penalty. - let types = accumulated_types_for_columns.into_iter() - .map(|x| x.exemplar().unwrap()) - .collect(); - self.collect_named_bindings(schema, names, types, matrix); - Ok(()) - }, - (_, _) => bail!(ErrorKind::InvalidGroundConstant), - } - } -} - -#[cfg(test)] -mod testing { - use super::*; - - use mentat_core::{ - Attribute, - ValueType, - }; - - use mentat_query::{ - Binding, - FnArg, - NamespacedKeyword, - PlainSymbol, - Variable, - }; - - use clauses::{ - add_attribute, - associate_ident, - }; - - use types::{ - ValueTypeSet, - }; - - #[test] - fn test_apply_ground() { - let vz = Variable::from_valid_name("?z"); - - let mut cc = ConjoiningClauses::default(); - let mut schema = Schema::default(); - - associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); - add_attribute(&mut schema, 100, Attribute { - value_type: ValueType::String, - index: true, - fulltext: true, - ..Default::default() - }); - - // It's awkward enough to write these expansions that we give the details for the simplest - // case only. See the tests of the translator for more extensive (albeit looser) coverage. - let op = PlainSymbol::new("ground"); - cc.apply_ground(&schema, WhereFn { - operator: op, - args: vec![ - FnArg::EntidOrInteger(10), - ], - binding: Binding::BindScalar(vz.clone()), - }).expect("to be able to apply_ground"); - - assert!(!cc.is_known_empty()); - - // Finally, expand column bindings. - cc.expand_column_bindings(); - assert!(!cc.is_known_empty()); - - let clauses = cc.wheres; - assert_eq!(clauses.len(), 0); - - let column_bindings = cc.column_bindings; - assert_eq!(column_bindings.len(), 0); // Scalar doesn't need this. 
- - let known_types = cc.known_types; - assert_eq!(known_types.len(), 1); - assert_eq!(known_types.get(&vz).expect("to know the type of ?z"), - &ValueTypeSet::of_one(ValueType::Long)); - - let value_bindings = cc.value_bindings; - assert_eq!(value_bindings.len(), 1); - assert_eq!(value_bindings.get(&vz).expect("to have a value for ?z"), - &TypedValue::Long(10)); // We default to Long instead of entid. - } } diff --git a/query-algebrizer/src/errors.rs b/query-algebrizer/src/errors.rs index 852e07fc..65dfcee6 100644 --- a/query-algebrizer/src/errors.rs +++ b/query-algebrizer/src/errors.rs @@ -21,7 +21,16 @@ use self::mentat_query::{ #[derive(Clone, Debug, Eq, PartialEq)] pub enum BindingError { NoBoundVariable, + UnexpectedBinding, RepeatedBoundVariable, // TODO: include repeated variable(s). + + /// Expected `[[?x ?y]]` but got some other type of binding. Mentat is deliberately more strict + /// than Datomic: we won't try to make sense of non-obvious (and potentially erroneous) bindings. + ExpectedBindRel, + + /// Expected `[?x1 … ?xN]` or `[[?x1 … ?xN]]` but got some other number of bindings. Mentat is + /// deliberately more strict than Datomic: we prefer placeholders to omission. + InvalidNumberOfBindings { number: usize, expected: usize }, } error_chain! { diff --git a/query-algebrizer/src/lib.rs b/query-algebrizer/src/lib.rs index 9bf9b8d1..950c488a 100644 --- a/query-algebrizer/src/lib.rs +++ b/query-algebrizer/src/lib.rs @@ -217,6 +217,7 @@ pub use types::{ ComputedTable, DatomsColumn, DatomsTable, + FulltextColumn, OrderBy, QualifiedAlias, QueryValue, diff --git a/query-algebrizer/src/types.rs b/query-algebrizer/src/types.rs index ab7222bb..e9f11e7d 100644 --- a/query-algebrizer/src/types.rs +++ b/query-algebrizer/src/types.rs @@ -85,6 +85,13 @@ pub enum DatomsColumn { ValueTypeTag, } +/// One of the named columns of our fulltext values table. 
+#[derive(PartialEq, Eq, Clone)] +pub enum FulltextColumn { + Rowid, + Text, +} + #[derive(PartialEq, Eq, Clone)] pub enum VariableColumn { Variable(Variable), @@ -94,6 +101,7 @@ pub enum VariableColumn { #[derive(PartialEq, Eq, Clone)] pub enum Column { Fixed(DatomsColumn), + Fulltext(FulltextColumn), Variable(VariableColumn), } @@ -157,11 +165,34 @@ impl Debug for Column { fn fmt(&self, f: &mut Formatter) -> Result { match self { &Column::Fixed(ref c) => c.fmt(f), + &Column::Fulltext(ref c) => c.fmt(f), &Column::Variable(ref v) => v.fmt(f), } } } +impl FulltextColumn { + pub fn as_str(&self) -> &'static str { + use self::FulltextColumn::*; + match *self { + Rowid => "rowid", + Text => "text", + } + } +} + +impl ColumnName for FulltextColumn { + fn column_name(&self) -> String { + self.as_str().to_string() + } +} + +impl Debug for FulltextColumn { + fn fmt(&self, f: &mut Formatter) -> Result { + write!(f, "{}", self.as_str()) + } +} + /// A specific instance of a table within a query. E.g., "datoms123". pub type TableAlias = String; @@ -301,6 +332,9 @@ pub enum ColumnConstraint { }, HasType(TableAlias, ValueType), NotExists(ComputedTable), + // TODO: Merge this with NumericInequality? I expect the fine-grained information to be + // valuable when optimizing. 
+ Matches(QualifiedAlias, QueryValue), } #[derive(PartialEq, Eq, Debug)] @@ -411,6 +445,10 @@ impl Debug for ColumnConstraint { write!(f, "{:?} {:?} {:?}", left, operator, right) }, + &Matches(ref qa, ref thing) => { + write!(f, "{:?} MATCHES {:?}", qa, thing) + }, + &HasType(ref qa, value_type) => { write!(f, "{:?}.value_type_tag = {:?}", qa, value_type) }, @@ -426,6 +464,7 @@ pub enum EmptyBecause { ConflictingBindings { var: Variable, existing: TypedValue, desired: TypedValue }, TypeMismatch { var: Variable, existing: ValueTypeSet, desired: ValueTypeSet }, NoValidTypes(Variable), + NonAttributeArgument, NonNumericArgument, NonStringFulltextValue, UnresolvedIdent(NamespacedKeyword), @@ -451,6 +490,9 @@ impl Debug for EmptyBecause { &NoValidTypes(ref var) => { write!(f, "Type mismatch: {:?} has no valid types", var) }, + &NonAttributeArgument => { + write!(f, "Non-attribute argument in attribute place") + }, &NonNumericArgument => { write!(f, "Non-numeric argument in numeric place") }, @@ -582,3 +624,28 @@ impl ValueTypeSet { self.0.len() == 1 } } + +impl IntoIterator for ValueTypeSet { + type Item = ValueType; + type IntoIter = ::enum_set::Iter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl ::std::iter::FromIterator for ValueTypeSet { + fn from_iter>(iterator: I) -> Self { + let mut ret = Self::none(); + ret.0.extend(iterator); + ret + } +} + +impl ::std::iter::Extend for ValueTypeSet { + fn extend>(&mut self, iter: I) { + for element in iter { + self.0.insert(element); + } + } +} diff --git a/query-algebrizer/tests/fulltext.rs b/query-algebrizer/tests/fulltext.rs new file mode 100644 index 00000000..5b1cb1d7 --- /dev/null +++ b/query-algebrizer/tests/fulltext.rs @@ -0,0 +1,102 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. 
You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate mentat_core; +extern crate mentat_query; +extern crate mentat_query_algebrizer; +extern crate mentat_query_parser; + +use mentat_core::{ + Attribute, + Entid, + Schema, + ValueType, +}; + +use mentat_query_parser::{ + parse_find_string, +}; + +use mentat_query::{ + NamespacedKeyword, +}; + +use mentat_query_algebrizer::{ + ConjoiningClauses, + algebrize, +}; + + +// These are helpers that tests use to build Schema instances. +fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) { + schema.entid_map.insert(e, i.clone()); + schema.ident_map.insert(i.clone(), e); +} + +fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) { + schema.schema_map.insert(e, a); +} + +fn prepopulated_schema() -> Schema { + let mut schema = Schema::default(); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "description"), 66); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69); + add_attribute(&mut schema, 65, Attribute { + value_type: ValueType::String, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 66, Attribute { + value_type: ValueType::String, + fulltext: true, + multival: true, + ..Default::default() + }); + add_attribute(&mut schema, 67, Attribute { + value_type: ValueType::String, + multival: true, + ..Default::default() + }); + add_attribute(&mut 
schema, 68, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + add_attribute(&mut schema, 69, Attribute { + value_type: ValueType::Long, + multival: false, + ..Default::default() + }); + schema +} + +fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc +} + +#[test] +fn test_apply_fulltext() { + let schema = prepopulated_schema(); + + // If you use a non-FTS attribute, we will short-circuit. + let query = r#"[:find ?val + :where [(fulltext $ :foo/name "hello") [[?entity ?val _ _]]]]"#; + assert!(alg(&schema, query).is_known_empty()); + + // If you get a type mismatch, we will short-circuit. + let query = r#"[:find ?val + :where [(fulltext $ :foo/description "hello") [[?entity ?val ?tx ?score]]] + [?score :foo/bar _]]"#; + assert!(alg(&schema, query).is_known_empty()); +} diff --git a/query-algebrizer/tests/ground.rs b/query-algebrizer/tests/ground.rs index 0a2bdf69..b7f811db 100644 --- a/query-algebrizer/tests/ground.rs +++ b/query-algebrizer/tests/ground.rs @@ -13,6 +13,8 @@ extern crate mentat_query; extern crate mentat_query_algebrizer; extern crate mentat_query_parser; +use std::collections::BTreeMap; + use mentat_core::{ Attribute, Entid, @@ -37,7 +39,9 @@ use mentat_query_algebrizer::{ ComputedTable, Error, ErrorKind, + QueryInputs, algebrize, + algebrize_with_inputs, }; // These are helpers that tests use to build Schema instances. 
@@ -92,6 +96,11 @@ fn bails(schema: &Schema, input: &str) -> Error { algebrize(schema.into(), parsed).expect_err("algebrize to have failed") } +fn bails_with_inputs(schema: &Schema, input: &str, inputs: QueryInputs) -> Error { + let parsed = parse_find_string(input).expect("query input to have parsed"); + algebrize_with_inputs(schema, parsed, 0, inputs).expect_err("algebrize to have failed") +} + fn alg(schema: &Schema, input: &str) -> ConjoiningClauses { let parsed = parse_find_string(input).expect("query input to have parsed"); algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc @@ -313,3 +322,26 @@ fn test_ground_nonexistent_variable_invalid() { }, } } + +#[test] +fn test_unbound_input_variable_invalid() { + let schema = prepopulated_schema(); + let q = r#"[:find ?y ?age :in ?x :where [(ground [?x]) [?y ...]] [?y :foo/age ?age]]"#; + + // This fails even if we know the type: we don't support grounding bindings + // that aren't known at algebrizing time. + let mut types = BTreeMap::default(); + types.insert(Variable::from_valid_name("?x"), ValueType::Ref); + + let i = QueryInputs::new(types, BTreeMap::default()).expect("valid QueryInputs"); + + let e = bails_with_inputs(&schema, &q, i); + match e { + Error(ErrorKind::UnboundVariable(v), _) => { + assert_eq!(v.0, "?x"); + }, + _ => { + panic!(); + }, + } +} diff --git a/query-sql/src/lib.rs b/query-sql/src/lib.rs index 470a8024..f6edbfa8 100644 --- a/query-sql/src/lib.rs +++ b/query-sql/src/lib.rs @@ -124,6 +124,14 @@ impl Constraint { right: right, } } + + pub fn fulltext_match(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { + Constraint::Infix { + op: Op("MATCH"), // SQLite specific! 
+ left: left, + right: right, + } + } } #[allow(dead_code)] @@ -198,6 +206,10 @@ fn push_column(qb: &mut QueryBuilder, col: &Column) -> BuildQueryResult { qb.push_sql(d.as_str()); Ok(()) }, + &Column::Fulltext(ref d) => { + qb.push_sql(d.as_str()); + Ok(()) + }, &Column::Variable(ref vc) => push_variable_column(qb, vc), } } @@ -555,22 +567,30 @@ impl SelectQuery { #[cfg(test)] mod tests { use super::*; + use std::rc::Rc; + use mentat_query_algebrizer::{ + Column, DatomsColumn, DatomsTable, + FulltextColumn, }; - fn build(c: &QueryFragment) -> String { + fn build_query(c: &QueryFragment) -> SQLQuery { let mut builder = SQLiteQueryBuilder::new(); c.push_sql(&mut builder) .map(|_| builder.finish()) - .unwrap().sql + .expect("to produce a query for the given constraint") + } + + fn build(c: &QueryFragment) -> String { + build_query(c).sql } #[test] fn test_in_constraint() { let none = Constraint::In { - left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), DatomsColumn::Value)), + left: ColumnOrExpression::Column(QualifiedAlias::new("datoms01".to_string(), Column::Fixed(DatomsColumn::Value))), list: vec![], }; @@ -651,6 +671,25 @@ mod tests { "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1), (1, 2)"); } + #[test] + fn test_matches_constraint() { + let c = Constraint::Infix { + op: Op("MATCHES"), + left: ColumnOrExpression::Column(QualifiedAlias("fulltext01".to_string(), Column::Fulltext(FulltextColumn::Text))), + right: ColumnOrExpression::Value(TypedValue::String(Rc::new("needle".to_string()))), + }; + let q = build_query(&c); + assert_eq!("`fulltext01`.text MATCHES $v0", q.sql); + assert_eq!(vec![("$v0".to_string(), Rc::new(mentat_sql::Value::Text("needle".to_string())))], q.args); + + let c = Constraint::Infix { + op: Op("="), + left: ColumnOrExpression::Column(QualifiedAlias("fulltext01".to_string(), Column::Fulltext(FulltextColumn::Rowid))), + right: ColumnOrExpression::Column(QualifiedAlias("datoms02".to_string(), 
Column::Fixed(DatomsColumn::Value))), + }; + assert_eq!("`fulltext01`.rowid = `datoms02`.v", build(&c)); + } + #[test] fn test_end_to_end() { // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] diff --git a/query-translator/src/translate.rs b/query-translator/src/translate.rs index 0928aa90..af3ff6b5 100644 --- a/query-translator/src/translate.rs +++ b/query-translator/src/translate.rs @@ -148,6 +148,14 @@ impl ToConstraint for ColumnConstraint { } }, + Matches(left, right) => { + Constraint::Infix { + op: Op("MATCH"), + left: ColumnOrExpression::Column(left), + right: right.into(), + } + }, + HasType(table, value_type) => { let column = QualifiedAlias::new(table, DatomsColumn::ValueTypeTag).to_column(); Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag())) diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 5b3b0c1f..66face17 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -15,6 +15,8 @@ extern crate mentat_query_parser; extern crate mentat_query_translator; extern crate mentat_sql; +use std::collections::BTreeMap; + use std::rc::Rc; use mentat_query::{ @@ -69,6 +71,13 @@ fn prepopulated_typed_schema(foo_type: ValueType) -> Schema { value_type: foo_type, ..Default::default() }); + associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100); + add_attribute(&mut schema, 100, Attribute { + value_type: ValueType::String, + index: true, + fulltext: true, + ..Default::default() + }); schema } @@ -226,7 +235,7 @@ fn test_unknown_attribute_double_value() { // In general, doubles _could_ be 1.0, which might match a boolean or a ref. Set tag = 5 to // make sure we only match numbers. 
- assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95 AND `datoms00`.value_type_tag = 5"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.v = 9.95e0 AND `datoms00`.value_type_tag = 5"); assert_eq!(args, vec![]); } @@ -295,7 +304,7 @@ fn test_numeric_gte_known_attribute() { let schema = prepopulated_typed_schema(ValueType::Double); let query = r#"[:find ?x :where [?x :foo/bar ?y] [(>= ?y 12.9)]]"#; let SQLQuery { sql, args } = translate(&schema, query); - assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 12.9"); + assert_eq!(sql, "SELECT DISTINCT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v >= 1.29e1"); assert_eq!(args, vec![]); } @@ -727,3 +736,155 @@ fn test_not_with_ground() { (SELECT 1 FROM (SELECT 0 AS `?v` WHERE 0 UNION ALL VALUES (28), (29)) AS `c00` \ WHERE `datoms00`.v = `c00`.`?v`)"); } + +#[test] +fn test_fulltext() { + let schema = prepopulated_typed_schema(ValueType::Double); + + let query = r#"[:find ?entity ?value ?tx ?score :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `datoms01`.e AS `?entity`, \ + `fulltext_values00`.text AS `?value`, \ + `datoms01`.tx AS `?tx`, \ + 0e0 AS `?score` \ + FROM `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0"); + assert_eq!(args, vec![make_arg("$v0", "needle"),]); + + let query = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + // Observe that the computed table isn't dropped, even though `?score` isn't bound in the final 
conjoining clause. + assert_eq!(sql, "SELECT DISTINCT `datoms01`.e AS `?entity`, \ + `fulltext_values00`.text AS `?value`, \ + `datoms01`.tx AS `?tx` \ + FROM `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0"); + assert_eq!(args, vec![make_arg("$v0", "needle"),]); + + let query = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx _]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + // Observe that the computed table isn't included at all when `?score` isn't bound. + assert_eq!(sql, "SELECT DISTINCT `datoms01`.e AS `?entity`, \ + `fulltext_values00`.text AS `?value`, \ + `datoms01`.tx AS `?tx` \ + FROM `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0"); + assert_eq!(args, vec![make_arg("$v0", "needle"),]); + + let query = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]] [?entity :foo/bar ?score]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT `datoms01`.e AS `?entity`, \ + `fulltext_values00`.text AS `?value`, \ + `datoms01`.tx AS `?tx` \ + FROM `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01`, \ + `datoms` AS `datoms02` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0 \ + AND `datoms02`.a = 99 \ + AND `datoms02`.v = 0e0 \ + AND `datoms01`.e = `datoms02`.e"); + assert_eq!(args, vec![make_arg("$v0", "needle"),]); + + let query = r#"[:find ?entity ?value ?tx :where [?entity :foo/bar ?score] [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#; + let SQLQuery { sql, args } = translate(&schema, query); + assert_eq!(sql, "SELECT DISTINCT 
`datoms00`.e AS `?entity`, \ + `fulltext_values01`.text AS `?value`, \ + `datoms02`.tx AS `?tx` \ + FROM `datoms` AS `datoms00`, \ + `fulltext_values` AS `fulltext_values01`, \ + `datoms` AS `datoms02` \ + WHERE `datoms00`.a = 99 \ + AND `datoms02`.a = 100 \ + AND `datoms02`.v = `fulltext_values01`.rowid \ + AND `fulltext_values01`.text MATCH $v0 \ + AND `datoms00`.v = 0e0 \ + AND `datoms00`.e = `datoms02`.e"); + assert_eq!(args, vec![make_arg("$v0", "needle"),]); +} + +#[test] +fn test_fulltext_inputs() { + let schema = prepopulated_typed_schema(ValueType::String); + + // Bind ?entity. We expect the output to collide. + let query = r#"[:find ?val + :in ?entity + :where [(fulltext $ :foo/fts "hello") [[?entity ?val _ _]]]]"#; + let mut types = BTreeMap::default(); + types.insert(Variable::from_valid_name("?entity"), ValueType::Ref); + let inputs = QueryInputs::new(types, BTreeMap::default()).expect("valid inputs"); + + // Without binding the value. q_once will err if you try this! + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT DISTINCT `fulltext_values00`.text AS `?val` \ + FROM \ + `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0"); + assert_eq!(args, vec![make_arg("$v0", "hello"),]); + + // With the value bound. 
+ let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?entity"), TypedValue::Ref(111))]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT DISTINCT `fulltext_values00`.text AS `?val` \ + FROM \ + `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0 \ + AND `datoms01`.e = 111"); + assert_eq!(args, vec![make_arg("$v0", "hello"),]); + + // Same again, but retrieving the entity. + let query = r#"[:find ?entity . + :in ?entity + :where [(fulltext $ :foo/fts "hello") [[?entity _ _]]]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?entity"), TypedValue::Ref(111))]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT 111 AS `?entity` FROM \ + `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0 \ + AND `datoms01`.e = 111 \ + LIMIT 1"); + assert_eq!(args, vec![make_arg("$v0", "hello"),]); + + // A larger pattern. 
+ let query = r#"[:find ?entity ?value ?friend + :in ?entity + :where + [(fulltext $ :foo/fts "hello") [[?entity ?value]]] + [?entity :foo/bar ?friend]]"#; + let inputs = QueryInputs::with_value_sequence(vec![(Variable::from_valid_name("?entity"), TypedValue::Ref(121))]); + let SQLQuery { sql, args } = translate_with_inputs(&schema, query, inputs); + assert_eq!(sql, "SELECT DISTINCT 121 AS `?entity`, \ + `fulltext_values00`.text AS `?value`, \ + `datoms02`.v AS `?friend` \ + FROM \ + `fulltext_values` AS `fulltext_values00`, \ + `datoms` AS `datoms01`, \ + `datoms` AS `datoms02` \ + WHERE `datoms01`.a = 100 \ + AND `datoms01`.v = `fulltext_values00`.rowid \ + AND `fulltext_values00`.text MATCH $v0 \ + AND `datoms01`.e = 121 \ + AND `datoms02`.e = 121 \ + AND `datoms02`.a = 99"); + assert_eq!(args, vec![make_arg("$v0", "hello"),]); +} diff --git a/sql/src/lib.rs b/sql/src/lib.rs index b7b7ab0c..84583d17 100644 --- a/sql/src/lib.rs +++ b/sql/src/lib.rs @@ -160,7 +160,14 @@ impl QueryBuilder for SQLiteQueryBuilder { &Ref(entid) => self.push_sql(entid.to_string().as_str()), &Boolean(v) => self.push_sql(if v { "1" } else { "0" }), &Long(v) => self.push_sql(v.to_string().as_str()), - &Double(OrderedFloat(v)) => self.push_sql(v.to_string().as_str()), + &Double(OrderedFloat(v)) => { + // Rust's floats print without a trailing '.' in some cases. + // https://github.com/rust-lang/rust/issues/30967 + // We format with 'e' -- scientific notation -- so that SQLite treats them as + // floats and not integers. This is most noticeable for fulltext scores, which + // will currently (2017-06) always be 0, and need to round-trip as doubles. + self.push_sql(format!("{:e}", v).as_str()); + }, &Instant(dt) => { self.push_sql(format!("{}", dt.to_micros()).as_str()); // TODO: argument instead? 
}, @@ -260,9 +267,13 @@ mod tests { s.push_static_arg(string_arg("frobnicate")); s.push_sql(" OR "); s.push_static_arg(string_arg("swoogle")); + s.push_sql(" OR "); + s.push_identifier("bar").unwrap(); + s.push_sql(" = "); + s.push_typed_value(&TypedValue::Double(1.0.into())).unwrap(); let q = s.finish(); - assert_eq!(q.sql.as_str(), "SELECT `foo` WHERE `bar` = $v0 OR $v1"); + assert_eq!(q.sql.as_str(), "SELECT `foo` WHERE `bar` = $v0 OR $v1 OR `bar` = 1e0"); assert_eq!(q.args, vec![("$v0".to_string(), string_arg("frobnicate")), ("$v1".to_string(), string_arg("swoogle"))]); diff --git a/tests/query.rs b/tests/query.rs index cdc88ce9..9b685de4 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -254,3 +254,40 @@ fn test_instants_and_uuids() { _ => panic!("Expected query to work."), } } + +#[test] +fn test_fulltext() { + let mut c = new_connection("").expect("Couldn't open conn."); + let mut conn = Conn::connect(&mut c).expect("Couldn't open DB."); + conn.transact(&mut c, r#"[ + [:db/add "s" :db/ident :foo/fts] + [:db/add "s" :db/valueType :db.type/string] + [:db/add "s" :db/fulltext true] + [:db/add "s" :db/cardinality :db.cardinality/many] + ]"#).unwrap(); + let v = conn.transact(&mut c, r#"[ + [:db/add "v" :foo/fts "hello darkness my old friend"] + [:db/add "v" :foo/fts "I've come to talk with you again"] + ]"#).unwrap().tempids.get("v").cloned().expect("v was mapped"); + + let r = conn.q_once(&mut c, + r#"[:find [?x ?val ?score] + :where [(fulltext $ :foo/fts "darkness") [[?x ?val _ ?score]]]]"#, None); + match r { + Result::Ok(QueryResults::Tuple(Some(vals))) => { + let mut vals = vals.into_iter(); + match (vals.next(), vals.next(), vals.next(), vals.next()) { + (Some(TypedValue::Ref(x)), + Some(TypedValue::String(text)), + Some(TypedValue::Double(score)), + None) => { + assert_eq!(x, v); + assert_eq!(text.as_str(), "hello darkness my old friend"); + assert_eq!(score, 0.0f64.into()); + }, + _ => panic!("Unexpected results."), + } + }, + _ => panic!("Expected 
query to work."), + } +}