Implement `fulltext`. (#477) r=nalexander
* You can't use fulltext search on a non-fulltext attribute. * Allow for implicit placeholder bindings in fulltext.
This commit is contained in:
parent
565a0e9ff9
commit
3f264e9eb2
7 changed files with 654 additions and 5 deletions
348
query-algebrizer/src/clauses/fulltext.rs
Normal file
348
query-algebrizer/src/clauses/fulltext.rs
Normal file
|
@ -0,0 +1,348 @@
|
||||||
|
// Copyright 2016 Mozilla
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
|
||||||
|
// this file except in compliance with the License. You may obtain a copy of the
|
||||||
|
// License at http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software distributed
|
||||||
|
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||||
|
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||||
|
// specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
use mentat_core::{
|
||||||
|
Schema,
|
||||||
|
TypedValue,
|
||||||
|
ValueType,
|
||||||
|
};
|
||||||
|
|
||||||
|
use mentat_query::{
|
||||||
|
Binding,
|
||||||
|
FnArg,
|
||||||
|
NonIntegerConstant,
|
||||||
|
SrcVar,
|
||||||
|
VariableOrPlaceholder,
|
||||||
|
WhereFn,
|
||||||
|
};
|
||||||
|
|
||||||
|
use clauses::{
|
||||||
|
ConjoiningClauses,
|
||||||
|
};
|
||||||
|
|
||||||
|
use errors::{
|
||||||
|
BindingError,
|
||||||
|
ErrorKind,
|
||||||
|
Result,
|
||||||
|
};
|
||||||
|
|
||||||
|
use types::{
|
||||||
|
Column,
|
||||||
|
ColumnConstraint,
|
||||||
|
DatomsColumn,
|
||||||
|
DatomsTable,
|
||||||
|
EmptyBecause,
|
||||||
|
FulltextColumn,
|
||||||
|
QualifiedAlias,
|
||||||
|
QueryValue,
|
||||||
|
SourceAlias,
|
||||||
|
};
|
||||||
|
|
||||||
|
impl ConjoiningClauses {
|
||||||
|
#[allow(unused_variables)]
|
||||||
|
pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> {
|
||||||
|
if where_fn.args.len() != 3 {
|
||||||
|
bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3));
|
||||||
|
}
|
||||||
|
|
||||||
|
if where_fn.binding.is_empty() {
|
||||||
|
// The binding must introduce at least one bound variable.
|
||||||
|
bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::NoBoundVariable));
|
||||||
|
}
|
||||||
|
|
||||||
|
if !where_fn.binding.is_valid() {
|
||||||
|
// The binding must not duplicate bound variables.
|
||||||
|
bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable));
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should have exactly four bindings. Destructure them now.
|
||||||
|
let bindings = match where_fn.binding {
|
||||||
|
Binding::BindRel(bindings) => {
|
||||||
|
let bindings_count = bindings.len();
|
||||||
|
if bindings_count < 1 || bindings_count > 4 {
|
||||||
|
bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(),
|
||||||
|
BindingError::InvalidNumberOfBindings {
|
||||||
|
number: bindings.len(),
|
||||||
|
expected: 4,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
bindings
|
||||||
|
},
|
||||||
|
Binding::BindScalar(_) |
|
||||||
|
Binding::BindTuple(_) |
|
||||||
|
Binding::BindColl(_) => bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::ExpectedBindRel)),
|
||||||
|
};
|
||||||
|
let mut bindings = bindings.into_iter();
|
||||||
|
let b_entity = bindings.next().unwrap();
|
||||||
|
let b_value = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder);
|
||||||
|
let b_tx = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder);
|
||||||
|
let b_score = bindings.next().unwrap_or(VariableOrPlaceholder::Placeholder);
|
||||||
|
|
||||||
|
let mut args = where_fn.args.into_iter();
|
||||||
|
|
||||||
|
// TODO: process source variables.
|
||||||
|
match args.next().unwrap() {
|
||||||
|
FnArg::SrcVar(SrcVar::DefaultSrc) => {},
|
||||||
|
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)),
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: accept placeholder and set of attributes. Alternately, consider putting the search
|
||||||
|
// term before the attribute arguments and collect the (variadic) attributes into a set.
|
||||||
|
// let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?;
|
||||||
|
//
|
||||||
|
// TODO: improve the expression of this matching, possibly by using attribute_for_* uniformly.
|
||||||
|
let a = match args.next().unwrap() {
|
||||||
|
FnArg::IdentOrKeyword(i) => schema.get_entid(&i),
|
||||||
|
// Must be an entid.
|
||||||
|
FnArg::EntidOrInteger(e) => Some(e),
|
||||||
|
FnArg::Variable(v) => {
|
||||||
|
// If it's already bound, then let's expand the variable.
|
||||||
|
// TODO: allow non-constant attributes.
|
||||||
|
match self.bound_value(&v) {
|
||||||
|
Some(TypedValue::Ref(entid)) => Some(entid),
|
||||||
|
Some(tv) => {
|
||||||
|
bail!(ErrorKind::InputTypeDisagreement(v.name().clone(), ValueType::Ref, tv.value_type()));
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
bail!(ErrorKind::UnboundVariable((*v.0).clone()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// An unknown ident, or an entity that isn't present in the store, or isn't a fulltext
|
||||||
|
// attribute, is likely enough to be a coding error that we choose to bail instead of
|
||||||
|
// marking the pattern as known-empty.
|
||||||
|
let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
|
||||||
|
let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
|
||||||
|
|
||||||
|
if !attribute.fulltext {
|
||||||
|
// We can never get results from a non-fulltext attribute!
|
||||||
|
println!("Can't run fulltext on non-fulltext attribute {}.", a);
|
||||||
|
self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(a));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let fulltext_values_alias = self.next_alias_for_table(DatomsTable::FulltextValues);
|
||||||
|
let datoms_table_alias = self.next_alias_for_table(DatomsTable::Datoms);
|
||||||
|
|
||||||
|
// We do a fulltext lookup by joining the fulltext values table against datoms -- just
|
||||||
|
// like applying a pattern, but two tables contribute instead of one.
|
||||||
|
self.from.push(SourceAlias(DatomsTable::FulltextValues, fulltext_values_alias.clone()));
|
||||||
|
self.from.push(SourceAlias(DatomsTable::Datoms, datoms_table_alias.clone()));
|
||||||
|
|
||||||
|
// TODO: constrain the type in the more general cases (e.g., `a` is a var).
|
||||||
|
self.constrain_attribute(datoms_table_alias.clone(), a);
|
||||||
|
|
||||||
|
// Join the datoms table to the fulltext values table.
|
||||||
|
self.wheres.add_intersection(ColumnConstraint::Equals(
|
||||||
|
QualifiedAlias(datoms_table_alias.clone(), Column::Fixed(DatomsColumn::Value)),
|
||||||
|
QueryValue::Column(QualifiedAlias(fulltext_values_alias.clone(), Column::Fulltext(FulltextColumn::Rowid)))));
|
||||||
|
|
||||||
|
// `search` is either text or a variable.
|
||||||
|
// If it's simple text, great.
|
||||||
|
// If it's a variable, it'll be in one of three states:
|
||||||
|
// - It's already bound, either by input or by a previous pattern like `ground`.
|
||||||
|
// - It's not already bound, but it's a defined input of type Text. Not yet implemented: TODO.
|
||||||
|
// - It's not bound. The query cannot be algebrized.
|
||||||
|
let search: TypedValue = match args.next().unwrap() {
|
||||||
|
FnArg::Constant(NonIntegerConstant::Text(s)) => {
|
||||||
|
TypedValue::String(s)
|
||||||
|
},
|
||||||
|
FnArg::Variable(in_var) => {
|
||||||
|
match self.bound_value(&in_var) {
|
||||||
|
Some(t @ TypedValue::String(_)) => t,
|
||||||
|
Some(_) => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)),
|
||||||
|
None => {
|
||||||
|
if self.input_variables.contains(&in_var) &&
|
||||||
|
self.known_type(&in_var) == Some(ValueType::String) {
|
||||||
|
// Sorry, we haven't implemented late binding.
|
||||||
|
}
|
||||||
|
bail!(ErrorKind::UnboundVariable((*in_var.0).clone()));
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let constraint = ColumnConstraint::Matches(QualifiedAlias(fulltext_values_alias.clone(),
|
||||||
|
Column::Fulltext(FulltextColumn::Text)),
|
||||||
|
QueryValue::TypedValue(search));
|
||||||
|
self.wheres.add_intersection(constraint);
|
||||||
|
|
||||||
|
if let VariableOrPlaceholder::Variable(ref var) = b_entity {
|
||||||
|
// It must be a ref.
|
||||||
|
self.constrain_var_to_type(var.clone(), ValueType::Ref);
|
||||||
|
if self.is_known_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
self.bind_column_to_var(schema, datoms_table_alias.clone(), DatomsColumn::Entity, var.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let VariableOrPlaceholder::Variable(ref var) = b_value {
|
||||||
|
// This'll be bound to strings.
|
||||||
|
self.constrain_var_to_type(var.clone(), ValueType::String);
|
||||||
|
if self.is_known_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
self.bind_column_to_var(schema, fulltext_values_alias.clone(), Column::Fulltext(FulltextColumn::Text), var.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let VariableOrPlaceholder::Variable(ref var) = b_tx {
|
||||||
|
// Txs must be refs.
|
||||||
|
self.constrain_var_to_type(var.clone(), ValueType::Ref);
|
||||||
|
if self.is_known_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
self.bind_column_to_var(schema, datoms_table_alias.clone(), DatomsColumn::Tx, var.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let VariableOrPlaceholder::Variable(ref var) = b_score {
|
||||||
|
// Scores are doubles.
|
||||||
|
self.constrain_var_to_type(var.clone(), ValueType::Double);
|
||||||
|
|
||||||
|
// We do not allow the score to be bound.
|
||||||
|
if self.value_bindings.contains_key(var) || self.input_variables.contains(var) {
|
||||||
|
bail!(ErrorKind::InvalidBinding(var.name(), BindingError::UnexpectedBinding));
|
||||||
|
}
|
||||||
|
|
||||||
|
// We bind the value ourselves. This handily takes care of substituting into existing uses.
|
||||||
|
// TODO: produce real scores using SQLite's matchinfo.
|
||||||
|
self.bind_value(var, TypedValue::Double(0.0.into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod testing {
    use super::*;

    use std::rc::Rc;

    use mentat_core::{
        Attribute,
        ValueType,
    };

    use mentat_query::{
        Binding,
        FnArg,
        NamespacedKeyword,
        PlainSymbol,
        Variable,
    };

    use clauses::{
        add_attribute,
        associate_ident,
    };

    /// Build `[(fulltext $ <attribute> "needle") [[?entity ?value ?tx ?score]]]`.
    fn fulltext_needle(attribute: NamespacedKeyword) -> WhereFn {
        let binding = ["?entity", "?value", "?tx", "?score"]
            .iter()
            .map(|name| VariableOrPlaceholder::Variable(Variable::from_valid_name(*name)))
            .collect();

        WhereFn {
            operator: PlainSymbol::new("fulltext"),
            args: vec![
                FnArg::SrcVar(SrcVar::DefaultSrc),
                FnArg::IdentOrKeyword(attribute),
                FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))),
            ],
            binding: Binding::BindRel(binding),
        }
    }

    /// Exercise `apply_fulltext` directly: once against a fulltext attribute, checking every
    /// constraint, binding and known type it produces, and once against a non-fulltext
    /// attribute, which must leave the CC known-empty.
    #[test]
    fn test_apply_fulltext() {
        let mut schema = Schema::default();

        // `:foo/bar` (101) is a plain string attribute.
        associate_ident(&mut schema, NamespacedKeyword::new("foo", "bar"), 101);
        add_attribute(&mut schema, 101, Attribute {
            value_type: ValueType::String,
            fulltext: false,
            ..Default::default()
        });

        // `:foo/fts` (100) is an indexed fulltext string attribute.
        associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100);
        add_attribute(&mut schema, 100, Attribute {
            value_type: ValueType::String,
            index: true,
            fulltext: true,
            ..Default::default()
        });

        let var = |name: &str| Variable::from_valid_name(name);

        // Scenario one: a fulltext attribute.
        {
            let mut cc = ConjoiningClauses::default();
            cc.apply_fulltext(&schema, fulltext_needle(NamespacedKeyword::new("foo", "fts")))
              .expect("to be able to apply_fulltext");

            assert!(!cc.is_known_empty());

            // Finally, expand column bindings.
            cc.expand_column_bindings();
            assert!(!cc.is_known_empty());

            // Attribute constraint, table join, then the match itself -- in that order.
            let wheres = cc.wheres;
            assert_eq!(wheres.len(), 3);

            assert_eq!(wheres.0[0],
                       ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Attribute)),
                                                QueryValue::Entid(100)).into());
            assert_eq!(wheres.0[1],
                       ColumnConstraint::Equals(QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Value)),
                                                QueryValue::Column(QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Rowid)))).into());
            assert_eq!(wheres.0[2],
                       ColumnConstraint::Matches(QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Text)),
                                                 QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into())))).into());

            // Entity, value and tx are column bindings.
            let column_bindings = cc.column_bindings;
            assert_eq!(column_bindings.len(), 3);

            assert_eq!(column_bindings.get(&var("?entity")).expect("column binding for ?entity").clone(),
                       vec![QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Entity))]);
            assert_eq!(column_bindings.get(&var("?value")).expect("column binding for ?value").clone(),
                       vec![QualifiedAlias("fulltext_values00".to_string(), Column::Fulltext(FulltextColumn::Text))]);
            assert_eq!(column_bindings.get(&var("?tx")).expect("column binding for ?tx").clone(),
                       vec![QualifiedAlias("datoms01".to_string(), Column::Fixed(DatomsColumn::Tx))]);

            // Score is a value binding.
            let value_bindings = cc.value_bindings;
            assert_eq!(value_bindings.get(&var("?score")).expect("column binding for ?score").clone(),
                       TypedValue::Double(0.0.into()));

            // All four variables have exactly one known type.
            let known_types = cc.known_types;
            assert_eq!(known_types.len(), 4);

            assert_eq!(known_types.get(&var("?entity")).expect("known types for ?entity").clone(),
                       vec![ValueType::Ref].into_iter().collect());
            assert_eq!(known_types.get(&var("?value")).expect("known types for ?value").clone(),
                       vec![ValueType::String].into_iter().collect());
            assert_eq!(known_types.get(&var("?tx")).expect("known types for ?tx").clone(),
                       vec![ValueType::Ref].into_iter().collect());
            assert_eq!(known_types.get(&var("?score")).expect("known types for ?score").clone(),
                       vec![ValueType::Double].into_iter().collect());
        }

        // Scenario two: a non-fulltext attribute.
        {
            let mut cc = ConjoiningClauses::default();
            cc.apply_fulltext(&schema, fulltext_needle(NamespacedKeyword::new("foo", "bar")))
              .expect("to be able to apply_fulltext");

            // It's not a fulltext attribute, so the CC cannot yield results.
            assert!(cc.is_known_empty());
        }
    }
}
|
|
@ -71,6 +71,7 @@ mod predicate;
|
||||||
mod resolve;
|
mod resolve;
|
||||||
|
|
||||||
mod ground;
|
mod ground;
|
||||||
|
mod fulltext;
|
||||||
mod where_fn;
|
mod where_fn;
|
||||||
|
|
||||||
use validate::{
|
use validate::{
|
||||||
|
@ -352,9 +353,8 @@ impl ConjoiningClauses {
|
||||||
|
|
||||||
// Are we also trying to figure out the type of the value when the query runs?
|
// Are we also trying to figure out the type of the value when the query runs?
|
||||||
// If so, constrain that!
|
// If so, constrain that!
|
||||||
if let Some(table) = self.extracted_types.get(&var)
|
if let Some(qa) = self.extracted_types.get(&var) {
|
||||||
.map(|qa| qa.0.clone()) {
|
self.wheres.add_intersection(ColumnConstraint::HasType(qa.0.clone(), vt));
|
||||||
self.wheres.add_intersection(ColumnConstraint::HasType(table, value.value_type()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally, store the binding for future use.
|
// Finally, store the binding for future use.
|
||||||
|
@ -479,8 +479,14 @@ impl ConjoiningClauses {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn constrain_column_to_constant<C: Into<Column>>(&mut self, table: TableAlias, column: C, constant: TypedValue) {
|
pub fn constrain_column_to_constant<C: Into<Column>>(&mut self, table: TableAlias, column: C, constant: TypedValue) {
|
||||||
|
match constant {
|
||||||
|
// Be a little more explicit.
|
||||||
|
TypedValue::Ref(entid) => self.constrain_column_to_entity(table, column, entid),
|
||||||
|
_ => {
|
||||||
let column = column.into();
|
let column = column.into();
|
||||||
self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant)))
|
self.wheres.add_intersection(ColumnConstraint::Equals(QualifiedAlias(table, column), QueryValue::TypedValue(constant)))
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn constrain_column_to_entity<C: Into<Column>>(&mut self, table: TableAlias, column: C, entity: Entid) {
|
pub fn constrain_column_to_entity<C: Into<Column>>(&mut self, table: TableAlias, column: C, entity: Entid) {
|
||||||
|
|
|
@ -37,6 +37,7 @@ impl ConjoiningClauses {
|
||||||
// Because we'll be growing the set of built-in functions, handling each differently, and
|
// Because we'll be growing the set of built-in functions, handling each differently, and
|
||||||
// ultimately allowing user-specified functions, we match on the function name first.
|
// ultimately allowing user-specified functions, we match on the function name first.
|
||||||
match where_fn.operator.0.as_str() {
|
match where_fn.operator.0.as_str() {
|
||||||
|
"fulltext" => self.apply_fulltext(schema, where_fn),
|
||||||
"ground" => self.apply_ground(schema, where_fn),
|
"ground" => self.apply_ground(schema, where_fn),
|
||||||
_ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())),
|
_ => bail!(ErrorKind::UnknownFunction(where_fn.operator.clone())),
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ use self::mentat_query::{
|
||||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
pub enum BindingError {
|
pub enum BindingError {
|
||||||
NoBoundVariable,
|
NoBoundVariable,
|
||||||
|
UnexpectedBinding,
|
||||||
RepeatedBoundVariable, // TODO: include repeated variable(s).
|
RepeatedBoundVariable, // TODO: include repeated variable(s).
|
||||||
|
|
||||||
/// Expected `[[?x ?y]]` but got some other type of binding. Mentat is deliberately more strict
|
/// Expected `[[?x ?y]]` but got some other type of binding. Mentat is deliberately more strict
|
||||||
|
|
102
query-algebrizer/tests/fulltext.rs
Normal file
102
query-algebrizer/tests/fulltext.rs
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
// Copyright 2016 Mozilla
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
|
||||||
|
// this file except in compliance with the License. You may obtain a copy of the
|
||||||
|
// License at http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software distributed
|
||||||
|
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||||
|
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||||
|
// specific language governing permissions and limitations under the License.
|
||||||
|
|
||||||
|
extern crate mentat_core;
|
||||||
|
extern crate mentat_query;
|
||||||
|
extern crate mentat_query_algebrizer;
|
||||||
|
extern crate mentat_query_parser;
|
||||||
|
|
||||||
|
use mentat_core::{
|
||||||
|
Attribute,
|
||||||
|
Entid,
|
||||||
|
Schema,
|
||||||
|
ValueType,
|
||||||
|
};
|
||||||
|
|
||||||
|
use mentat_query_parser::{
|
||||||
|
parse_find_string,
|
||||||
|
};
|
||||||
|
|
||||||
|
use mentat_query::{
|
||||||
|
NamespacedKeyword,
|
||||||
|
};
|
||||||
|
|
||||||
|
use mentat_query_algebrizer::{
|
||||||
|
ConjoiningClauses,
|
||||||
|
algebrize,
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// These are helpers that tests use to build Schema instances.
|
||||||
|
/// Register `i` as the ident for entid `e` in both directions: `entid_map` (entid to ident)
/// and `ident_map` (ident to entid).
fn associate_ident(schema: &mut Schema, i: NamespacedKeyword, e: Entid) {
    schema.entid_map.insert(e, i.clone());
    // Last use of `i`: move it instead of cloning a second time.
    schema.ident_map.insert(i, e);
}
|
||||||
|
|
||||||
|
/// Store attribute `a` under entid `e` in the schema's attribute map.
fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) {
    let attributes = &mut schema.schema_map;
    attributes.insert(e, a);
}
|
||||||
|
|
||||||
|
fn prepopulated_schema() -> Schema {
|
||||||
|
let mut schema = Schema::default();
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "name"), 65);
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "description"), 66);
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "parent"), 67);
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "age"), 68);
|
||||||
|
associate_ident(&mut schema, NamespacedKeyword::new("foo", "height"), 69);
|
||||||
|
add_attribute(&mut schema, 65, Attribute {
|
||||||
|
value_type: ValueType::String,
|
||||||
|
multival: false,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
add_attribute(&mut schema, 66, Attribute {
|
||||||
|
value_type: ValueType::String,
|
||||||
|
fulltext: true,
|
||||||
|
multival: true,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
add_attribute(&mut schema, 67, Attribute {
|
||||||
|
value_type: ValueType::String,
|
||||||
|
multival: true,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
add_attribute(&mut schema, 68, Attribute {
|
||||||
|
value_type: ValueType::Long,
|
||||||
|
multival: false,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
add_attribute(&mut schema, 69, Attribute {
|
||||||
|
value_type: ValueType::Long,
|
||||||
|
multival: false,
|
||||||
|
..Default::default()
|
||||||
|
});
|
||||||
|
schema
|
||||||
|
}
|
||||||
|
|
||||||
|
fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
|
||||||
|
let parsed = parse_find_string(input).expect("query input to have parsed");
|
||||||
|
algebrize(schema.into(), parsed).expect("algebrizing to have succeeded").cc
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Both queries below must algebrize to a known-empty CC.
#[test]
fn test_apply_fulltext() {
    let schema = prepopulated_schema();

    // `:foo/name` is not a fulltext attribute, so the search short-circuits.
    let non_fts_query = r#"[:find ?val
                            :where [(fulltext $ :foo/name "hello") [[?entity ?val _ _]]]]"#;
    let cc = alg(&schema, non_fts_query);
    assert!(cc.is_known_empty());

    // `?score` is a double, but is then used in entity position: a type mismatch, which
    // also short-circuits.
    // NOTE(review): `:foo/bar` is not registered by `prepopulated_schema`, so this clause may
    // be known-empty for that reason rather than because of the type mismatch -- confirm.
    let mismatched_score_query = r#"[:find ?val
                                     :where [(fulltext $ :foo/description "hello") [[?entity ?val ?tx ?score]]]
                                            [?score :foo/bar _]]"#;
    let cc = alg(&schema, mismatched_score_query);
    assert!(cc.is_known_empty());
}
|
|
@ -15,6 +15,8 @@ extern crate mentat_query_parser;
|
||||||
extern crate mentat_query_translator;
|
extern crate mentat_query_translator;
|
||||||
extern crate mentat_sql;
|
extern crate mentat_sql;
|
||||||
|
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
||||||
use mentat_query::{
|
use mentat_query::{
|
||||||
|
@ -734,3 +736,155 @@ fn test_not_with_ground() {
|
||||||
(SELECT 1 FROM (SELECT 0 AS `?v` WHERE 0 UNION ALL VALUES (28), (29)) AS `c00` \
|
(SELECT 1 FROM (SELECT 0 AS `?v` WHERE 0 UNION ALL VALUES (28), (29)) AS `c00` \
|
||||||
WHERE `datoms00`.v = `c00`.`?v`)");
|
WHERE `datoms00`.v = `c00`.`?v`)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Translate a series of `fulltext` queries and pin the exact SQL and arguments produced.
#[test]
fn test_fulltext() {
    let schema = prepopulated_typed_schema(ValueType::Double);

    // Every case searches for the same term, so every case expects the same arguments.
    let needle_args = vec![make_arg("$v0", "needle"),];

    // All four bindings are projected; the score shows up as the constant `0e0`.
    {
        let input = r#"[:find ?entity ?value ?tx ?score :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#;
        let SQLQuery { sql, args } = translate(&schema, input);
        let expected = "SELECT DISTINCT `datoms01`.e AS `?entity`, \
                        `fulltext_values00`.text AS `?value`, \
                        `datoms01`.tx AS `?tx`, \
                        0e0 AS `?score` \
                        FROM `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0";
        assert_eq!(sql, expected);
        assert_eq!(args, needle_args);
    }

    // `?score` is bound by the function but not projected: the expected SQL is the same as
    // above, minus the `?score` column.
    {
        let input = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#;
        let SQLQuery { sql, args } = translate(&schema, input);
        let expected = "SELECT DISTINCT `datoms01`.e AS `?entity`, \
                        `fulltext_values00`.text AS `?value`, \
                        `datoms01`.tx AS `?tx` \
                        FROM `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0";
        assert_eq!(sql, expected);
        assert_eq!(args, needle_args);
    }

    // The score position is a placeholder: the expected SQL is identical to the previous case.
    {
        let input = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx _]]]]"#;
        let SQLQuery { sql, args } = translate(&schema, input);
        let expected = "SELECT DISTINCT `datoms01`.e AS `?entity`, \
                        `fulltext_values00`.text AS `?value`, \
                        `datoms01`.tx AS `?tx` \
                        FROM `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0";
        assert_eq!(sql, expected);
        assert_eq!(args, needle_args);
    }

    // `?score` is reused by a later pattern: the constant score is substituted into it.
    {
        let input = r#"[:find ?entity ?value ?tx :where [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]] [?entity :foo/bar ?score]]"#;
        let SQLQuery { sql, args } = translate(&schema, input);
        let expected = "SELECT DISTINCT `datoms01`.e AS `?entity`, \
                        `fulltext_values00`.text AS `?value`, \
                        `datoms01`.tx AS `?tx` \
                        FROM `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01`, \
                        `datoms` AS `datoms02` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0 \
                        AND `datoms02`.a = 99 \
                        AND `datoms02`.v = 0e0 \
                        AND `datoms01`.e = `datoms02`.e";
        assert_eq!(sql, expected);
        assert_eq!(args, needle_args);
    }

    // `?score` is used by an earlier pattern: the constant score is substituted there too.
    {
        let input = r#"[:find ?entity ?value ?tx :where [?entity :foo/bar ?score] [(fulltext $ :foo/fts "needle") [[?entity ?value ?tx ?score]]]]"#;
        let SQLQuery { sql, args } = translate(&schema, input);
        let expected = "SELECT DISTINCT `datoms00`.e AS `?entity`, \
                        `fulltext_values01`.text AS `?value`, \
                        `datoms02`.tx AS `?tx` \
                        FROM `datoms` AS `datoms00`, \
                        `fulltext_values` AS `fulltext_values01`, \
                        `datoms` AS `datoms02` \
                        WHERE `datoms00`.a = 99 \
                        AND `datoms02`.a = 100 \
                        AND `datoms02`.v = `fulltext_values01`.rowid \
                        AND `fulltext_values01`.text MATCH $v0 \
                        AND `datoms00`.v = 0e0 \
                        AND `datoms00`.e = `datoms02`.e";
        assert_eq!(sql, expected);
        assert_eq!(args, needle_args);
    }
}
|
||||||
|
|
||||||
|
/// Translate `fulltext` queries whose `?entity` binding is also a declared input, with and
/// without a value supplied, and pin the exact SQL and arguments produced.
#[test]
fn test_fulltext_inputs() {
    let schema = prepopulated_typed_schema(ValueType::String);

    // Every case searches for the same term, so every case expects the same arguments.
    let hello_args = vec![make_arg("$v0", "hello"),];
    let entity = || Variable::from_valid_name("?entity");

    // Bind ?entity. We expect the output to collide.
    let input = r#"[:find ?val
                    :in ?entity
                    :where [(fulltext $ :foo/fts "hello") [[?entity ?val _ _]]]]"#;

    // Only the type of ?entity is supplied, not a value. q_once will err if you try this!
    {
        let mut types = BTreeMap::default();
        types.insert(entity(), ValueType::Ref);
        let inputs = QueryInputs::new(types, BTreeMap::default()).expect("valid inputs");

        let SQLQuery { sql, args } = translate_with_inputs(&schema, input, inputs);
        let expected = "SELECT DISTINCT `fulltext_values00`.text AS `?val` \
                        FROM \
                        `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0";
        assert_eq!(sql, expected);
        assert_eq!(args, hello_args);
    }

    // With the value bound.
    {
        let inputs = QueryInputs::with_value_sequence(vec![(entity(), TypedValue::Ref(111))]);

        let SQLQuery { sql, args } = translate_with_inputs(&schema, input, inputs);
        let expected = "SELECT DISTINCT `fulltext_values00`.text AS `?val` \
                        FROM \
                        `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0 \
                        AND `datoms01`.e = 111";
        assert_eq!(sql, expected);
        assert_eq!(args, hello_args);
    }

    // Same again, but retrieving the entity.
    {
        let input = r#"[:find ?entity .
                        :in ?entity
                        :where [(fulltext $ :foo/fts "hello") [[?entity _ _]]]]"#;
        let inputs = QueryInputs::with_value_sequence(vec![(entity(), TypedValue::Ref(111))]);

        let SQLQuery { sql, args } = translate_with_inputs(&schema, input, inputs);
        let expected = "SELECT 111 AS `?entity` FROM \
                        `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0 \
                        AND `datoms01`.e = 111 \
                        LIMIT 1";
        assert_eq!(sql, expected);
        assert_eq!(args, hello_args);
    }

    // A larger pattern.
    {
        let input = r#"[:find ?entity ?value ?friend
                        :in ?entity
                        :where
                        [(fulltext $ :foo/fts "hello") [[?entity ?value]]]
                        [?entity :foo/bar ?friend]]"#;
        let inputs = QueryInputs::with_value_sequence(vec![(entity(), TypedValue::Ref(121))]);

        let SQLQuery { sql, args } = translate_with_inputs(&schema, input, inputs);
        let expected = "SELECT DISTINCT 121 AS `?entity`, \
                        `fulltext_values00`.text AS `?value`, \
                        `datoms02`.v AS `?friend` \
                        FROM \
                        `fulltext_values` AS `fulltext_values00`, \
                        `datoms` AS `datoms01`, \
                        `datoms` AS `datoms02` \
                        WHERE `datoms01`.a = 100 \
                        AND `datoms01`.v = `fulltext_values00`.rowid \
                        AND `fulltext_values00`.text MATCH $v0 \
                        AND `datoms01`.e = 121 \
                        AND `datoms02`.e = 121 \
                        AND `datoms02`.a = 99";
        assert_eq!(sql, expected);
        assert_eq!(args, hello_args);
    }
}
|
||||||
|
|
|
@ -254,3 +254,40 @@ fn test_instants_and_uuids() {
|
||||||
_ => panic!("Expected query to work."),
|
_ => panic!("Expected query to work."),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// End-to-end check: define a fulltext attribute, transact two strings against one entity,
/// and confirm that a `fulltext` query returns the matching string, its entity, and a zero
/// score.
#[test]
fn test_fulltext() {
    let mut c = new_connection("").expect("Couldn't open conn.");
    let mut conn = Conn::connect(&mut c).expect("Couldn't open DB.");

    // Define `:foo/fts` as a cardinality-many fulltext string attribute.
    conn.transact(&mut c, r#"[
        [:db/add "s" :db/ident :foo/fts]
        [:db/add "s" :db/valueType :db.type/string]
        [:db/add "s" :db/fulltext true]
        [:db/add "s" :db/cardinality :db.cardinality/many]
    ]"#).unwrap();

    // Assert two strings against the same tempid and remember the entid it resolved to.
    let report = conn.transact(&mut c, r#"[
        [:db/add "v" :foo/fts "hello darkness my old friend"]
        [:db/add "v" :foo/fts "I've come to talk with you again"]
    ]"#).unwrap();
    let v = report.tempids.get("v").cloned().expect("v was mapped");

    let outcome = conn.q_once(&mut c,
                              r#"[:find [?x ?val ?score]
                                  :where [(fulltext $ :foo/fts "darkness") [[?x ?val _ ?score]]]]"#, None);

    let tuple = match outcome {
        Result::Ok(QueryResults::Tuple(Some(vals))) => vals,
        _ => panic!("Expected query to work."),
    };

    // Exactly three values are expected: ref, string, double.
    let mut columns = tuple.into_iter();
    match (columns.next(), columns.next(), columns.next(), columns.next()) {
        (Some(TypedValue::Ref(x)),
         Some(TypedValue::String(text)),
         Some(TypedValue::Double(score)),
         None) => {
            assert_eq!(x, v);
            assert_eq!(text.as_str(), "hello darkness my old friend");
            assert_eq!(score, 0.0f64.into());
        },
        _ => panic!("Unexpected results."),
    }
}
|
||||||
|
|
Loading…
Reference in a new issue