Part 2: Implement apply_fulltext and column constraints.

No bindings, yet.
This commit is contained in:
Nick Alexander 2017-04-05 11:02:09 -07:00
parent 08cae4bc7c
commit 62fda71fbc
11 changed files with 301 additions and 17 deletions

View file

@ -96,6 +96,7 @@ fn unit_type_set(t: ValueType) -> HashSet<ValueType> {
///
/// - Ordinary pattern clauses turn into `FROM` parts and `WHERE` parts using `=`.
/// - Predicate clauses turn into the same, but with other functions.
/// - Function clauses turn into `WHERE` parts using function-specific comparisons.
/// - `not` turns into `NOT EXISTS` with `WHERE` clauses inside the subquery to
/// bind it to the outer variables, or adds simple `WHERE` clauses to the outer
/// clause.
@ -581,6 +582,9 @@ impl ConjoiningClauses {
WhereClause::Pred(p) => {
self.apply_predicate(schema, p)
},
WhereClause::WhereFn(f) => {
self.apply_where_fn(schema, f)
},
WhereClause::OrJoin(o) => {
validate_or_join(&o)
//?;
@ -606,4 +610,4 @@ fn add_attribute(schema: &mut Schema, e: Entid, a: Attribute) {
#[cfg(test)]
pub fn ident(ns: &str, name: &str) -> PatternNonValuePlace {
PatternNonValuePlace::Ident(::std::rc::Rc::new(NamespacedKeyword::new(ns, name)))
}
}

View file

@ -8,8 +8,6 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::rc::Rc;
use mentat_core::{
Schema,
TypedValue,
@ -268,6 +266,7 @@ mod testing {
use super::*;
use std::collections::BTreeMap;
use std::rc::Rc;
use mentat_core::attribute::Unique;
use mentat_core::{

View file

@ -10,35 +10,45 @@
use mentat_core::{
Schema,
TypedValue,
};
use mentat_query::{
FnArg,
NonIntegerConstant,
Predicate,
SrcVar,
WhereFn,
};
use clauses::ConjoiningClauses;
use errors::{
Result,
Error,
ErrorKind,
Result,
};
use types::{
ColumnConstraint,
DatomsColumn,
DatomsTable,
FulltextColumn,
FulltextQualifiedAlias,
NumericComparison,
QualifiedAlias,
QueryValue,
};
/// Application of predicates.
impl ConjoiningClauses {
/// There are several kinds of predicates/functions in our Datalog:
/// There are several kinds of predicates in our Datalog:
/// - A limited set of binary comparison operators: < > <= >= !=.
/// These are converted into SQLite binary comparisons and some type constraints.
/// - A set of predicates like `fulltext` and `get-else` that are translated into
/// SQL `MATCH`es or joins, yielding bindings.
/// - In the future, some predicates that are implemented via function calls in SQLite.
///
/// At present we have implemented only the five built-in comparison binary operators.
pub fn apply_predicate<'s, 'p>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> {
pub fn apply_predicate<'s>(&mut self, schema: &'s Schema, predicate: Predicate) -> Result<()> {
// Because we'll be growing the set of built-in predicates, handling each differently,
// and ultimately allowing user-specified predicates, we match on the predicate name first.
if let Some(op) = NumericComparison::from_datalog_operator(predicate.operator.0.as_str()) {
@ -53,7 +63,7 @@ impl ConjoiningClauses {
/// - Ensures that the predicate functions name a known operator.
/// - Accumulates a `NumericInequality` constraint into the `wheres` list.
#[allow(unused_variables)]
pub fn apply_numeric_predicate<'s, 'p>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> {
pub fn apply_numeric_predicate<'s>(&mut self, schema: &'s Schema, comparison: NumericComparison, predicate: Predicate) -> Result<()> {
if predicate.args.len() != 2 {
bail!(ErrorKind::InvalidNumberOfArguments(predicate.operator.clone(), predicate.args.len(), 2));
}
@ -81,6 +91,96 @@ impl ConjoiningClauses {
self.wheres.add_intersection(constraint);
Ok(())
}
/// Dispatch a function clause to the handler for its operator.
///
/// Functions that bind variables come in several flavours in our Datalog:
/// - Functions such as `fulltext` and `get-else`, which are translated into SQL `MATCH`es or
///   joins and yield bindings.
/// - In the future, functions that are implemented via function calls in SQLite.
///
/// Only the `fulltext` operator is implemented at present; every other operator name is
/// rejected with `ErrorKind::UnknownFunction`.
pub fn apply_where_fn<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> {
    // Dispatch is keyed on the function name, because the set of built-in functions will grow
    // (each handled differently) and user-specified functions will ultimately be allowed.
    if where_fn.operator.0.as_str() != "fulltext" {
        bail!(ErrorKind::UnknownFunction(where_fn.operator.clone()));
    }

    self.apply_fulltext(schema, where_fn)
}
/// This function:
/// - Resolves variables and converts types to those more amenable to SQL.
/// - Ensures that the predicate functions name a known operator.
/// - Accumulates a `NumericInequality` constraint into the `wheres` list.
#[allow(unused_variables)]
pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> {
if where_fn.args.len() != 3 {
bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3));
}
// Go from arguments -- parser output -- to columns or values.
// Any variables that aren't bound by this point in the linear processing of clauses will
// cause the application of the predicate to fail.
let mut args = where_fn.args.into_iter();
// TODO: process source variables.
match args.next().unwrap() {
FnArg::SrcVar(SrcVar::DefaultSrc) => {},
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)),
}
// TODO: accept placeholder and set of attributes. Alternately, consider putting the search
// term before the attribute arguments and collect the (variadic) attributes into a set.
// let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?;
//
// TODO: allow non-constant attributes.
// TODO: improve the expression of this matching, possibly by using attribute_for_* uniformly.
let a = match args.next().unwrap() {
FnArg::Ident(i) => schema.get_entid(&i),
// Must be an entid.
FnArg::EntidOrInteger(e) => Some(e),
_ => None,
};
let a = a.ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
let attribute = schema.attribute_for_entid(a).cloned().ok_or(ErrorKind::InvalidArgument(where_fn.operator.clone(), "attribute".into(), 1))?;
let fulltext_values = DatomsTable::FulltextValues;
let datoms_table = DatomsTable::Datoms;
let fulltext_values_alias = (self.aliaser)(fulltext_values);
let datoms_table_alias = (self.aliaser)(datoms_table);
// TODO: constrain types in more general cases?
self.constrain_attribute(datoms_table_alias.clone(), a);
self.wheres.add_intersection(ColumnConstraint::Equals(
QualifiedAlias(datoms_table_alias, DatomsColumn::Value),
QueryValue::FulltextColumn(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Rowid))));
// search is either text or a variable.
// TODO: should this just use `resolve_argument`? Should it add a new `resolve_*` function?
let search = match args.next().unwrap() {
FnArg::Variable(var) => {
self.column_bindings
.get(&var)
.and_then(|cols| cols.first().map(|col| QueryValue::Column(col.clone())))
.ok_or_else(|| Error::from_kind(ErrorKind::UnboundVariable(var.name())))?
},
FnArg::Constant(NonIntegerConstant::Text(s)) => {
QueryValue::TypedValue(TypedValue::typed_string(s.as_str()))
},
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "string".into(), 2)),
};
// TODO: should we build the FQA in ::Matches, preventing nonsense like matching on ::Rowid?
let constraint = ColumnConstraint::Matches(FulltextQualifiedAlias(fulltext_values_alias.clone(), FulltextColumn::Text), search);
self.wheres.add_intersection(constraint);
// TODO: process bindings!
Ok(())
}
}
#[cfg(test)]
@ -88,6 +188,8 @@ mod testing {
use super::*;
use std::collections::HashSet;
use std::rc::Rc;
use mentat_core::attribute::Unique;
use mentat_core::{
Attribute,
@ -96,12 +198,14 @@ mod testing {
};
use mentat_query::{
Binding,
FnArg,
NamespacedKeyword,
Pattern,
PatternNonValuePlace,
PatternValuePlace,
PlainSymbol,
SrcVar,
Variable,
};
@ -229,4 +333,47 @@ mod testing {
.collect(),
ValueType::String));
}
}
#[test]
fn test_apply_fulltext() {
    // A schema with one fulltext-indexed string attribute, `:foo/fts`, at entid 100.
    let mut schema = Schema::default();
    associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100);
    add_attribute(&mut schema, 100, Attribute {
        value_type: ValueType::String,
        index: true,
        fulltext: true,
        ..Default::default()
    });

    // The clause under test: `fulltext` over the default source, `:foo/fts` and "needle".
    let where_fn = WhereFn {
        operator: PlainSymbol::new("fulltext"),
        args: vec![
            FnArg::SrcVar(SrcVar::DefaultSrc),
            FnArg::Ident(NamespacedKeyword::new("foo", "fts")),
            FnArg::Constant(NonIntegerConstant::Text(Rc::new("needle".into()))),
        ],
        binding: Binding::BindScalar(Variable::from_valid_name("?z")),
    };

    let mut cc = ConjoiningClauses::default();
    cc.apply_fulltext(&schema, where_fn).expect("to be able to apply_fulltext");
    assert!(!cc.is_known_empty);

    // Finally, expand column bindings.
    cc.expand_column_bindings();
    assert!(!cc.is_known_empty);

    let clauses = cc.wheres;
    assert_eq!(clauses.len(), 3);

    // 1. The datoms alias is constrained to the attribute.
    let attribute_constraint = ColumnConstraint::Equals(
        QualifiedAlias("datoms01".to_string(), DatomsColumn::Attribute),
        QueryValue::Entid(100));
    assert_eq!(clauses.0[0], attribute_constraint.into());

    // 2. The datom's value is joined to the rowid of the fulltext values alias.
    let join_constraint = ColumnConstraint::Equals(
        QualifiedAlias("datoms01".to_string(), DatomsColumn::Value),
        QueryValue::FulltextColumn(FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Rowid)));
    assert_eq!(clauses.0[1], join_constraint.into());

    // 3. The text column is matched against the search term.
    let match_constraint = ColumnConstraint::Matches(
        FulltextQualifiedAlias("fulltext_values00".to_string(), FulltextColumn::Text),
        QueryValue::TypedValue(TypedValue::String(Rc::new("needle".into()))));
    assert_eq!(clauses.0[2], match_constraint.into());

    // TODO: make assertions about types of columns.
}
}

View file

@ -56,7 +56,7 @@ impl ConjoiningClauses {
Constant(NonIntegerConstant::Text(_)) |
Constant(NonIntegerConstant::BigInteger(_)) => {
self.mark_known_empty(EmptyBecause::NonNumericArgument);
bail!(ErrorKind::NonNumericArgument(function.clone(), position));
bail!(ErrorKind::InvalidArgument(function.clone(), "numeric".into(), position));
},
Constant(NonIntegerConstant::Float(f)) => Ok(QueryValue::TypedValue(TypedValue::Double(f))),
}

View file

@ -35,9 +35,9 @@ error_chain! {
display("unbound variable: {}", name)
}
NonNumericArgument(function: PlainSymbol, position: usize) {
InvalidArgument(function: PlainSymbol, expected_type: String, position: usize) {
description("invalid argument")
display("invalid argument to {}: expected numeric in position {}.", function, position)
display("invalid argument to {}: expected {} in position {}.", function, expected_type, position)
}
NonMatchingVariablesInOrClause {

View file

@ -99,6 +99,8 @@ pub use types::{
ColumnIntersection,
DatomsColumn,
DatomsTable,
FulltextColumn,
FulltextQualifiedAlias,
QualifiedAlias,
QueryValue,
SourceAlias,

View file

@ -71,6 +71,23 @@ impl DatomsColumn {
}
}
/// One of the named columns of our fulltext values table.
#[derive(PartialEq, Eq, Clone, Debug)]
pub enum FulltextColumn {
    /// Serialized as `rowid`.
    Rowid,
    /// Serialized as `text`.
    Text,
}

impl FulltextColumn {
    /// Returns the name used for this column when it is written into SQL.
    pub fn as_str(&self) -> &'static str {
        match *self {
            FulltextColumn::Rowid => "rowid",
            FulltextColumn::Text => "text",
        }
    }
}
/// A specific instance of a table within a query. E.g., "datoms123".
pub type TableAlias = String;
@ -94,6 +111,16 @@ impl Debug for QualifiedAlias {
}
}
/// A particular column of a particular aliased fulltext table. E.g., "fulltext_values123", Rowid.
#[derive(PartialEq, Eq, Clone)]
pub struct FulltextQualifiedAlias(pub TableAlias, pub FulltextColumn);

impl Debug for FulltextQualifiedAlias {
    /// Formats the alias as `<table alias>.<column name>`.
    fn fmt(&self, f: &mut Formatter) -> Result {
        let &FulltextQualifiedAlias(ref table, ref column) = self;
        write!(f, "{}.{}", table, column.as_str())
    }
}
impl QualifiedAlias {
pub fn for_type_tag(&self) -> QualifiedAlias {
QualifiedAlias(self.0.clone(), DatomsColumn::ValueTypeTag)
@ -103,6 +130,7 @@ impl QualifiedAlias {
#[derive(PartialEq, Eq)]
pub enum QueryValue {
Column(QualifiedAlias),
FulltextColumn(FulltextQualifiedAlias),
Entid(Entid),
TypedValue(TypedValue),
@ -120,6 +148,9 @@ impl Debug for QueryValue {
&Column(ref qa) => {
write!(f, "{:?}", qa)
},
&FulltextColumn(ref qa) => {
write!(f, "{:?}", qa)
},
&Entid(ref entid) => {
write!(f, "entity({:?})", entid)
},
@ -192,6 +223,9 @@ pub enum ColumnConstraint {
right: QueryValue,
},
HasType(TableAlias, ValueType),
// TODO: Merge this with NumericInequality? I expect the fine-grained information to be
// valuable when optimizing.
Matches(FulltextQualifiedAlias, QueryValue),
}
#[derive(PartialEq, Eq, Debug)]
@ -290,6 +324,10 @@ impl Debug for ColumnConstraint {
write!(f, "{:?} {:?} {:?}", left, operator, right)
},
&Matches(ref qa, ref thing) => {
write!(f, "{:?} MATCHES {:?}", qa, thing)
},
&HasType(ref qa, value_type) => {
write!(f, "{:?}.value_type_tag = {:?}", qa, value_type)
},
@ -301,6 +339,7 @@ impl Debug for ColumnConstraint {
pub enum EmptyBecause {
// Var, existing, desired.
TypeMismatch(Variable, HashSet<ValueType>, ValueType),
NonAttributeArgument,
NonNumericArgument,
NonStringFulltextValue,
UnresolvedIdent(NamespacedKeyword),
@ -319,6 +358,9 @@ impl Debug for EmptyBecause {
write!(f, "Type mismatch: {:?} can't be {:?}, because it's already {:?}",
var, desired, existing)
},
&NonAttributeArgument => {
write!(f, "Non-attribute argument in attribute place")
},
&NonNumericArgument => {
write!(f, "Non-numeric argument in numeric place")
},
@ -346,4 +388,4 @@ impl Debug for EmptyBecause {
},
}
}
}
}

View file

@ -20,6 +20,8 @@ use mentat_core::{
use mentat_query_algebrizer::{
DatomsColumn,
FulltextColumn,
FulltextQualifiedAlias,
QualifiedAlias,
QueryValue,
SourceAlias,
@ -44,6 +46,7 @@ use mentat_sql::{
/// implementation for each storage backend. Passing `TypedValue`s here allows for that.
pub enum ColumnOrExpression {
Column(QualifiedAlias),
FulltextColumn(FulltextQualifiedAlias),
Entid(Entid), // Because it's so common.
Integer(i32), // We use these for type codes etc.
Long(i64),
@ -55,6 +58,7 @@ impl From<QueryValue> for ColumnOrExpression {
fn from(v: QueryValue) -> Self {
match v {
QueryValue::Column(c) => ColumnOrExpression::Column(c),
QueryValue::FulltextColumn(c) => ColumnOrExpression::FulltextColumn(c),
QueryValue::Entid(e) => ColumnOrExpression::Entid(e),
QueryValue::PrimitiveLong(v) => ColumnOrExpression::Long(v),
QueryValue::TypedValue(v) => ColumnOrExpression::Value(v),
@ -109,6 +113,14 @@ impl Constraint {
right: right,
}
}
/// Builds an infix constraint that applies the `MATCH` operator to `left` and `right`.
pub fn fulltext_match(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint {
    // SQLite specific!
    let match_op = Op("MATCH");

    Constraint::Infix {
        op: match_op,
        left: left,
        right: right,
    }
}
}
#[allow(dead_code)]
@ -157,6 +169,11 @@ fn push_column(qb: &mut QueryBuilder, col: &DatomsColumn) {
qb.push_sql(col.as_str());
}
// `FulltextColumn` names are a fixed set of static strings, so we know that they are safe to
// serialize directly into the SQL text.
fn push_fulltext_column(qb: &mut QueryBuilder, col: &FulltextColumn) {
    let column_name = col.as_str();
    qb.push_sql(column_name);
}
//---------------------------------------------------------
// Turn that representation into SQL.
@ -199,6 +216,12 @@ impl QueryFragment for ColumnOrExpression {
push_column(out, column);
Ok(())
},
&FulltextColumn(FulltextQualifiedAlias(ref table, ref column)) => {
out.push_identifier(table.as_str())?;
out.push_sql(".");
push_fulltext_column(out, column);
Ok(())
},
&Entid(entid) => {
out.push_sql(entid.to_string().as_str());
Ok(())
@ -406,13 +429,20 @@ impl SelectQuery {
#[cfg(test)]
mod tests {
use super::*;
use std::rc::Rc;
use mentat_query_algebrizer::DatomsTable;
fn build_constraint(c: Constraint) -> String {
fn build_constraint_query(c: Constraint) -> SQLQuery {
let mut builder = SQLiteQueryBuilder::new();
c.push_sql(&mut builder)
.map(|_| builder.finish())
.unwrap().sql
.expect("to produce a query for the given constraint")
}
fn build_constraint(c: Constraint) -> String {
build_constraint_query(c).sql
}
#[test]
@ -469,6 +499,25 @@ mod tests {
assert_eq!("((123 = 456 AND 789 = 246))", build_constraint(c));
}
#[test]
fn test_matches_constraint() {
    // Build the fulltext constraint through `Constraint::fulltext_match` so that the test
    // covers the operator that is actually emitted (`MATCH`), not a hand-written
    // `Op("MATCHES")` that no production code path produces.
    let c = Constraint::fulltext_match(
        ColumnOrExpression::FulltextColumn(FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Text)),
        ColumnOrExpression::Value(TypedValue::String(Rc::new("needle".to_string()))));
    let q = build_constraint_query(c);
    assert_eq!("`fulltext01`.text MATCH $v0", q.sql);
    // The search term must be passed as a bound argument, not inlined into the SQL.
    assert_eq!(vec![("$v0".to_string(), Rc::new("needle".to_string()))], q.args);

    // Fulltext columns can also take part in ordinary comparisons, e.g. the rowid join.
    let c = Constraint::Infix {
        op: Op("="),
        left: ColumnOrExpression::FulltextColumn(FulltextQualifiedAlias("fulltext01".to_string(), FulltextColumn::Rowid)),
        right: ColumnOrExpression::Column(QualifiedAlias("datoms02".to_string(), DatomsColumn::Value)),
    };
    assert_eq!("`fulltext01`.rowid = `datoms02`.v", build_constraint(c));
}
#[test]
fn test_end_to_end() {
// [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]]

View file

@ -32,6 +32,8 @@ use mentat_query_algebrizer::{
ConjoiningClauses,
DatomsColumn,
DatomsTable,
FulltextColumn,
FulltextQualifiedAlias,
QualifiedAlias,
QueryValue,
SourceAlias,
@ -69,6 +71,12 @@ impl ToColumn for QualifiedAlias {
}
}
/// A fulltext qualified alias becomes the `FulltextColumn` variant of `ColumnOrExpression`.
impl ToColumn for FulltextQualifiedAlias {
// Consumes the alias and wraps it unchanged.
fn to_column(self) -> ColumnOrExpression {
ColumnOrExpression::FulltextColumn(self)
}
}
impl ToConstraint for ColumnIntersection {
fn to_constraint(self) -> Constraint {
Constraint::And {
@ -108,6 +116,11 @@ impl ToConstraint for ColumnConstraint {
Equals(left, QueryValue::Column(right)) =>
Constraint::equal(left.to_column(), right.to_column()),
Equals(left, QueryValue::FulltextColumn(right)) =>
// TODO: figure out if this is the correct abstraction. Can we make it so that
// FulltextColumns::Text is not accepted here?
Constraint::equal(left.to_column(), right.to_column()),
Equals(qa, QueryValue::PrimitiveLong(value)) => {
let tag_column = qa.for_type_tag().to_column();
let value_column = qa.to_column();
@ -148,6 +161,14 @@ impl ToConstraint for ColumnConstraint {
}
},
Matches(left, right) => {
Constraint::Infix {
op: Op("MATCH"),
left: ColumnOrExpression::FulltextColumn(left),
right: right.into(),
}
},
HasType(table, value_type) => {
let column = QualifiedAlias(table, DatomsColumn::ValueTypeTag).to_column();
Constraint::equal(column, ColumnOrExpression::Integer(value_type.value_type_tag()))

View file

@ -58,6 +58,13 @@ fn prepopulated_schema() -> Schema {
value_type: ValueType::String,
..Default::default()
});
associate_ident(&mut schema, NamespacedKeyword::new("foo", "fts"), 100);
add_attribute(&mut schema, 100, Attribute {
value_type: ValueType::String,
index: true,
fulltext: true,
..Default::default()
});
schema
}
@ -241,4 +248,14 @@ fn test_numeric_not_equals_known_attribute() {
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v <> 12 LIMIT 1");
assert_eq!(args, vec![]);
}
}
// End-to-end translation of a query containing a `fulltext` function clause over `:foo/fts`.
//
// NOTE(review): the expected SQL below looks copied from the numeric-predicate tests. It
// expects `a = 99` and a plain `v = $v0` comparison, whereas `prepopulated_schema` registers
// `:foo/fts` as entid 100 and the fulltext constraint is emitted with the `MATCH` operator
// against a fulltext values alias. Presumably this is a placeholder until bindings are
// processed -- confirm and update the expectation.
#[test]
fn test_fulltext() {
let schema = prepopulated_schema();
let input = r#"[:find ?x . :where [(fulltext $ :foo/fts "yyy") [?x]]]"#;
let SQLQuery { sql, args } = translate(&schema, input, None);
assert_eq!(sql, "SELECT `datoms00`.e AS `?x` FROM `datoms` AS `datoms00` WHERE `datoms00`.a = 99 AND `datoms00`.v = $v0 LIMIT 1");
assert_eq!(args, vec![make_arg("$v0", "yyy")]);
}

View file

@ -192,7 +192,10 @@ impl FromValue<FnArg> for FnArg {
println!("from_value {}", v.inner);
match v.inner {
edn::SpannedValue::Integer(i) => Some(FnArg::EntidOrInteger(i)),
edn::SpannedValue::Boolean(b) => Some(FnArg::Constant(NonIntegerConstant::Boolean(b))),
edn::SpannedValue::BigInteger(x) => Some(FnArg::Constant(NonIntegerConstant::BigInteger(x))),
edn::SpannedValue::Float(f) => Some(FnArg::Constant(NonIntegerConstant::Float(f))),
edn::SpannedValue::Text(ref s) => Some(FnArg::Constant(NonIntegerConstant::Text(Rc::new(s.clone())))),
_ => unimplemented!(),
}})
}