Rework caching and use it inside the query engine. (#553) r=emily

This puts caching in mentat_db, adds a reverse lookup capability for
unique attributes, and populates bidirectional caches with a single
SQL cursor walk.

Differentiate between begin_read and begin_uncached_read.

Note that we still allow toggling within InProgress, because there might be
transient local state that makes starting a new transaction impossible.
This commit is contained in:
Richard Newman 2018-02-13 16:51:21 -08:00
parent df3cdb5db6
commit e33fe71c47
29 changed files with 2025 additions and 644 deletions

32
core/src/cache.rs Normal file
View file

@ -0,0 +1,32 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
/// Cache traits.
use std::collections::{
BTreeSet,
};
use ::{
Entid,
Schema,
TypedValue,
};
/// Read access to cached attribute values, in the forward (entity -> value)
/// and reverse (value -> entity) directions.
pub trait CachedAttributes {
/// Whether the attribute `entid` is cached in the reverse (value -> entity) direction.
fn is_attribute_cached_reverse(&self, entid: Entid) -> bool;
/// Whether the attribute `entid` is cached in the forward (entity -> value) direction.
fn is_attribute_cached_forward(&self, entid: Entid) -> bool;
/// Forward lookup returning all cached values of `attribute` for `entid`,
/// or `None` if nothing is cached for that pair.
fn get_values_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&Vec<TypedValue>>;
/// Forward lookup returning the single cached value of `attribute` for `entid`,
/// or `None` if nothing is cached for that pair.
fn get_value_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&TypedValue>;
/// Reverse lookup returning the one entity cached as holding `value` for `attribute`.
fn get_entid_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<Entid>;
/// Reverse lookup returning every entity cached as holding `value` for `attribute`.
fn get_entids_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<&BTreeSet<Entid>>;
}

View file

@ -23,6 +23,7 @@ extern crate serde_derive;
extern crate edn;
pub mod values;
mod cache;
use std::collections::{
BTreeMap,
@ -50,6 +51,8 @@ pub use edn::{
Utc,
};
pub use cache::CachedAttributes;
/// Core types defining a Mentat knowledge base.
/// Represents one entid in the entid space.

View file

@ -8,92 +8,557 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::cmp::Ord;
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::collections::{
BTreeMap,
BTreeSet,
};
use std::iter::Peekable;
use rusqlite;
use errors::{
Result
};
use db::{
TypedSQLValue,
};
use mentat_core::{
CachedAttributes,
Entid,
HasSchema,
Schema,
TypedValue,
};
use db::{
TypedSQLValue,
};
use errors::{
ErrorKind,
Result,
};
pub type Aev = (Entid, Entid, TypedValue);
/// Convert one result row into an `Aev` triple.
///
/// The row is read positionally as `(a, e, v, value_type_tag)`: column 0 is the
/// attribute, column 1 the entity, column 2 the raw value and column 3 the tag
/// used to interpret that value.
///
/// # Panics
///
/// Panics if the `(v, value_type_tag)` pair cannot be converted to a `TypedValue`.
fn row_to_aev(row: &rusqlite::Row) -> Aev {
    let a: Entid = row.get(0);
    let e: Entid = row.get(1);
    let value_type_tag: i32 = row.get(3);
    // A failed conversion means the stored datom is malformed; treat it as a broken
    // invariant and say so, rather than panicking with a bare `unwrap`.
    let v = TypedValue::from_sql_value_pair(row.get(2), value_type_tag)
        .expect("stored (v, value_type_tag) pair should convert to a TypedValue");
    (a, e, v)
}
pub type CacheMap<K, V> = BTreeMap<K, V>;
pub trait ValueProvider<K, V>: Clone {
fn fetch_values<'sqlite>(&mut self, sqlite: &'sqlite rusqlite::Connection) -> Result<CacheMap<K, V>>;
/// Wrapper around a `rusqlite::MappedRows` whose mapping function turns each row into an `Aev`.
pub struct AevRows<'conn> {
// The mapper is stored as a plain function pointer so the field type can be named.
rows: rusqlite::MappedRows<'conn, fn(&rusqlite::Row) -> Aev>,
}
pub trait Cacheable {
type Key;
type Value;
fn cache_values<'sqlite>(&mut self, sqlite: &'sqlite rusqlite::Connection) -> Result<()>;
fn get(&self, key: &Self::Key) -> Option<&Self::Value>;
/// Yields bare `Aev` values by unwrapping each `Result` produced by the underlying
/// `MappedRows`. Errors are not mapped or propagated for now: a failed row panics.
impl<'conn> Iterator for AevRows<'conn> {
    type Item = Aev;

    fn next(&mut self) -> Option<Aev> {
        match self.rows.next() {
            Some(row_result) => {
                Some(row_result.expect("All database contents should be representable"))
            },
            None => None,
        }
    }
}
#[derive(Clone)]
pub struct EagerCache<K, V, VP> where K: Ord, VP: ValueProvider<K, V> {
pub cache: CacheMap<K, V>,
value_provider: VP,
// The behavior of the cache is different for different kinds of attributes:
// - cardinality/one doesn't need a vec
// - unique/* should have a bijective mapping (reverse lookup)
/// Forward cache interface for attributes that hold at most one value per entity.
trait CardinalityOneCache {
/// Remove every cached entry.
fn clear(&mut self);
/// Record `v` as the value for entity `e`.
fn set(&mut self, e: Entid, v: TypedValue);
/// Look up the cached value for entity `e`, if any.
fn get(&self, e: Entid) -> Option<&TypedValue>;
}
impl<K, V, VP> EagerCache<K, V, VP> where K: Ord, VP: ValueProvider<K, V> {
pub fn new(value_provider: VP) -> Self {
EagerCache {
cache: CacheMap::new(),
value_provider: value_provider,
/// Forward cache interface for attributes that can hold several values per entity.
trait CardinalityManyCache {
/// Remove every cached entry.
fn clear(&mut self);
/// Accumulate: add `v` to the values recorded for entity `e`.
fn acc(&mut self, e: Entid, v: TypedValue);
/// Record `vs` as the complete list of values for entity `e`.
fn set(&mut self, e: Entid, vs: Vec<TypedValue>);
/// Look up the cached values for entity `e`, if any.
fn get(&self, e: Entid) -> Option<&Vec<TypedValue>>;
}
/// Forward cache for one single-valued attribute: entity -> value.
#[derive(Debug, Default)]
struct SingleValAttributeCache {
// NOTE(review): the visible code only builds this struct through `Default::default`,
// so `attr` appears to stay at its default value — confirm whether it should hold
// the owning attribute.
attr: Entid,
// Entity -> its cached value.
e_v: CacheMap<Entid, TypedValue>,
}
impl CardinalityOneCache for SingleValAttributeCache {
    /// Drop every cached entity -> value pair.
    fn clear(&mut self) {
        self.e_v.clear();
    }

    /// Store `v` for `e`, replacing any value cached earlier for that entity.
    fn set(&mut self, e: Entid, v: TypedValue) {
        let _replaced = self.e_v.insert(e, v);
    }

    /// Borrow the value cached for `e`, if there is one.
    fn get(&self, e: Entid) -> Option<&TypedValue> {
        let key = e;
        self.e_v.get(&key)
    }
}
/// Forward cache for one multi-valued attribute: entity -> list of values.
#[derive(Debug, Default)]
struct MultiValAttributeCache {
// NOTE(review): the visible code only builds this struct through `Default::default`,
// so `attr` appears to stay at its default value — confirm whether it should hold
// the owning attribute.
attr: Entid,
// Entity -> its cached values, in insertion order.
e_vs: CacheMap<Entid, Vec<TypedValue>>,
}
impl CardinalityManyCache for MultiValAttributeCache {
    /// Drop every cached entity -> values entry.
    fn clear(&mut self) {
        self.e_vs.clear();
    }

    /// Append `v` to the values cached for `e`, starting a fresh list if `e` is new.
    fn acc(&mut self, e: Entid, v: TypedValue) {
        let values = self.e_vs.entry(e).or_insert_with(Vec::new);
        values.push(v);
    }

    /// Replace whatever is cached for `e` with exactly `vs`.
    fn set(&mut self, e: Entid, vs: Vec<TypedValue>) {
        let _replaced = self.e_vs.insert(e, vs);
    }

    /// Borrow the values cached for `e`, if there are any.
    fn get(&self, e: Entid) -> Option<&Vec<TypedValue>> {
        let key = e;
        self.e_vs.get(&key)
    }
}
/// Reverse cache for one unique attribute: value -> the single entity holding it.
#[derive(Debug, Default)]
struct UniqueReverseAttributeCache {
// NOTE(review): the visible code only builds this struct through `Default::default`,
// so `attr` appears to stay at its default value — confirm whether it should hold
// the owning attribute.
attr: Entid,
// Value -> the entity recorded for it.
v_e: CacheMap<TypedValue, Entid>,
}
impl UniqueReverseAttributeCache {
    /// Drop every cached value -> entity pair.
    fn clear(&mut self) {
        self.v_e.clear();
    }

    /// Record that value `v` belongs to entity `e`, replacing any earlier owner of `v`.
    fn set(&mut self, e: Entid, v: TypedValue) {
        let _replaced = self.v_e.insert(v, e);
    }

    /// Return the entity cached for value `v`, if there is one.
    fn get_e(&self, v: &TypedValue) -> Option<Entid> {
        match self.v_e.get(v) {
            Some(entity) => Some(entity.clone()),
            None => None,
        }
    }
}
/// Reverse cache for one non-unique attribute: value -> set of entities holding it.
#[derive(Debug, Default)]
struct NonUniqueReverseAttributeCache {
// NOTE(review): the visible code only builds this struct through `Default::default`,
// so `attr` appears to stay at its default value — confirm whether it should hold
// the owning attribute.
attr: Entid,
// Value -> every entity recorded for it.
v_es: CacheMap<TypedValue, BTreeSet<Entid>>,
}
impl NonUniqueReverseAttributeCache {
    /// Drop every cached value -> entities entry.
    fn clear(&mut self) {
        self.v_es.clear();
    }

    /// Add `e` to the set of entities cached for value `v`, creating the set if needed.
    fn acc(&mut self, e: Entid, v: TypedValue) {
        let entities = self.v_es.entry(v).or_insert_with(BTreeSet::new);
        entities.insert(e);
    }

    /// Borrow the set of entities cached for value `v`, if there is one.
    fn get_es(&self, v: &TypedValue) -> Option<&BTreeSet<Entid>> {
        self.v_es.get(v)
    }
}
/// The set of registered attributes plus their per-attribute forward and reverse caches.
#[derive(Debug, Default)]
pub struct AttributeCaches {
// Attributes registered for reverse (value -> entity) caching.
reverse_cached_attributes: BTreeSet<Entid>,
// Attributes registered for forward (entity -> value) caching.
forward_cached_attributes: BTreeSet<Entid>,
// Per-attribute caches, each keyed by attribute entid.
single_vals: BTreeMap<Entid, SingleValAttributeCache>,
multi_vals: BTreeMap<Entid, MultiValAttributeCache>,
unique_reverse: BTreeMap<Entid, UniqueReverseAttributeCache>,
non_unique_reverse: BTreeMap<Entid, NonUniqueReverseAttributeCache>,
}
/// Consume the leading run of rows in `iter` whose attribute equals `a`, passing each
/// row's `(e, v)` to `f`.
///
/// Stops at the first row with a different attribute, or at the end of input, and
/// leaves that row unconsumed.
fn with_aev_iter<F, I>(a: Entid, iter: &mut Peekable<I>, mut f: F)
where
    I: Iterator<Item = Aev>,
    F: FnMut(Entid, TypedValue),
{
    // Peek first so that a non-matching row stays in the iterator for the caller.
    while iter.peek().map_or(false, |row| row.0 == a) {
        if let Some((_, e, v)) = iter.next() {
            f(e, v);
        }
    }
}
/// Consume the leading rows for attribute `a` from `iter`, storing each `(e, v)` in the
/// single-valued forward cache `f` via `set`.
fn accumulate_single_val_evs_forward<I, C>(a: Entid, f: &mut C, iter: &mut Peekable<I>) where I: Iterator<Item=Aev>, C: CardinalityOneCache {
with_aev_iter(a, iter, |e, v| f.set(e, v))
}
/// Consume the leading rows for attribute `a` from `iter`, appending each `(e, v)` to the
/// multi-valued forward cache `f` via `acc`.
fn accumulate_multi_val_evs_forward<I, C>(a: Entid, f: &mut C, iter: &mut Peekable<I>) where I: Iterator<Item=Aev>, C: CardinalityManyCache {
with_aev_iter(a, iter, |e, v| f.acc(e, v))
}
/// Consume the leading rows for attribute `a` from `iter`, storing each `(e, v)` in the
/// unique reverse cache `r` via `set`.
fn accumulate_unique_evs_reverse<I>(a: Entid, r: &mut UniqueReverseAttributeCache, iter: &mut Peekable<I>) where I: Iterator<Item=Aev> {
with_aev_iter(a, iter, |e, v| r.set(e, v))
}
/// Consume the leading rows for attribute `a` from `iter`, adding each `(e, v)` to the
/// non-unique reverse cache `r` via `acc`.
fn accumulate_non_unique_evs_reverse<I>(a: Entid, r: &mut NonUniqueReverseAttributeCache, iter: &mut Peekable<I>) where I: Iterator<Item=Aev> {
with_aev_iter(a, iter, |e, v| r.acc(e, v))
}
/// Consume the leading rows for attribute `a` from `iter`, recording each `(e, v)` in both
/// the single-valued forward cache `f` and the unique reverse cache `r`.
fn accumulate_single_val_unique_evs_both<I, C>(a: Entid, f: &mut C, r: &mut UniqueReverseAttributeCache, iter: &mut Peekable<I>)
where
    I: Iterator<Item = Aev>,
    C: CardinalityOneCache,
{
    with_aev_iter(a, iter, |entity, value| {
        // The forward cache gets a copy so the reverse cache can take ownership.
        let forward_value = value.clone();
        f.set(entity, forward_value);
        r.set(entity, value);
    })
}
/// Consume the leading rows for attribute `a` from `iter`, recording each `(e, v)` in both
/// the multi-valued forward cache `f` and the unique reverse cache `r`.
fn accumulate_multi_val_unique_evs_both<I, C>(a: Entid, f: &mut C, r: &mut UniqueReverseAttributeCache, iter: &mut Peekable<I>)
where
    I: Iterator<Item = Aev>,
    C: CardinalityManyCache,
{
    with_aev_iter(a, iter, |entity, value| {
        // The forward cache gets a copy so the reverse cache can take ownership.
        let forward_value = value.clone();
        f.acc(entity, forward_value);
        r.set(entity, value);
    })
}
/// Consume the leading rows for attribute `a` from `iter`, recording each `(e, v)` in both
/// the single-valued forward cache `f` and the non-unique reverse cache `r`.
fn accumulate_single_val_non_unique_evs_both<I, C>(a: Entid, f: &mut C, r: &mut NonUniqueReverseAttributeCache, iter: &mut Peekable<I>)
where
    I: Iterator<Item = Aev>,
    C: CardinalityOneCache,
{
    with_aev_iter(a, iter, |entity, value| {
        // The forward cache gets a copy so the reverse cache can take ownership.
        let forward_value = value.clone();
        f.set(entity, forward_value);
        r.acc(entity, value);
    })
}
/// Consume the leading rows for attribute `a` from `iter`, recording each `(e, v)` in both
/// the multi-valued forward cache `f` and the non-unique reverse cache `r`.
fn accumulate_multi_val_non_unique_evs_both<I, C>(a: Entid, f: &mut C, r: &mut NonUniqueReverseAttributeCache, iter: &mut Peekable<I>)
where
    I: Iterator<Item = Aev>,
    C: CardinalityManyCache,
{
    with_aev_iter(a, iter, |entity, value| {
        // The forward cache gets a copy so the reverse cache can take ownership.
        let forward_value = value.clone();
        f.acc(entity, forward_value);
        r.acc(entity, value);
    })
}
// TODO: if an entity or attribute is ever renumbered, the cache will need to be rebuilt.
impl AttributeCaches {
    //
    // These function names are brief and local.
    // f = forward; r = reverse; both = both forward and reverse.
    // s = single-val; m = multi-val.
    // u = unique; nu = non-unique.
    // c = cache.

    /// Forward single-valued cache for `a`, created empty on first use.
    #[inline]
    fn fsc(&mut self, a: Entid) -> &mut SingleValAttributeCache {
        self.single_vals
            .entry(a)
            .or_insert_with(Default::default)
    }

    /// Forward multi-valued cache for `a`, created empty on first use.
    #[inline]
    fn fmc(&mut self, a: Entid) -> &mut MultiValAttributeCache {
        self.multi_vals
            .entry(a)
            .or_insert_with(Default::default)
    }

    /// Reverse unique cache for `a`, created empty on first use.
    #[inline]
    fn ruc(&mut self, a: Entid) -> &mut UniqueReverseAttributeCache {
        self.unique_reverse
            .entry(a)
            .or_insert_with(Default::default)
    }

    /// Reverse non-unique cache for `a`, created empty on first use.
    #[inline]
    fn rnuc(&mut self, a: Entid) -> &mut NonUniqueReverseAttributeCache {
        self.non_unique_reverse
            .entry(a)
            .or_insert_with(Default::default)
    }

    /// Forward single-valued and reverse unique caches for `a`, borrowed together.
    #[inline]
    fn both_s_u<'r>(&'r mut self, a: Entid) -> (&'r mut SingleValAttributeCache, &'r mut UniqueReverseAttributeCache) {
        (self.single_vals.entry(a).or_insert_with(Default::default),
         self.unique_reverse.entry(a).or_insert_with(Default::default))
    }

    /// Forward multi-valued and reverse unique caches for `a`, borrowed together.
    #[inline]
    fn both_m_u<'r>(&'r mut self, a: Entid) -> (&'r mut MultiValAttributeCache, &'r mut UniqueReverseAttributeCache) {
        (self.multi_vals.entry(a).or_insert_with(Default::default),
         self.unique_reverse.entry(a).or_insert_with(Default::default))
    }

    /// Forward single-valued and reverse non-unique caches for `a`, borrowed together.
    #[inline]
    fn both_s_nu<'r>(&'r mut self, a: Entid) -> (&'r mut SingleValAttributeCache, &'r mut NonUniqueReverseAttributeCache) {
        (self.single_vals.entry(a).or_insert_with(Default::default),
         self.non_unique_reverse.entry(a).or_insert_with(Default::default))
    }

    /// Forward multi-valued and reverse non-unique caches for `a`, borrowed together.
    #[inline]
    fn both_m_nu<'r>(&'r mut self, a: Entid) -> (&'r mut MultiValAttributeCache, &'r mut NonUniqueReverseAttributeCache) {
        (self.multi_vals.entry(a).or_insert_with(Default::default),
         self.non_unique_reverse.entry(a).or_insert_with(Default::default))
    }

    /// Process rows in `iter` that all share an attribute with the first. Leaves the iterator
    /// advanced to the first non-matching row.
    ///
    /// Which caches are filled depends on the directions the attribute is registered for
    /// and on its `multival` and `unique` flags in `schema`. When `replace_a` is true, the
    /// affected caches for that attribute are cleared before the rows are accumulated.
    ///
    /// Rows whose attribute is not present in `schema` are skipped (consumed without being
    /// cached), so the iterator always advances whenever it is non-empty.
    ///
    /// # Panics
    ///
    /// Panics if the attribute is known to the schema but registered in neither direction.
    fn accumulate_evs<I>(&mut self, schema: &Schema, iter: &mut Peekable<I>, replace_a: bool) where I: Iterator<Item=Aev> {
        let a = match iter.peek() {
            Some(&(a, _, _)) => a,
            None => return,
        };

        let (multi, unique) = match schema.attribute_for_entid(a) {
            Some(attribute) => (attribute.multival, attribute.unique.is_some()),
            None => {
                // We cannot classify these rows, so drop them. Leaving them in place would
                // make `add_to_cache` spin forever on the same unconsumed row.
                while iter.peek().map_or(false, |row| row.0 == a) {
                    iter.next();
                }
                return;
            },
        };

        let forward = self.is_attribute_cached_forward(a);
        let reverse = self.is_attribute_cached_reverse(a);

        match (forward, reverse, multi, unique) {
            (true, true, true, true) => {
                let (f, r) = self.both_m_u(a);
                if replace_a {
                    f.clear();
                    r.clear();
                }
                accumulate_multi_val_unique_evs_both(a, f, r, iter);
            },
            (true, true, true, false) => {
                let (f, r) = self.both_m_nu(a);
                if replace_a {
                    f.clear();
                    r.clear();
                }
                accumulate_multi_val_non_unique_evs_both(a, f, r, iter);
            },
            (true, true, false, true) => {
                let (f, r) = self.both_s_u(a);
                if replace_a {
                    f.clear();
                    r.clear();
                }
                accumulate_single_val_unique_evs_both(a, f, r, iter);
            },
            (true, true, false, false) => {
                let (f, r) = self.both_s_nu(a);
                if replace_a {
                    f.clear();
                    r.clear();
                }
                accumulate_single_val_non_unique_evs_both(a, f, r, iter);
            },
            (true, false, true, _) => {
                let f = self.fmc(a);
                if replace_a {
                    f.clear();
                }
                accumulate_multi_val_evs_forward(a, f, iter)
            },
            (true, false, false, _) => {
                let f = self.fsc(a);
                if replace_a {
                    f.clear();
                }
                accumulate_single_val_evs_forward(a, f, iter)
            },
            (false, true, _, true) => {
                let r = self.ruc(a);
                if replace_a {
                    r.clear();
                }
                accumulate_unique_evs_reverse(a, r, iter);
            },
            (false, true, _, false) => {
                let r = self.rnuc(a);
                if replace_a {
                    r.clear();
                }
                accumulate_non_unique_evs_reverse(a, r, iter);
            },
            (false, false, _, _) => {
                unreachable!();           // Must be cached in at least one direction!
            },
        }
    }

    /// Feed every row of `iter` into the caches, one same-attribute run at a time.
    ///
    /// Each call to `accumulate_evs` consumes at least one row, so this terminates once
    /// the iterator is exhausted. Currently always returns `Ok(())`.
    fn add_to_cache<I>(&mut self, schema: &Schema, mut iter: Peekable<I>, replace_a: bool) -> Result<()> where I: Iterator<Item=Aev> {
        while iter.peek().is_some() {
            self.accumulate_evs(schema, &mut iter, replace_a);
        }
        Ok(())
    }

    /// Drop all cached values while keeping the attribute registrations.
    fn clear_cache(&mut self) {
        self.single_vals.clear();
        self.multi_vals.clear();
        self.unique_reverse.clear();
        self.non_unique_reverse.clear();
    }

    /// Forget every registered attribute and drop all cached values.
    fn unregister_all_attributes(&mut self) {
        self.reverse_cached_attributes.clear();
        self.forward_cached_attributes.clear();
        self.clear_cache();
    }

    /// Forget `attribute` in both directions and drop every cache held for it.
    pub fn unregister_attribute<U>(&mut self, attribute: U)
    where U: Into<Entid> {
        let a = attribute.into();
        self.reverse_cached_attributes.remove(&a);
        self.forward_cached_attributes.remove(&a);
        self.single_vals.remove(&a);
        self.multi_vals.remove(&a);
        self.unique_reverse.remove(&a);
        self.non_unique_reverse.remove(&a);
    }
}
impl CachedAttributes for AttributeCaches {
    /// All cached values of `attribute` for `entid`, or `None` when `values_pairs`
    /// has no map for the attribute or the map has no entry for the entity.
    fn get_values_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&Vec<TypedValue>> {
        match self.values_pairs(schema, attribute) {
            Some(pairs) => pairs.get(&entid),
            None => None,
        }
    }

    /// The single cached value of `attribute` for `entid`, or `None` when `value_pairs`
    /// has no map for the attribute or the map has no entry for the entity.
    fn get_value_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&TypedValue> {
        match self.value_pairs(schema, attribute) {
            Some(pairs) => pairs.get(&entid),
            None => None,
        }
    }

    /// True when `attribute` is in the set of reverse-registered attributes.
    fn is_attribute_cached_reverse(&self, attribute: Entid) -> bool {
        let registered = &self.reverse_cached_attributes;
        registered.contains(&attribute)
    }

    /// True when `attribute` is in the set of forward-registered attributes.
    fn is_attribute_cached_forward(&self, attribute: Entid) -> bool {
        let registered = &self.forward_cached_attributes;
        registered.contains(&attribute)
    }

    /// The entity cached for `value` in the unique reverse cache of `attribute`.
    /// Always `None` when the attribute is not registered for reverse caching.
    fn get_entid_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<Entid> {
        if !self.is_attribute_cached_reverse(attribute) {
            return None;
        }
        self.unique_reverse
            .get(&attribute)
            .and_then(|cache| cache.get_e(value))
    }

    /// The entities cached for `value` in the non-unique reverse cache of `attribute`.
    /// Always `None` when the attribute is not registered for reverse caching.
    fn get_entids_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<&BTreeSet<Entid>> {
        if !self.is_attribute_cached_reverse(attribute) {
            return None;
        }
        self.non_unique_reverse
            .get(&attribute)
            .and_then(|cache| cache.get_es(value))
    }
}
impl<K, V, VP> Cacheable for EagerCache<K, V, VP>
where K: Ord + Clone + Debug + ::std::hash::Hash,
V: Clone,
VP: ValueProvider<K, V> {
type Key = K;
type Value = V;
impl AttributeCaches {
/// The entity -> values map cached for a multi-valued `attribute`.
///
/// Returns `None` when the schema does not know the attribute, when the attribute
/// is not multi-valued, or when no multi-valued cache exists for it.
fn values_pairs<U>(&self, schema: &Schema, attribute: U) -> Option<&BTreeMap<Entid, Vec<TypedValue>>>
where U: Into<Entid> {
    let attribute = attribute.into();
    match schema.attribute_for_entid(attribute) {
        Some(attr) if attr.multival => {
            self.multi_vals
                .get(&attribute)
                .map(|cache| &cache.e_vs)
        },
        _ => None,
    }
}
fn cache_values<'sqlite>(&mut self, sqlite: &'sqlite rusqlite::Connection) -> Result<()> {
// fetch results and add to cache
self.cache = self.value_provider.fetch_values(sqlite)?;
/// The entity -> value map cached for a single-valued `attribute`.
///
/// Returns `None` when the schema does not know the attribute, when the attribute
/// is multi-valued, or when no single-valued cache exists for it.
fn value_pairs<U>(&self, schema: &Schema, attribute: U) -> Option<&CacheMap<Entid, TypedValue>>
where U: Into<Entid> {
    let attribute = attribute.into();
    match schema.attribute_for_entid(attribute) {
        Some(attr) if !attr.multival => {
            self.single_vals
                .get(&attribute)
                .map(|cache| &cache.e_v)
        },
        _ => None,
    }
}
}
/// An `AttributeCaches` that is populated by querying a SQLite connection.
#[derive(Debug, Default)]
pub struct SQLiteAttributeCache {
// The in-memory caches that all lookups are delegated to.
inner: AttributeCaches,
}
impl SQLiteAttributeCache {
/// Register `attribute` for forward (entity -> value) caching and load its datoms.
///
/// # Errors
///
/// Returns `ErrorKind::UnknownAttribute` if `schema` has no such attribute, or any
/// error raised while repopulating the cache from `sqlite`.
pub fn register_forward<U>(&mut self, schema: &Schema, sqlite: &rusqlite::Connection, attribute: U) -> Result<()>
where U: Into<Entid> {
    let a = attribute.into();

    // The attribute must exist!
    if schema.attribute_for_entid(a).is_none() {
        return Err(ErrorKind::UnknownAttribute(a).into());
    }

    self.inner.forward_cached_attributes.insert(a);
    self.repopulate(schema, sqlite, a)
}
/// Register `attribute` for reverse (value -> entity) caching and load its datoms.
///
/// # Errors
///
/// Returns `ErrorKind::UnknownAttribute` if `schema` has no such attribute, or any
/// error raised while repopulating the cache from `sqlite`.
pub fn register_reverse<U>(&mut self, schema: &Schema, sqlite: &rusqlite::Connection, attribute: U) -> Result<()>
where U: Into<Entid> {
    let a = attribute.into();

    // The attribute must exist!
    if schema.attribute_for_entid(a).is_none() {
        return Err(ErrorKind::UnknownAttribute(a).into());
    }

    self.inner.reverse_cached_attributes.insert(a);
    self.repopulate(schema, sqlite, a)
}
pub fn register<U>(&mut self, schema: &Schema, sqlite: &rusqlite::Connection, attribute: U) -> Result<()>
where U: Into<Entid> {
let a = attribute.into();
// TODO: reverse-index unique by default?
self.inner.forward_cached_attributes.insert(a);
self.inner.reverse_cached_attributes.insert(a);
self.repopulate(schema, sqlite, a)
}
/// Reload every datom for `attribute` from `sqlite` into the inner caches,
/// replacing whatever was cached for that attribute before.
fn repopulate(&mut self, schema: &Schema, sqlite: &rusqlite::Connection, attribute: Entid) -> Result<()> {
// The column order (a, e, v, value_type_tag) is what `row_to_aev` reads by position.
let sql = "SELECT a, e, v, value_type_tag FROM datoms WHERE a = ? ORDER BY a ASC, e ASC";
let args: Vec<&rusqlite::types::ToSql> = vec![&attribute];
let mut stmt = sqlite.prepare(sql)?;
// Cast to a plain function pointer so the result matches the field type of `AevRows`.
let rows = stmt.query_map(&args, row_to_aev as fn(&rusqlite::Row) -> Aev)?;
let aevs = AevRows {
rows: rows,
};
// `true` is `replace_a`: existing entries for the attribute are cleared first.
self.inner.add_to_cache(schema, aevs.peekable(), true)?;
Ok(())
}
fn get(&self, key: &Self::Key) -> Option<&Self::Value> {
self.cache.get(&key)
/// Stop caching `attribute` in both directions and drop its cached values.
pub fn unregister<U>(&mut self, attribute: U)
where U: Into<Entid> {
self.inner.unregister_attribute(attribute);
}
/// Stop caching every attribute and drop all cached values.
pub fn unregister_all(&mut self) {
self.inner.unregister_all_attributes();
}
}
#[derive(Clone)]
pub struct AttributeValueProvider {
pub attribute: Entid,
}
impl CachedAttributes for SQLiteAttributeCache {
// Delegates to the inner `AttributeCaches`.
fn get_values_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&Vec<TypedValue>> {
self.inner.get_values_for_entid(schema, attribute, entid)
}
impl ValueProvider<Entid, Vec<TypedValue>> for AttributeValueProvider {
fn fetch_values<'sqlite>(&mut self, sqlite: &'sqlite rusqlite::Connection) -> Result<CacheMap<Entid, Vec<TypedValue>>> {
let sql = "SELECT e, v, value_type_tag FROM datoms WHERE a = ? ORDER BY e ASC";
let mut stmt = sqlite.prepare(sql)?;
let value_iter = stmt.query_map(&[&self.attribute], |row| {
let entid: Entid = row.get(0);
let value_type_tag: i32 = row.get(2);
let value = TypedValue::from_sql_value_pair(row.get(1), value_type_tag).map(|x| x).unwrap();
(entid, value)
}).map_err(|e| e.into());
value_iter.map(|v| {
v.fold(CacheMap::new(), |mut map, row| {
let _ = row.map(|r| {
map.entry(r.0).or_insert(vec![]).push(r.1);
});
map
})
})
// Delegates to the inner `AttributeCaches`.
fn get_value_for_entid(&self, schema: &Schema, attribute: Entid, entid: Entid) -> Option<&TypedValue> {
self.inner.get_value_for_entid(schema, attribute, entid)
}
// Delegates to the inner `AttributeCaches`.
fn is_attribute_cached_reverse(&self, attribute: Entid) -> bool {
self.inner.is_attribute_cached_reverse(attribute)
}
// Delegates to the inner `AttributeCaches`.
fn is_attribute_cached_forward(&self, attribute: Entid) -> bool {
self.inner.is_attribute_cached_forward(attribute)
}
// Delegates to the inner `AttributeCaches`.
fn get_entids_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<&BTreeSet<Entid>> {
self.inner.get_entids_for_value(attribute, value)
}
// Delegates to the inner `AttributeCaches`.
fn get_entid_for_value(&self, attribute: Entid, value: &TypedValue) -> Option<Entid> {
self.inner.get_entid_for_value(attribute, value)
}
}
// Accessors exposing the raw per-attribute maps of the inner cache.
impl SQLiteAttributeCache {
/// Intended for use from tests.
///
/// Returns the entity -> values map that the inner cache holds for `attribute`.
pub fn values_pairs<U>(&self, schema: &Schema, attribute: U) -> Option<&BTreeMap<Entid, Vec<TypedValue>>>
where U: Into<Entid> {
self.inner.values_pairs(schema, attribute)
}
/// Intended for use from tests.
///
/// Returns the entity -> value map that the inner cache holds for `attribute`.
pub fn value_pairs<U>(&self, schema: &Schema, attribute: U) -> Option<&BTreeMap<Entid, TypedValue>>
where U: Into<Entid> {
self.inner.value_pairs(schema, attribute)
}
}

View file

@ -916,15 +916,16 @@ impl MentatStoring for rusqlite::Connection {
// First, insert all fulltext string values.
// `fts_params` reference computed values in `block`.
let fts_params: Vec<&ToSql> = block.iter()
.filter(|&&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref _searchid)| {
value.is_some()
})
.flat_map(|&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref searchid)| {
// Avoid inner heap allocation.
once(value as &ToSql)
.chain(once(searchid as &ToSql))
}).collect();
let fts_params: Vec<&ToSql> =
block.iter()
.filter(|&&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref _searchid)| {
value.is_some()
})
.flat_map(|&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref searchid)| {
// Avoid inner heap allocation.
once(value as &ToSql)
.chain(once(searchid as &ToSql))
}).collect();
// TODO: make this maximally efficient. It's not terribly inefficient right now.
let fts_values: String = repeat_values(2, string_count);

View file

@ -87,5 +87,15 @@ error_chain! {
description("conflicting datoms in tx")
display("conflicting datoms in tx")
}
UnknownAttribute(attr: Entid) {
description("unknown attribute")
display("unknown attribute for entid: {}", attr)
}
CannotCacheNonUniqueAttributeInReverse(attr: Entid) {
description("cannot reverse-cache non-unique attribute")
display("cannot reverse-cache non-unique attribute: {}", attr)
}
}
}

View file

@ -10,7 +10,6 @@
use mentat_core::{
HasSchema,
Schema,
TypedValue,
ValueType,
};
@ -48,9 +47,11 @@ use types::{
SourceAlias,
};
use Known;
impl ConjoiningClauses {
#[allow(unused_variables)]
pub fn apply_fulltext<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> {
pub fn apply_fulltext(&mut self, known: Known, where_fn: WhereFn) -> Result<()> {
if where_fn.args.len() != 3 {
bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 3));
}
@ -96,6 +97,8 @@ impl ConjoiningClauses {
_ => bail!(ErrorKind::InvalidArgument(where_fn.operator.clone(), "source variable".into(), 0)),
}
let schema = known.schema;
// TODO: accept placeholder and set of attributes. Alternately, consider putting the search
// term before the attribute arguments and collect the (variadic) attributes into a set.
// let a: Entid = self.resolve_attribute_argument(&where_fn.operator, 1, args.next().unwrap())?;
@ -130,7 +133,7 @@ impl ConjoiningClauses {
if !attribute.fulltext {
// We can never get results from a non-fulltext attribute!
println!("Can't run fulltext on non-fulltext attribute {}.", a);
self.mark_known_empty(EmptyBecause::InvalidAttributeEntid(a));
self.mark_known_empty(EmptyBecause::NonFulltextAttribute(a));
return Ok(());
}
@ -258,6 +261,7 @@ mod testing {
use mentat_core::{
Attribute,
Schema,
ValueType,
};
@ -294,8 +298,10 @@ mod testing {
..Default::default()
});
let known = Known::for_schema(&schema);
let op = PlainSymbol::new("fulltext");
cc.apply_fulltext(&schema, WhereFn {
cc.apply_fulltext(known, WhereFn {
operator: op,
args: vec![
FnArg::SrcVar(SrcVar::DefaultSrc),
@ -353,7 +359,7 @@ mod testing {
let mut cc = ConjoiningClauses::default();
let op = PlainSymbol::new("fulltext");
cc.apply_fulltext(&schema, WhereFn {
cc.apply_fulltext(known, WhereFn {
operator: op,
args: vec![
FnArg::SrcVar(SrcVar::DefaultSrc),

View file

@ -43,6 +43,8 @@ use types::{
VariableColumn,
};
use Known;
impl ConjoiningClauses {
/// Take a relation: a matrix of values which will successively bind to named variables of
/// the provided types.
@ -113,7 +115,7 @@ impl ConjoiningClauses {
Ok(())
}
pub fn apply_ground<'s>(&mut self, schema: &'s Schema, where_fn: WhereFn) -> Result<()> {
pub fn apply_ground(&mut self, known: Known, where_fn: WhereFn) -> Result<()> {
if where_fn.args.len() != 1 {
bail!(ErrorKind::InvalidNumberOfArguments(where_fn.operator.clone(), where_fn.args.len(), 1));
}
@ -130,6 +132,8 @@ impl ConjoiningClauses {
bail!(ErrorKind::InvalidBinding(where_fn.operator.clone(), BindingError::RepeatedBoundVariable));
}
let schema = known.schema;
// Scalar and tuple bindings are a little special: because there's only one value,
// we can immediately substitute the value as a known value in the CC, additionally
// generating a WHERE clause if columns have already been bound.
@ -350,10 +354,12 @@ mod testing {
..Default::default()
});
let known = Known::for_schema(&schema);
// It's awkward enough to write these expansions that we give the details for the simplest
// case only. See the tests of the translator for more extensive (albeit looser) coverage.
let op = PlainSymbol::new("ground");
cc.apply_ground(&schema, WhereFn {
cc.apply_ground(known, WhereFn {
operator: op,
args: vec![
FnArg::EntidOrInteger(10),

View file

@ -13,9 +13,12 @@ use std::cmp;
use std::collections::{
BTreeMap,
BTreeSet,
VecDeque,
};
use std::collections::btree_map::Entry;
use std::collections::btree_map::{
Entry,
};
use std::fmt::{
Debug,
@ -37,14 +40,15 @@ use mentat_core::counter::RcCounter;
use mentat_query::{
NamespacedKeyword,
NonIntegerConstant,
Pattern,
PatternNonValuePlace,
PatternValuePlace,
Variable,
WhereClause,
};
#[cfg(test)]
use mentat_query::{
PatternNonValuePlace,
};
use errors::{
Error,
ErrorKind,
@ -59,7 +63,11 @@ use types::{
DatomsColumn,
DatomsTable,
EmptyBecause,
EvolvedNonValuePlace,
EvolvedPattern,
EvolvedValuePlace,
FulltextColumn,
PlaceOrEmpty,
QualifiedAlias,
QueryValue,
SourceAlias,
@ -85,6 +93,8 @@ use validate::{
pub use self::inputs::QueryInputs;
use Known;
// We do this a lot for errors.
trait RcCloned<T> {
fn cloned(&self) -> T;
@ -146,6 +156,8 @@ impl<K: Clone + Ord, V: Clone> Intersection<K> for BTreeMap<K, V> {
}
}
type VariableBindings = BTreeMap<Variable, TypedValue>;
/// A `ConjoiningClauses` (CC) is a collection of clauses that are combined with `JOIN`.
/// The topmost form in a query is a `ConjoiningClauses`.
///
@ -205,7 +217,7 @@ pub struct ConjoiningClauses {
///
/// and for `?val` provide `TypedValue::String("foo".to_string())`, the query will be known at
/// algebrizing time to be empty.
value_bindings: BTreeMap<Variable, TypedValue>,
value_bindings: VariableBindings,
/// A map from var to type. Whenever a var maps unambiguously to two different types, it cannot
/// yield results, so we don't represent that case here. If a var isn't present in the map, it
@ -535,6 +547,23 @@ impl ConjoiningClauses {
self.narrow_types_for_var(variable, ValueTypeSet::of_numeric_types());
}
/// Check whether `var` could be constrained to `this_type` without contradicting its
/// already-known types. Returns the reason for the conflict, or `None` if there is none.
/// Takes `&self`: nothing is recorded or changed.
pub fn can_constrain_var_to_type(&self, var: &Variable, this_type: ValueType) -> Option<EmptyBecause> {
self.can_constrain_var_to_types(var, ValueTypeSet::of_one(this_type))
}
/// Check whether `var` could be constrained to `these_types` without contradicting its
/// already-known types.
///
/// Returns `Some(EmptyBecause::TypeMismatch { .. })` when `var` has known types that share
/// nothing with `these_types`. Returns `None` when `var` has no known types yet or the
/// two sets overlap.
fn can_constrain_var_to_types(&self, var: &Variable, these_types: ValueTypeSet) -> Option<EmptyBecause> {
    match self.known_types.get(var) {
        Some(existing) if existing.intersection(&these_types).is_empty() => {
            Some(EmptyBecause::TypeMismatch {
                var: var.clone(),
                existing: existing.clone(),
                desired: these_types,
            })
        },
        _ => None,
    }
}
/// Constrains the var if there's no existing type.
/// Marks as known-empty if it's impossible for this type to apply because there's a conflicting
/// type already known.
@ -673,17 +702,17 @@ impl ConjoiningClauses {
}
/// Ensure that the given place has the correct types to be a tx-id.
fn constrain_to_tx(&mut self, tx: &PatternNonValuePlace) {
fn constrain_to_tx(&mut self, tx: &EvolvedNonValuePlace) {
self.constrain_to_ref(tx);
}
/// Ensure that the given place can be an entity, and is congruent with existing types.
/// This is used for `entity` and `attribute` places in a pattern.
fn constrain_to_ref(&mut self, value: &PatternNonValuePlace) {
fn constrain_to_ref(&mut self, value: &EvolvedNonValuePlace) {
// If it's a variable, record that it has the right type.
// Ident or attribute resolution errors (the only other check we need to do) will be done
// by the caller.
if let &PatternNonValuePlace::Variable(ref v) = value {
if let &EvolvedNonValuePlace::Variable(ref v) = value {
self.constrain_var_to_type(v.clone(), ValueType::Ref)
}
}
@ -705,17 +734,17 @@ impl ConjoiningClauses {
schema.get_entid(&ident)
}
fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a PatternValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
fn table_for_attribute_and_value<'s, 'a>(&self, attribute: &'s Attribute, value: &'a EvolvedValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
if attribute.fulltext {
match value {
&PatternValuePlace::Placeholder =>
&EvolvedValuePlace::Placeholder =>
Ok(DatomsTable::Datoms), // We don't need the value.
// TODO: an existing non-string binding can cause this pattern to fail.
&PatternValuePlace::Variable(_) =>
&EvolvedValuePlace::Variable(_) =>
Ok(DatomsTable::AllDatoms),
&PatternValuePlace::Constant(NonIntegerConstant::Text(_)) =>
&EvolvedValuePlace::Value(TypedValue::String(_)) =>
Ok(DatomsTable::AllDatoms),
_ => {
@ -729,7 +758,7 @@ impl ConjoiningClauses {
}
}
fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a PatternValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
fn table_for_unknown_attribute<'s, 'a>(&self, value: &'a EvolvedValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
// If the value is known to be non-textual, we can simply use the regular datoms
// table (TODO: and exclude on `index_fulltext`!).
//
@ -742,7 +771,7 @@ impl ConjoiningClauses {
match value {
// TODO: see if the variable is projected, aggregated, or compared elsewhere in
// the query. If it's not, we don't need to use all_datoms here.
&PatternValuePlace::Variable(ref v) => {
&EvolvedValuePlace::Variable(ref v) => {
// If `required_types` and `known_types` don't exclude strings,
// we need to query `all_datoms`.
if self.required_types.get(v).map_or(true, |s| s.contains(ValueType::String)) &&
@ -752,7 +781,7 @@ impl ConjoiningClauses {
DatomsTable::Datoms
}
}
&PatternValuePlace::Constant(NonIntegerConstant::Text(_)) =>
&EvolvedValuePlace::Value(TypedValue::String(_)) =>
DatomsTable::AllDatoms,
_ =>
DatomsTable::Datoms,
@ -763,21 +792,17 @@ impl ConjoiningClauses {
/// If the attribute input or value binding doesn't name an attribute, or doesn't name an
/// attribute that is congruent with the supplied value, we return an `EmptyBecause`.
/// The caller is responsible for marking the CC as known-empty if this is a fatal failure.
fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a PatternNonValuePlace, value: &'a PatternValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
fn table_for_places<'s, 'a>(&self, schema: &'s Schema, attribute: &'a EvolvedNonValuePlace, value: &'a EvolvedValuePlace) -> ::std::result::Result<DatomsTable, EmptyBecause> {
match attribute {
&PatternNonValuePlace::Ident(ref kw) =>
schema.attribute_for_ident(kw)
.ok_or_else(|| EmptyBecause::InvalidAttributeIdent(kw.cloned()))
.and_then(|(attribute, _entid)| self.table_for_attribute_and_value(attribute, value)),
&PatternNonValuePlace::Entid(id) =>
&EvolvedNonValuePlace::Entid(id) =>
schema.attribute_for_entid(id)
.ok_or_else(|| EmptyBecause::InvalidAttributeEntid(id))
.and_then(|attribute| self.table_for_attribute_and_value(attribute, value)),
// TODO: In a prepared context, defer this decision until a second algebrizing phase.
// #278.
&PatternNonValuePlace::Placeholder =>
&EvolvedNonValuePlace::Placeholder =>
self.table_for_unknown_attribute(value),
&PatternNonValuePlace::Variable(ref v) => {
&EvolvedNonValuePlace::Variable(ref v) => {
// See if we have a binding for the variable.
match self.bound_value(v) {
// TODO: In a prepared context, defer this decision until a second algebrizing phase.
@ -786,7 +811,7 @@ impl ConjoiningClauses {
self.table_for_unknown_attribute(value),
Some(TypedValue::Ref(id)) =>
// Recurse: it's easy.
self.table_for_places(schema, &PatternNonValuePlace::Entid(id), value),
self.table_for_places(schema, &EvolvedNonValuePlace::Entid(id), value),
Some(TypedValue::Keyword(ref kw)) =>
// Don't recurse: avoid needing to clone the keyword.
schema.attribute_for_ident(kw)
@ -815,7 +840,7 @@ impl ConjoiningClauses {
/// This is a mutating method because it mutates the aliaser function!
/// Note that if this function decides that a pattern cannot match, it will flip
/// `empty_because`.
fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a Pattern) -> Option<SourceAlias> {
fn alias_table<'s, 'a>(&mut self, schema: &'s Schema, pattern: &'a EvolvedPattern) -> Option<SourceAlias> {
self.table_for_places(schema, &pattern.attribute, &pattern.value)
.map_err(|reason| {
self.mark_known_empty(reason);
@ -833,25 +858,22 @@ impl ConjoiningClauses {
}
}
/// Look up the schema `Attribute` named by `pattern.attribute`, if there is one.
///
/// An entid or ident is looked up directly in `schema`; a variable is resolved
/// through `self.value_bindings` and then `get_attribute_for_value`. Any other
/// place (including a placeholder) yields `None`, as does a lookup that finds
/// nothing.
fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<&'s Attribute> {
fn get_attribute<'s, 'a>(&self, schema: &'s Schema, pattern: &'a EvolvedPattern) -> Option<&'s Attribute> {
match pattern.attribute {
PatternNonValuePlace::Entid(id) =>
EvolvedNonValuePlace::Entid(id) =>
// We know this one is known if the attribute lookup succeeds…
schema.attribute_for_entid(id),
PatternNonValuePlace::Ident(ref kw) =>
// The ident lookup returns an (attribute, entid) pair; only the attribute is needed.
schema.attribute_for_ident(kw).map(|(a, _id)| a),
PatternNonValuePlace::Variable(ref var) =>
EvolvedNonValuePlace::Variable(ref var) =>
// If the pattern has a variable, we've already determined that the binding -- if
// any -- is acceptable and yields a table. Here, simply look to see if it names
// an attribute so we can find out the type.
self.value_bindings.get(var)
.and_then(|val| self.get_attribute_for_value(schema, val)),
_ =>
None,
// A placeholder names no attribute.
EvolvedNonValuePlace::Placeholder => None,
}
}
/// Return the `value_type` of the attribute that `get_attribute` resolves for
/// `pattern`, or `None` when no attribute can be resolved.
fn get_value_type<'s, 'a>(&self, schema: &'s Schema, pattern: &'a Pattern) -> Option<ValueType> {
fn get_value_type<'s, 'a>(&self, schema: &'s Schema, pattern: &'a EvolvedPattern) -> Option<ValueType> {
self.get_attribute(schema, pattern).map(|a| a.value_type)
}
}
@ -984,43 +1006,83 @@ impl ConjoiningClauses {
}
impl ConjoiningClauses {
pub fn apply_clauses(&mut self, schema: &Schema, where_clauses: Vec<WhereClause>) -> Result<()> {
/// Re-evolve each queued pattern, front to back, and apply the result.
///
/// If re-evolving a pattern reports that it is empty, the clause set is marked
/// known-empty with that reason and every pattern still queued is discarded
/// without being applied. This method itself always returns `Ok(())`.
fn apply_evolved_patterns(&mut self, known: Known, mut patterns: VecDeque<EvolvedPattern>) -> Result<()> {
    // Draining consumes the queue in order; breaking out drops whatever is left.
    for queued in patterns.drain(..) {
        match self.evolve_pattern(known, queued) {
            PlaceOrEmpty::Empty(reason) => {
                self.mark_known_empty(reason);
                break;
            },
            PlaceOrEmpty::Place(evolved) => self.apply_pattern(known, evolved),
        }
    }
    Ok(())
}
/// Apply a list of `where` clauses to this set of conjoining clauses.
///
/// Type annotations are applied in a first pass. In the second pass, pattern
/// clauses are turned into evolved patterns and queued; the queue is handed to
/// `apply_evolved_patterns` whenever a non-pattern clause is reached and once
/// more at the end. If a pattern cannot be evolved, the clause set is marked
/// known-empty and the method returns `Ok(())` without visiting later clauses.
/// Errors from the individual `apply_*` calls are propagated.
pub fn apply_clauses(&mut self, known: Known, where_clauses: Vec<WhereClause>) -> Result<()> {
// We apply (top level) type predicates first as an optimization.
for clause in where_clauses.iter() {
if let &WhereClause::TypeAnnotation(ref anno) = clause {
self.apply_type_anno(anno)?;
}
}
// Then we apply everything else.
// Note that we collect contiguous runs of patterns so that we can evolve them
// together to take advantage of mutual partial evaluation.
let mut remaining = where_clauses.len();
let mut patterns: VecDeque<EvolvedPattern> = VecDeque::with_capacity(remaining);
for clause in where_clauses {
// `remaining` tracks how many clauses are still unvisited; it is used as the
// capacity of any replacement queue allocated below.
remaining -= 1;
// Type annotations were already handled by the first pass.
if let &WhereClause::TypeAnnotation(_) = &clause {
continue;
}
self.apply_clause(schema, clause)?;
match clause {
WhereClause::Pattern(p) => {
match self.make_evolved_pattern(known, p) {
PlaceOrEmpty::Place(evolved) => patterns.push_back(evolved),
PlaceOrEmpty::Empty(because) => {
// The clause set is now marked known-empty; skip the remaining clauses.
self.mark_known_empty(because);
return Ok(());
}
}
},
_ => {
// A non-pattern clause ends the current run of patterns: apply the
// queued patterns before applying this clause.
if !patterns.is_empty() {
self.apply_evolved_patterns(known, patterns)?;
patterns = VecDeque::with_capacity(remaining);
}
self.apply_clause(known, clause)?;
},
}
}
Ok(())
// Apply whatever run of patterns is still queued after the last clause.
self.apply_evolved_patterns(known, patterns)
}
// This is here, rather than in `lib.rs`, because it's recursive: `or` can contain `or`,
// and so on.
pub fn apply_clause(&mut self, schema: &Schema, where_clause: WhereClause) -> Result<()> {
pub fn apply_clause(&mut self, known: Known, where_clause: WhereClause) -> Result<()> {
match where_clause {
WhereClause::Pattern(p) => {
self.apply_pattern(schema, p);
match self.make_evolved_pattern(known, p) {
PlaceOrEmpty::Place(evolved) => self.apply_pattern(known, evolved),
PlaceOrEmpty::Empty(because) => self.mark_known_empty(because),
}
Ok(())
},
WhereClause::Pred(p) => {
self.apply_predicate(schema, p)
self.apply_predicate(known, p)
},
WhereClause::WhereFn(f) => {
self.apply_where_fn(schema, f)
self.apply_where_fn(known, f)
},
WhereClause::OrJoin(o) => {
validate_or_join(&o)?;
self.apply_or_join(schema, o)
self.apply_or_join(known, o)
},
WhereClause::NotJoin(n) => {
validate_not_join(&n)?;
self.apply_not_join(schema, n)
self.apply_not_join(known, n)
},
WhereClause::TypeAnnotation(anno) => {
self.apply_type_anno(&anno)

View file

@ -8,8 +8,6 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use mentat_core::Schema;
use mentat_query::{
ContainsVariables,
NotJoin,
@ -28,8 +26,10 @@ use types::{
ComputedTable,
};
use Known;
impl ConjoiningClauses {
pub fn apply_not_join(&mut self, schema: &Schema, not_join: NotJoin) -> Result<()> {
pub fn apply_not_join(&mut self, known: Known, not_join: NotJoin) -> Result<()> {
let unified = match not_join.unify_vars {
UnifyVars::Implicit => not_join.collect_mentioned_variables(),
UnifyVars::Explicit(vs) => vs,
@ -49,7 +49,7 @@ impl ConjoiningClauses {
}
}
template.apply_clauses(&schema, not_join.clauses)?;
template.apply_clauses(known, not_join.clauses)?;
if template.is_known_empty() {
return Ok(());
@ -70,6 +70,12 @@ impl ConjoiningClauses {
return Ok(());
}
// If we don't impose any constraints on the output, we might as well
// not exist.
if template.wheres.is_empty() {
return Ok(());
}
let subquery = ComputedTable::Subquery(template);
self.wheres.add_intersection(ColumnConstraint::NotExists(subquery));
@ -133,13 +139,15 @@ mod testing {
};
/// Test helper: parse `input` as a find query and algebrize it against `schema`,
/// returning the resulting `ConjoiningClauses`.
///
/// Panics (via `expect`) if parsing or algebrizing fails.
fn alg(schema: &Schema, input: &str) -> ConjoiningClauses {
let known = Known::for_schema(schema);
let parsed = parse_find_string(input).expect("parse failed");
algebrize(schema.into(), parsed).expect("algebrize failed").cc
algebrize(known, parsed).expect("algebrize failed").cc
}
/// Test helper: like `alg`, but algebrizes with the supplied `QueryInputs`,
/// returning the resulting `ConjoiningClauses`.
///
/// Panics (via `expect`) if parsing or algebrizing fails.
fn alg_with_inputs(schema: &Schema, input: &str, inputs: QueryInputs) -> ConjoiningClauses {
let known = Known::for_schema(schema);
let parsed = parse_find_string(input).expect("parse failed");
algebrize_with_inputs(schema.into(), parsed, 0, inputs).expect("algebrize failed").cc
algebrize_with_inputs(known, parsed, 0, inputs).expect("algebrize failed").cc
}
fn prepopulated_schema() -> Schema {
@ -292,7 +300,7 @@ mod testing {
let age = QueryValue::Entid(68);
let john = QueryValue::TypedValue(TypedValue::typed_string("John"));
let eleven = QueryValue::PrimitiveLong(11);
let eleven = QueryValue::TypedValue(TypedValue::Long(11));
let mut subquery = ConjoiningClauses::default();
subquery.from = vec![SourceAlias(DatomsTable::Datoms, d3)];
@ -541,12 +549,13 @@ mod testing {
#[test]
fn test_unbound_var_fails() {
let schema = prepopulated_schema();
let known = Known::for_schema(&schema);