mentat/core/src/lib.rs

326 lines
11 KiB
Rust
Raw Normal View History

// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
2017-02-15 23:53:13 +00:00
extern crate edn;
extern crate ordered_float;
use std::collections::BTreeMap;
use self::ordered_float::OrderedFloat;
2017-02-15 23:53:13 +00:00
use self::edn::NamespacedKeyword;
/// Core types defining a Mentat knowledge base.
/// Represents one entid in the entid space.
///
/// Per https://www.sqlite.org/datatype3.html (see also http://stackoverflow.com/a/8499544), SQLite
/// stores signed integers up to 64 bits in size. Since u32 is not appropriate for our use case, we
/// use i64 rather than manually truncating u64 to u63 and casting to i64 throughout the codebase.
pub type Entid = i64;
/// The attribute of each Mentat assertion has a :db/valueType constraining the value to a
/// particular set. Mentat recognizes the following :db/valueType values.
2017-02-10 00:59:46 +00:00
#[derive(Clone,Copy,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
pub enum ValueType {
Ref,
Boolean,
Instant,
Long,
Double,
String,
Keyword,
}
/// Represents a Mentat value in a particular value set.
// TODO: expand to include :db.type/{instant,url,uuid}.
// TODO: BigInt?
#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
pub enum TypedValue {
Ref(Entid),
Boolean(bool),
Long(i64),
Double(OrderedFloat<f64>),
// TODO: &str throughout?
String(String),
Keyword(NamespacedKeyword),
}
impl TypedValue {
/// Returns true if the provided type is `Some` and matches this value's type, or if the
/// provided type is `None`.
#[inline]
pub fn is_congruent_with<T: Into<Option<ValueType>>>(&self, t: T) -> bool {
t.into().map_or(true, |x| self.matches_type(x))
}
#[inline]
pub fn matches_type(&self, t: ValueType) -> bool {
self.value_type() == t
}
pub fn value_type(&self) -> ValueType {
match self {
&TypedValue::Ref(_) => ValueType::Ref,
&TypedValue::Boolean(_) => ValueType::Boolean,
&TypedValue::Long(_) => ValueType::Long,
&TypedValue::Double(_) => ValueType::Double,
&TypedValue::String(_) => ValueType::String,
&TypedValue::Keyword(_) => ValueType::Keyword,
}
}
}
// Put this here rather than in `db` simply because it's widely needed.
pub trait SQLValueType {
fn value_type_tag(&self) -> i32;
fn accommodates_integer(&self, int: i64) -> bool;
}
impl SQLValueType for ValueType {
fn value_type_tag(&self) -> i32 {
match *self {
ValueType::Ref => 0,
ValueType::Boolean => 1,
ValueType::Instant => 4,
// SQLite distinguishes integral from decimal types, allowing long and double to share a tag.
ValueType::Long => 5,
ValueType::Double => 5,
ValueType::String => 10,
ValueType::Keyword => 13,
}
}
/// Returns true if the provided integer is in the SQLite value space of this type. For
/// example, `1` is how we encode `true`.
///
/// ```
/// use mentat_core::{ValueType, SQLValueType};
/// assert!(ValueType::Boolean.accommodates_integer(1));
/// assert!(!ValueType::Boolean.accommodates_integer(-1));
/// assert!(!ValueType::Boolean.accommodates_integer(10));
/// assert!(!ValueType::String.accommodates_integer(10));
/// ```
fn accommodates_integer(&self, int: i64) -> bool {
use ValueType::*;
match *self {
Instant | Long | Double => true,
Ref => int >= 0,
Boolean => (int == 0) || (int == 1),
ValueType::String => false,
Keyword => false,
}
}
}
#[test]
fn test_typed_value() {
assert!(TypedValue::Boolean(false).is_congruent_with(None));
assert!(TypedValue::Boolean(false).is_congruent_with(ValueType::Boolean));
assert!(!TypedValue::String("foo".to_string()).is_congruent_with(ValueType::Boolean));
assert!(TypedValue::String("foo".to_string()).is_congruent_with(ValueType::String));
assert!(TypedValue::String("foo".to_string()).is_congruent_with(None));
}
/// Bit flags used in `flags0` column in temporary tables created during search,
/// such as the `search_results`, `inexact_searches` and `exact_searches` tables.
/// When moving to a more concrete table, such as `datoms`, they are expanded out
/// via these flags and put into their own column rather than a bit field.
pub enum AttributeBitFlags {
IndexAVET = 1 << 0,
IndexVAET = 1 << 1,
IndexFulltext = 1 << 2,
UniqueValue = 1 << 3,
}
/// A Mentat schema attribute has a value type and several other flags determining how assertions
/// with the attribute are interpreted.
///
/// TODO: consider packing this into a bitfield or similar.
#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)]
pub struct Attribute {
/// The associated value type, i.e., `:db/valueType`?
pub value_type: ValueType,
/// `true` if this attribute is multi-valued, i.e., it is `:db/cardinality
/// :db.cardinality/many`. `false` if this attribute is single-valued (the default), i.e., it
/// is `:db/cardinality :db.cardinality/one`.
pub multival: bool,
/// `true` if this attribute is unique-value, i.e., it is `:db/unique :db.unique/value`.
///
/// *Unique-value* means that there is at most one assertion with the attribute and a
/// particular value in the datom store.
pub unique_value: bool,
/// `true` if this attribute is unique-identity, i.e., it is `:db/unique :db.unique/identity`.
///
/// Unique-identity attributes always have value type `Ref`.
///
/// *Unique-identity* means that the attribute is *unique-value* and that they can be used in
/// lookup-refs and will automatically upsert where appropriate.
pub unique_identity: bool,
/// `true` if this attribute is automatically indexed, i.e., it is `:db/indexing true`.
pub index: bool,
/// `true` if this attribute is automatically fulltext indexed, i.e., it is `:db/fulltext true`.
///
/// Fulltext attributes always have string values.
pub fulltext: bool,
/// `true` if this attribute is a component, i.e., it is `:db/isComponent true`.
///
/// Component attributes always have value type `Ref`.
///
/// They are used to compose entities from component sub-entities: they are fetched recursively
/// by pull expressions, and they are automatically recursively deleted where appropriate.
pub component: bool,
}
impl Attribute {
/// Combine several attribute flags into a bitfield used in temporary search tables.
pub fn flags(&self) -> u8 {
let mut flags: u8 = 0;
if self.index {
flags |= AttributeBitFlags::IndexAVET as u8;
}
if self.value_type == ValueType::Ref {
flags |= AttributeBitFlags::IndexVAET as u8;
}
if self.fulltext {
flags |= AttributeBitFlags::IndexFulltext as u8;
}
if self.unique_value {
flags |= AttributeBitFlags::UniqueValue as u8;
}
flags
}
}
impl Default for Attribute {
fn default() -> Attribute {
Attribute {
// There's no particular reason to favour one value type, so Ref it is.
value_type: ValueType::Ref,
fulltext: false,
index: false,
multival: false,
unique_value: false,
unique_identity: false,
component: false,
}
}
}
/// Map `NamespacedKeyword` idents (`:db/ident`) to positive integer entids (`1`).
pub type IdentMap = BTreeMap<NamespacedKeyword, Entid>;
/// Map positive integer entids (`1`) to `NamespacedKeyword` idents (`:db/ident`).
pub type EntidMap = BTreeMap<Entid, NamespacedKeyword>;
/// Map attribute entids to `Attribute` instances.
pub type SchemaMap = BTreeMap<Entid, Attribute>;
/// Represents a Mentat schema.
///
/// Maintains the mapping between string idents and positive integer entids; and exposes the schema
/// flags associated to a given entid (equivalently, ident).
///
/// TODO: consider a single bi-directional map instead of separate ident->entid and entid->ident
/// maps.
#[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialOrd, PartialEq)]
pub struct Schema {
/// Map entid->ident.
///
/// Invariant: is the inverse map of `ident_map`.
pub entid_map: EntidMap,
/// Map ident->entid.
///
/// Invariant: is the inverse map of `entid_map`.
pub ident_map: IdentMap,
/// Map entid->attribute flags.
///
/// Invariant: key-set is the same as the key-set of `entid_map` (equivalently, the value-set of
/// `ident_map`).
pub schema_map: SchemaMap,
}
impl Schema {
pub fn get_ident(&self, x: Entid) -> Option<&NamespacedKeyword> {
self.entid_map.get(&x)
}
pub fn get_entid(&self, x: &NamespacedKeyword) -> Option<Entid> {
self.ident_map.get(x).map(|x| *x)
}
pub fn attribute_for_entid(&self, x: Entid) -> Option<&Attribute> {
self.schema_map.get(&x)
}
2017-02-15 23:53:13 +00:00
pub fn attribute_for_ident(&self, ident: &NamespacedKeyword) -> Option<&Attribute> {
self.get_entid(&ident)
2017-02-15 23:53:13 +00:00
.and_then(|x| self.attribute_for_entid(x))
}
2017-02-14 03:39:47 +00:00
/// Return true if the provided entid identifies an attribute in this schema.
pub fn is_attribute(&self, x: Entid) -> bool {
self.schema_map.contains_key(&x)
}
2017-02-14 03:39:47 +00:00
/// Return true if the provided ident identifies an attribute in this schema.
pub fn identifies_attribute(&self, x: &NamespacedKeyword) -> bool {
self.get_entid(x).map(|e| self.is_attribute(e)).unwrap_or(false)
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_attribute_flags() {
let attr1 = Attribute {
index: true,
value_type: ValueType::Ref,
fulltext: false,
unique_value: false,
multival: false,
unique_identity: false,
component: false,
};
assert!(attr1.flags() & AttributeBitFlags::IndexAVET as u8 != 0);
assert!(attr1.flags() & AttributeBitFlags::IndexVAET as u8 != 0);
assert!(attr1.flags() & AttributeBitFlags::IndexFulltext as u8 == 0);
assert!(attr1.flags() & AttributeBitFlags::UniqueValue as u8 == 0);
let attr2 = Attribute {
index: false,
value_type: ValueType::Boolean,
fulltext: true,
unique_value: true,
multival: false,
unique_identity: false,
component: false,
};
assert!(attr2.flags() & AttributeBitFlags::IndexAVET as u8 == 0);
assert!(attr2.flags() & AttributeBitFlags::IndexVAET as u8 == 0);
assert!(attr2.flags() & AttributeBitFlags::IndexFulltext as u8 != 0);
assert!(attr2.flags() & AttributeBitFlags::UniqueValue as u8 != 0);
}
}
Implement upsert resolution algorithm. (#186, #283). r=rnewman, f=jsantell * Pre: Implement batch [a v] pair lookup. * Pre: Add InternSet for sharing ref-counted handles to large values. * Pre: Derive more for Entity. * Pre: Return DB from creating; return TxReport from transact. I explicitly am not supporting opening existing databases yet, let alone upgrading databases from earlier versions. That can follow fast once basic transactions are supported. * Pre: Parse string temporary ID entities; remove ValueOrLookupRef. This adds TempId entities, but we can't disambiguate String temporary IDs from values without the use of the schema, so there's no new value branch. Similarly, we can't disambiguate lookup-ref values from two element list values without a schema, so we remove this entirely. We'll handle the ambiguity later in the transactor. * Persist partitions to SQL store; allocate transaction ID. (#186) * Post: Test upserting with vectors. This converts an existing test to EDN: https://github.com/mozilla/mentat/blob/84a80f40f5c888f8452d07bd15f3b5fba49d3963/test/datomish/db_test.cljc#L193. * Implement tempid upsert resolution algorithm. (#184) * Post: Separate Tx out of DB. This is very preliminary, since we don't have a real connection type to manage transactions and their metadata yet. * Post: Comment on implementation choices in the transactor. * Review comment: Put long use lists on separate lines. * Review comment: Accept String: Borrow<S> instead of just String. * Review comment: Address nits.
2017-02-15 00:50:40 +00:00
pub mod intern_set;