From 1818e0b98e48baafb90b1e90ec7be871b220d9f2 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Mon, 16 Apr 2018 12:54:36 -0700 Subject: [PATCH] Split mentat_core TypedValue code into separate files for clarity. --- core/src/lib.rs | 675 ++----------------------------------- core/src/sql_types.rs | 139 ++++++++ core/src/types.rs | 392 +++++++++++++++++++++ core/src/value_type_set.rs | 186 ++++++++++ 4 files changed, 739 insertions(+), 653 deletions(-) create mode 100644 core/src/sql_types.rs create mode 100644 core/src/types.rs create mode 100644 core/src/value_type_set.rs diff --git a/core/src/lib.rs b/core/src/lib.rs index b25ac842..9d381797 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -27,16 +27,8 @@ mod cache; use std::collections::{ BTreeMap, - BTreeSet, }; -use std::fmt; -use std::rc::Rc; - -use enum_set::EnumSet; - -use self::ordered_float::OrderedFloat; - pub use uuid::Uuid; pub use chrono::{ @@ -57,651 +49,28 @@ pub use cache::{ }; /// Core types defining a Mentat knowledge base. - -/// Represents one entid in the entid space. -/// -/// Per https://www.sqlite.org/datatype3.html (see also http://stackoverflow.com/a/8499544), SQLite -/// stores signed integers up to 64 bits in size. Since u32 is not appropriate for our use case, we -/// use i64 rather than manually truncating u64 to u63 and casting to i64 throughout the codebase. -pub type Entid = i64; - -/// An entid that's either already in the store, or newly allocated to a tempid. -/// TODO: we'd like to link this in some way to the lifetime of a particular PartitionMap. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] -pub struct KnownEntid(pub Entid); - -impl From for Entid { - fn from(k: KnownEntid) -> Entid { - k.0 - } -} - -impl From for TypedValue { - fn from(k: KnownEntid) -> TypedValue { - TypedValue::Ref(k.0) - } -} - -/// The attribute of each Mentat assertion has a :db/valueType constraining the value to a -/// particular set. Mentat recognizes the following :db/valueType values. -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] -#[repr(u32)] -pub enum ValueType { - Ref, - Boolean, - Instant, - Long, - Double, - String, - Keyword, - Uuid, -} - -pub type ValueTypeTag = i32; - -impl ValueType { - pub fn all_enums() -> EnumSet { - // TODO: lazy_static. - let mut s = EnumSet::new(); - s.insert(ValueType::Ref); - s.insert(ValueType::Boolean); - s.insert(ValueType::Instant); - s.insert(ValueType::Long); - s.insert(ValueType::Double); - s.insert(ValueType::String); - s.insert(ValueType::Keyword); - s.insert(ValueType::Uuid); - s - } -} - - -impl enum_set::CLike for ValueType { - fn to_u32(&self) -> u32 { - *self as u32 - } - - unsafe fn from_u32(v: u32) -> ValueType { - std::mem::transmute(v) - } -} - -impl ValueType { - pub fn into_keyword(self) -> NamespacedKeyword { - NamespacedKeyword::new("db.type", match self { - ValueType::Ref => "ref", - ValueType::Boolean => "boolean", - ValueType::Instant => "instant", - ValueType::Long => "long", - ValueType::Double => "double", - ValueType::String => "string", - ValueType::Keyword => "keyword", - ValueType::Uuid => "uuid", - }) - } - - pub fn into_typed_value(self) -> TypedValue { - TypedValue::typed_ns_keyword("db.type", match self { - ValueType::Ref => "ref", - ValueType::Boolean => "boolean", - ValueType::Instant => "instant", - ValueType::Long => "long", - ValueType::Double => "double", - ValueType::String => "string", - ValueType::Keyword => "keyword", - ValueType::Uuid => "uuid", - }) - } - - pub fn into_edn_value(self) -> edn::Value { - match self { - ValueType::Ref => values::DB_TYPE_REF.clone(), - ValueType::Boolean => values::DB_TYPE_BOOLEAN.clone(), - ValueType::Instant => values::DB_TYPE_INSTANT.clone(), - ValueType::Long => values::DB_TYPE_LONG.clone(), - ValueType::Double => values::DB_TYPE_DOUBLE.clone(), - ValueType::String => values::DB_TYPE_STRING.clone(), - ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), - ValueType::Uuid => values::DB_TYPE_UUID.clone(), - } - } -} - -impl fmt::Display for ValueType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", match *self { - ValueType::Ref => ":db.type/ref", - ValueType::Boolean => ":db.type/boolean", - ValueType::Instant => ":db.type/instant", - ValueType::Long => ":db.type/long", - ValueType::Double => ":db.type/double", - ValueType::String => ":db.type/string", - ValueType::Keyword => ":db.type/keyword", - ValueType::Uuid => ":db.type/uuid", - }) - } -} - -/// Represents a Mentat value in a particular value set. -// TODO: expand to include :db.type/uri. https://github.com/mozilla/mentat/issues/201 -// TODO: JSON data type? https://github.com/mozilla/mentat/issues/31 -// TODO: BigInt? Bytes? -#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq,Serialize,Deserialize)] -pub enum TypedValue { - Ref(Entid), - Boolean(bool), - Long(i64), - Double(OrderedFloat), - Instant(DateTime), // Use `into()` to ensure truncation. - // TODO: &str throughout? - String(Rc), - Keyword(Rc), - Uuid(Uuid), // It's only 128 bits, so this should be acceptable to clone. -} - -impl TypedValue { - /// Returns true if the provided type is `Some` and matches this value's type, or if the - /// provided type is `None`. - #[inline] - pub fn is_congruent_with>>(&self, t: T) -> bool { - t.into().map_or(true, |x| self.matches_type(x)) - } - - #[inline] - pub fn matches_type(&self, t: ValueType) -> bool { - self.value_type() == t - } - - pub fn value_type(&self) -> ValueType { - match self { - &TypedValue::Ref(_) => ValueType::Ref, - &TypedValue::Boolean(_) => ValueType::Boolean, - &TypedValue::Long(_) => ValueType::Long, - &TypedValue::Instant(_) => ValueType::Instant, - &TypedValue::Double(_) => ValueType::Double, - &TypedValue::String(_) => ValueType::String, - &TypedValue::Keyword(_) => ValueType::Keyword, - &TypedValue::Uuid(_) => ValueType::Uuid, - } - } - - /// Construct a new `TypedValue::Keyword` instance by cloning the provided - /// values and wrapping them in a new `Rc`. This is expensive, so this might - /// be best limited to tests. - pub fn typed_ns_keyword(ns: &str, name: &str) -> TypedValue { - TypedValue::Keyword(Rc::new(NamespacedKeyword::new(ns, name))) - } - - /// Construct a new `TypedValue::String` instance by cloning the provided - /// value and wrapping it in a new `Rc`. This is expensive, so this might - /// be best limited to tests. - pub fn typed_string(s: &str) -> TypedValue { - TypedValue::String(Rc::new(s.to_string())) - } - - pub fn current_instant() -> TypedValue { - Utc::now().into() - } - - /// Construct a new `TypedValue::Instant` instance from the provided - /// microsecond timestamp. - pub fn instant(micros: i64) -> TypedValue { - DateTime::::from_micros(micros).into() - } -} - -trait MicrosecondPrecision { - /// Truncate the provided `DateTime` to microsecond precision. - fn microsecond_precision(self) -> Self; -} - -impl MicrosecondPrecision for DateTime { - fn microsecond_precision(self) -> DateTime { - let nanoseconds = self.nanosecond(); - if nanoseconds % 1000 == 0 { - return self; - } - let microseconds = nanoseconds / 1000; - let truncated = microseconds * 1000; - self.with_nanosecond(truncated).expect("valid timestamp") - } -} - -/// Return the current time as a UTC `DateTime` instance with microsecond precision. -pub fn now() -> DateTime { - Utc::now().microsecond_precision() -} - -// We don't do From or From 'cos it's ambiguous. - -impl From for TypedValue { - fn from(value: bool) -> TypedValue { - TypedValue::Boolean(value) - } -} - -/// Truncate the provided `DateTime` to microsecond precision, and return the corresponding -/// `TypedValue::Instant`. -impl From> for TypedValue { - fn from(value: DateTime) -> TypedValue { - TypedValue::Instant(value.microsecond_precision()) - } -} - -impl From for TypedValue { - fn from(value: Uuid) -> TypedValue { - TypedValue::Uuid(value) - } -} - -impl<'a> From<&'a str> for TypedValue { - fn from(value: &'a str) -> TypedValue { - TypedValue::String(Rc::new(value.to_string())) - } -} - -impl From for TypedValue { - fn from(value: String) -> TypedValue { - TypedValue::String(Rc::new(value)) - } -} - -impl From for TypedValue { - fn from(value: NamespacedKeyword) -> TypedValue { - TypedValue::Keyword(Rc::new(value)) - } -} - -impl From for TypedValue { - fn from(value: u32) -> TypedValue { - TypedValue::Long(value as i64) - } -} - -impl From for TypedValue { - fn from(value: i32) -> TypedValue { - TypedValue::Long(value as i64) - } -} - -impl From for TypedValue { - fn from(value: f64) -> TypedValue { - TypedValue::Double(OrderedFloat(value)) - } -} - -impl TypedValue { - pub fn into_known_entid(self) -> Option { - match self { - TypedValue::Ref(v) => Some(KnownEntid(v)), - _ => None, - } - } - - pub fn into_entid(self) -> Option { - match self { - TypedValue::Ref(v) => Some(v), - _ => None, - } - } - - pub fn into_kw(self) -> Option> { - match self { - TypedValue::Keyword(v) => Some(v), - _ => None, - } - } - - pub fn into_boolean(self) -> Option { - match self { - TypedValue::Boolean(v) => Some(v), - _ => None, - } - } - - pub fn into_long(self) -> Option { - match self { - TypedValue::Long(v) => Some(v), - _ => None, - } - } - - pub fn into_double(self) -> Option { - match self { - TypedValue::Double(v) => Some(v.into_inner()), - _ => None, - } - } - - pub fn into_instant(self) -> Option> { - match self { - TypedValue::Instant(v) => Some(v), - _ => None, - } - } - - pub fn into_timestamp(self) -> Option { - match self { - TypedValue::Instant(v) => Some(v.timestamp()), - _ => None, - } - } - - pub fn into_string(self) -> Option> { - match self { - TypedValue::String(v) => Some(v), - _ => None, - } - } - - pub fn into_uuid(self) -> Option { - match self { - TypedValue::Uuid(v) => Some(v), - _ => None, - } - } - - pub fn into_uuid_string(self) -> Option { - match self { - TypedValue::Uuid(v) => Some(v.hyphenated().to_string()), - _ => None, - } - } -} - -/// Type safe representation of the possible return values from SQLite's `typeof` -#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] -pub enum SQLTypeAffinity { - Null, // "null" - Integer, // "integer" - Real, // "real" - Text, // "text" - Blob, // "blob" -} - -// Put this here rather than in `db` simply because it's widely needed. -pub trait SQLValueType { - fn value_type_tag(&self) -> ValueTypeTag; - fn accommodates_integer(&self, int: i64) -> bool; - - /// Return a pair of the ValueTypeTag for this value type, and the SQLTypeAffinity required - /// to distinguish it from any other types that share the same tag. - /// - /// Background: The tag alone is not enough to determine the type of a value, since multiple - /// ValueTypes may share the same tag (for example, ValueType::Long and ValueType::Double). - /// However, each ValueType can be determined by checking both the tag and the type's affinity. - fn sql_representation(&self) -> (ValueTypeTag, Option); -} - -impl SQLValueType for ValueType { - fn sql_representation(&self) -> (ValueTypeTag, Option) { - match *self { - ValueType::Ref => (0, None), - ValueType::Boolean => (1, None), - ValueType::Instant => (4, None), - - // SQLite distinguishes integral from decimal types, allowing long and double to share a tag. - ValueType::Long => (5, Some(SQLTypeAffinity::Integer)), - ValueType::Double => (5, Some(SQLTypeAffinity::Real)), - ValueType::String => (10, None), - ValueType::Uuid => (11, None), - ValueType::Keyword => (13, None), - } - } - - #[inline] - fn value_type_tag(&self) -> ValueTypeTag { - self.sql_representation().0 - } - - /// Returns true if the provided integer is in the SQLite value space of this type. For - /// example, `1` is how we encode `true`. - /// - /// ``` - /// use mentat_core::{ValueType, SQLValueType}; - /// assert!(!ValueType::Instant.accommodates_integer(1493399581314)); - /// assert!(!ValueType::Instant.accommodates_integer(1493399581314000)); - /// assert!(ValueType::Boolean.accommodates_integer(1)); - /// assert!(!ValueType::Boolean.accommodates_integer(-1)); - /// assert!(!ValueType::Boolean.accommodates_integer(10)); - /// assert!(!ValueType::String.accommodates_integer(10)); - /// ``` - fn accommodates_integer(&self, int: i64) -> bool { - use ValueType::*; - match *self { - Instant => false, // Always use #inst. - Long | Double => true, - Ref => int >= 0, - Boolean => (int == 0) || (int == 1), - ValueType::String => false, - Keyword => false, - Uuid => false, - } - } -} - -trait EnumSetExtensions { - /// Return a set containing both `x` and `y`. - fn of_both(x: T, y: T) -> EnumSet; - - /// Return a clone of `self` with `y` added. - fn with(&self, y: T) -> EnumSet; -} - -impl EnumSetExtensions for EnumSet { - /// Return a set containing both `x` and `y`. - fn of_both(x: T, y: T) -> Self { - let mut o = EnumSet::new(); - o.insert(x); - o.insert(y); - o - } - - /// Return a clone of `self` with `y` added. - fn with(&self, y: T) -> EnumSet { - let mut o = self.clone(); - o.insert(y); - o - } -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct ValueTypeSet(pub EnumSet); - -impl Default for ValueTypeSet { - fn default() -> ValueTypeSet { - ValueTypeSet::any() - } -} - -impl ValueTypeSet { - pub fn any() -> ValueTypeSet { - ValueTypeSet(ValueType::all_enums()) - } - - pub fn none() -> ValueTypeSet { - ValueTypeSet(EnumSet::new()) - } - - /// Return a set containing only `t`. - pub fn of_one(t: ValueType) -> ValueTypeSet { - let mut s = EnumSet::new(); - s.insert(t); - ValueTypeSet(s) - } - - /// Return a set containing `Double` and `Long`. - pub fn of_numeric_types() -> ValueTypeSet { - ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long)) - } - - /// Return a set containing `Double`, `Long`, and `Instant`. - pub fn of_numeric_and_instant_types() -> ValueTypeSet { - let mut s = EnumSet::new(); - s.insert(ValueType::Double); - s.insert(ValueType::Long); - s.insert(ValueType::Instant); - ValueTypeSet(s) - } - - /// Return a set containing `Ref` and `Keyword`. - pub fn of_keywords() -> ValueTypeSet { - ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Keyword)) - } - - /// Return a set containing `Ref` and `Long`. - pub fn of_longs() -> ValueTypeSet { - ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Long)) - } -} - -impl ValueTypeSet { - pub fn insert(&mut self, vt: ValueType) -> bool { - self.0.insert(vt) - } - - pub fn len(&self) -> usize { - self.0.len() - } - - /// Returns a set containing all the types in this set and `other`. - pub fn union(&self, other: &ValueTypeSet) -> ValueTypeSet { - ValueTypeSet(self.0.union(other.0)) - } - - pub fn intersection(&self, other: &ValueTypeSet) -> ValueTypeSet { - ValueTypeSet(self.0.intersection(other.0)) - } - - /// Returns the set difference between `self` and `other`, which is the - /// set of items in `self` that are not in `other`. - pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet { - ValueTypeSet(self.0 - other.0) - } - - /// Return an arbitrary type that's part of this set. - /// For a set containing a single type, this will be that type. - pub fn exemplar(&self) -> Option { - self.0.iter().next() - } - - pub fn is_subset(&self, other: &ValueTypeSet) -> bool { - self.0.is_subset(&other.0) - } - - /// Returns true if `self` and `other` contain no items in common. - pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool { - self.0.is_disjoint(&other.0) - } - - pub fn contains(&self, vt: ValueType) -> bool { - self.0.contains(&vt) - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() - } - - pub fn is_unit(&self) -> bool { - self.0.len() == 1 - } - - pub fn iter(&self) -> ::enum_set::Iter { - self.0.iter() - } -} - -impl From for ValueTypeSet { - fn from(t: ValueType) -> Self { - ValueTypeSet::of_one(t) - } -} - -impl ValueTypeSet { - pub fn is_only_numeric(&self) -> bool { - self.is_subset(&ValueTypeSet::of_numeric_types()) - } -} - -impl IntoIterator for ValueTypeSet { - type Item = ValueType; - type IntoIter = ::enum_set::Iter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -impl ::std::iter::FromIterator for ValueTypeSet { - fn from_iter>(iterator: I) -> Self { - let mut ret = Self::none(); - ret.0.extend(iterator); - ret - } -} - -impl ::std::iter::Extend for ValueTypeSet { - fn extend>(&mut self, iter: I) { - for element in iter { - self.0.insert(element); - } - } -} - -/// We have an enum of types, `ValueType`. It can be collected into a set, `ValueTypeSet`. Each type -/// is associated with a type tag, which is how a type is represented in, e.g., SQL storage. Types -/// can share type tags, because backing SQL storage is able to differentiate between some types -/// (e.g., longs and doubles), and so distinct tags aren't necessary. That association is defined by -/// `SQLValueType`. That trait similarly extends to `ValueTypeSet`, which maps a collection of types -/// into a collection of tags. -pub trait SQLValueTypeSet { - fn value_type_tags(&self) -> BTreeSet; - fn has_unique_type_tag(&self) -> bool; - fn unique_type_tag(&self) -> Option; -} - -impl SQLValueTypeSet for ValueTypeSet { - // This is inefficient, but it'll do for now. - fn value_type_tags(&self) -> BTreeSet { - let mut out = BTreeSet::new(); - for t in self.0.iter() { - out.insert(t.value_type_tag()); - } - out - } - - fn unique_type_tag(&self) -> Option { - if self.is_unit() || self.has_unique_type_tag() { - self.exemplar().map(|t| t.value_type_tag()) - } else { - None - } - } - - fn has_unique_type_tag(&self) -> bool { - if self.is_unit() { - return true; - } - - let mut acc = BTreeSet::new(); - for t in self.0.iter() { - if acc.insert(t.value_type_tag()) && acc.len() > 1 { - // We inserted a second or subsequent value. - return false; - } - } - !acc.is_empty() - } -} - -#[test] -fn test_typed_value() { - assert!(TypedValue::Boolean(false).is_congruent_with(None)); - assert!(TypedValue::Boolean(false).is_congruent_with(ValueType::Boolean)); - assert!(!TypedValue::typed_string("foo").is_congruent_with(ValueType::Boolean)); - assert!(TypedValue::typed_string("foo").is_congruent_with(ValueType::String)); - assert!(TypedValue::typed_string("foo").is_congruent_with(None)); -} +mod types; +mod value_type_set; +mod sql_types; + +pub use types::{ + Entid, + KnownEntid, + TypedValue, + ValueType, + ValueTypeTag, + now, +}; + +pub use value_type_set::{ + ValueTypeSet, +}; + +pub use sql_types::{ + SQLTypeAffinity, + SQLValueType, + SQLValueTypeSet, +}; /// Bit flags used in `flags0` column in temporary tables created during search, /// such as the `search_results`, `inexact_searches` and `exact_searches` tables. diff --git a/core/src/sql_types.rs b/core/src/sql_types.rs new file mode 100644 index 00000000..8678065c --- /dev/null +++ b/core/src/sql_types.rs @@ -0,0 +1,139 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::collections::{ + BTreeSet, +}; + +use types::{ + ValueType, + ValueTypeTag, +}; + +use value_type_set::{ + ValueTypeSet, +}; + +/// Type safe representation of the possible return values from SQLite's `typeof` +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] +pub enum SQLTypeAffinity { + Null, // "null" + Integer, // "integer" + Real, // "real" + Text, // "text" + Blob, // "blob" +} + +// Put this here rather than in `db` simply because it's widely needed. +pub trait SQLValueType { + fn value_type_tag(&self) -> ValueTypeTag; + fn accommodates_integer(&self, int: i64) -> bool; + + /// Return a pair of the ValueTypeTag for this value type, and the SQLTypeAffinity required + /// to distinguish it from any other types that share the same tag. + /// + /// Background: The tag alone is not enough to determine the type of a value, since multiple + /// ValueTypes may share the same tag (for example, ValueType::Long and ValueType::Double). + /// However, each ValueType can be determined by checking both the tag and the type's affinity. + fn sql_representation(&self) -> (ValueTypeTag, Option); +} + +impl SQLValueType for ValueType { + fn sql_representation(&self) -> (ValueTypeTag, Option) { + match *self { + ValueType::Ref => (0, None), + ValueType::Boolean => (1, None), + ValueType::Instant => (4, None), + + // SQLite distinguishes integral from decimal types, allowing long and double to share a tag. + ValueType::Long => (5, Some(SQLTypeAffinity::Integer)), + ValueType::Double => (5, Some(SQLTypeAffinity::Real)), + ValueType::String => (10, None), + ValueType::Uuid => (11, None), + ValueType::Keyword => (13, None), + } + } + + #[inline] + fn value_type_tag(&self) -> ValueTypeTag { + self.sql_representation().0 + } + + /// Returns true if the provided integer is in the SQLite value space of this type. For + /// example, `1` is how we encode `true`. + /// + /// ``` + /// use mentat_core::{ValueType, SQLValueType}; + /// assert!(!ValueType::Instant.accommodates_integer(1493399581314)); + /// assert!(!ValueType::Instant.accommodates_integer(1493399581314000)); + /// assert!(ValueType::Boolean.accommodates_integer(1)); + /// assert!(!ValueType::Boolean.accommodates_integer(-1)); + /// assert!(!ValueType::Boolean.accommodates_integer(10)); + /// assert!(!ValueType::String.accommodates_integer(10)); + /// ``` + fn accommodates_integer(&self, int: i64) -> bool { + use ValueType::*; + match *self { + Instant => false, // Always use #inst. + Long | Double => true, + Ref => int >= 0, + Boolean => (int == 0) || (int == 1), + ValueType::String => false, + Keyword => false, + Uuid => false, + } + } +} + +/// We have an enum of types, `ValueType`. It can be collected into a set, `ValueTypeSet`. Each type +/// is associated with a type tag, which is how a type is represented in, e.g., SQL storage. Types +/// can share type tags, because backing SQL storage is able to differentiate between some types +/// (e.g., longs and doubles), and so distinct tags aren't necessary. That association is defined by +/// `SQLValueType`. That trait similarly extends to `ValueTypeSet`, which maps a collection of types +/// into a collection of tags. +pub trait SQLValueTypeSet { + fn value_type_tags(&self) -> BTreeSet; + fn has_unique_type_tag(&self) -> bool; + fn unique_type_tag(&self) -> Option; +} + +impl SQLValueTypeSet for ValueTypeSet { + // This is inefficient, but it'll do for now. + fn value_type_tags(&self) -> BTreeSet { + let mut out = BTreeSet::new(); + for t in self.0.iter() { + out.insert(t.value_type_tag()); + } + out + } + + fn unique_type_tag(&self) -> Option { + if self.is_unit() || self.has_unique_type_tag() { + self.exemplar().map(|t| t.value_type_tag()) + } else { + None + } + } + + fn has_unique_type_tag(&self) -> bool { + if self.is_unit() { + return true; + } + + let mut acc = BTreeSet::new(); + for t in self.0.iter() { + if acc.insert(t.value_type_tag()) && acc.len() > 1 { + // We inserted a second or subsequent value. + return false; + } + } + !acc.is_empty() + } +} diff --git a/core/src/types.rs b/core/src/types.rs new file mode 100644 index 00000000..799e947b --- /dev/null +++ b/core/src/types.rs @@ -0,0 +1,392 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::fmt; +use std::rc::Rc; + +use ::enum_set::EnumSet; + +use ::ordered_float::OrderedFloat; + +use ::uuid::Uuid; + +use ::chrono::{ + DateTime, + Timelike, // For truncation. +}; + +use ::edn::{ + self, + FromMicros, + NamespacedKeyword, + Utc, +}; + +use values; + +/// Represents one entid in the entid space. +/// +/// Per https://www.sqlite.org/datatype3.html (see also http://stackoverflow.com/a/8499544), SQLite +/// stores signed integers up to 64 bits in size. Since u32 is not appropriate for our use case, we +/// use i64 rather than manually truncating u64 to u63 and casting to i64 throughout the codebase. +pub type Entid = i64; + +/// An entid that's either already in the store, or newly allocated to a tempid. +/// TODO: we'd like to link this in some way to the lifetime of a particular PartitionMap. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] +pub struct KnownEntid(pub Entid); + +impl From for Entid { + fn from(k: KnownEntid) -> Entid { + k.0 + } +} + +impl From for TypedValue { + fn from(k: KnownEntid) -> TypedValue { + TypedValue::Ref(k.0) + } +} + +/// The attribute of each Mentat assertion has a :db/valueType constraining the value to a +/// particular set. Mentat recognizes the following :db/valueType values. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialOrd, PartialEq)] +#[repr(u32)] +pub enum ValueType { + Ref, + Boolean, + Instant, + Long, + Double, + String, + Keyword, + Uuid, +} + +pub type ValueTypeTag = i32; + +impl ValueType { + pub fn all_enums() -> EnumSet { + // TODO: lazy_static. + let mut s = EnumSet::new(); + s.insert(ValueType::Ref); + s.insert(ValueType::Boolean); + s.insert(ValueType::Instant); + s.insert(ValueType::Long); + s.insert(ValueType::Double); + s.insert(ValueType::String); + s.insert(ValueType::Keyword); + s.insert(ValueType::Uuid); + s + } +} + + +impl ::enum_set::CLike for ValueType { + fn to_u32(&self) -> u32 { + *self as u32 + } + + unsafe fn from_u32(v: u32) -> ValueType { + ::std::mem::transmute(v) + } +} + +impl ValueType { + pub fn into_keyword(self) -> NamespacedKeyword { + NamespacedKeyword::new("db.type", match self { + ValueType::Ref => "ref", + ValueType::Boolean => "boolean", + ValueType::Instant => "instant", + ValueType::Long => "long", + ValueType::Double => "double", + ValueType::String => "string", + ValueType::Keyword => "keyword", + ValueType::Uuid => "uuid", + }) + } + + pub fn into_typed_value(self) -> TypedValue { + TypedValue::typed_ns_keyword("db.type", match self { + ValueType::Ref => "ref", + ValueType::Boolean => "boolean", + ValueType::Instant => "instant", + ValueType::Long => "long", + ValueType::Double => "double", + ValueType::String => "string", + ValueType::Keyword => "keyword", + ValueType::Uuid => "uuid", + }) + } + + pub fn into_edn_value(self) -> edn::Value { + match self { + ValueType::Ref => values::DB_TYPE_REF.clone(), + ValueType::Boolean => values::DB_TYPE_BOOLEAN.clone(), + ValueType::Instant => values::DB_TYPE_INSTANT.clone(), + ValueType::Long => values::DB_TYPE_LONG.clone(), + ValueType::Double => values::DB_TYPE_DOUBLE.clone(), + ValueType::String => values::DB_TYPE_STRING.clone(), + ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), + ValueType::Uuid => values::DB_TYPE_UUID.clone(), + } + } +} + +impl fmt::Display for ValueType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", match *self { + ValueType::Ref => ":db.type/ref", + ValueType::Boolean => ":db.type/boolean", + ValueType::Instant => ":db.type/instant", + ValueType::Long => ":db.type/long", + ValueType::Double => ":db.type/double", + ValueType::String => ":db.type/string", + ValueType::Keyword => ":db.type/keyword", + ValueType::Uuid => ":db.type/uuid", + }) + } +} + +/// Represents a value that can be stored in a Mentat store. +// TODO: expand to include :db.type/uri. https://github.com/mozilla/mentat/issues/201 +// TODO: JSON data type? https://github.com/mozilla/mentat/issues/31 +// TODO: BigInt? Bytes? +#[derive(Clone, Debug, Eq, Hash, Ord, PartialOrd, PartialEq, Serialize, Deserialize)] +pub enum TypedValue { + Ref(Entid), + Boolean(bool), + Long(i64), + Double(OrderedFloat), + Instant(DateTime), // Use `into()` to ensure truncation. + // TODO: &str throughout? + String(Rc), + Keyword(Rc), + Uuid(Uuid), // It's only 128 bits, so this should be acceptable to clone. +} + +impl TypedValue { + /// Returns true if the provided type is `Some` and matches this value's type, or if the + /// provided type is `None`. + #[inline] + pub fn is_congruent_with>>(&self, t: T) -> bool { + t.into().map_or(true, |x| self.matches_type(x)) + } + + #[inline] + pub fn matches_type(&self, t: ValueType) -> bool { + self.value_type() == t + } + + pub fn value_type(&self) -> ValueType { + match self { + &TypedValue::Ref(_) => ValueType::Ref, + &TypedValue::Boolean(_) => ValueType::Boolean, + &TypedValue::Long(_) => ValueType::Long, + &TypedValue::Instant(_) => ValueType::Instant, + &TypedValue::Double(_) => ValueType::Double, + &TypedValue::String(_) => ValueType::String, + &TypedValue::Keyword(_) => ValueType::Keyword, + &TypedValue::Uuid(_) => ValueType::Uuid, + } + } + + /// Construct a new `TypedValue::Keyword` instance by cloning the provided + /// values and wrapping them in a new `Rc`. This is expensive, so this might + /// be best limited to tests. + pub fn typed_ns_keyword(ns: &str, name: &str) -> TypedValue { + TypedValue::Keyword(Rc::new(NamespacedKeyword::new(ns, name))) + } + + /// Construct a new `TypedValue::String` instance by cloning the provided + /// value and wrapping it in a new `Rc`. This is expensive, so this might + /// be best limited to tests. + pub fn typed_string(s: &str) -> TypedValue { + TypedValue::String(Rc::new(s.to_string())) + } + + pub fn current_instant() -> TypedValue { + Utc::now().into() + } + + /// Construct a new `TypedValue::Instant` instance from the provided + /// microsecond timestamp. + pub fn instant(micros: i64) -> TypedValue { + DateTime::::from_micros(micros).into() + } +} + +trait MicrosecondPrecision { + /// Truncate the provided `DateTime` to microsecond precision. + fn microsecond_precision(self) -> Self; +} + +impl MicrosecondPrecision for DateTime { + fn microsecond_precision(self) -> DateTime { + let nanoseconds = self.nanosecond(); + if nanoseconds % 1000 == 0 { + return self; + } + let microseconds = nanoseconds / 1000; + let truncated = microseconds * 1000; + self.with_nanosecond(truncated).expect("valid timestamp") + } +} + +/// Return the current time as a UTC `DateTime` instance with microsecond precision. +pub fn now() -> DateTime { + Utc::now().microsecond_precision() +} + +// We don't do From or From 'cos it's ambiguous. + +impl From for TypedValue { + fn from(value: bool) -> TypedValue { + TypedValue::Boolean(value) + } +} + +/// Truncate the provided `DateTime` to microsecond precision, and return the corresponding +/// `TypedValue::Instant`. +impl From> for TypedValue { + fn from(value: DateTime) -> TypedValue { + TypedValue::Instant(value.microsecond_precision()) + } +} + +impl From for TypedValue { + fn from(value: Uuid) -> TypedValue { + TypedValue::Uuid(value) + } +} + +impl<'a> From<&'a str> for TypedValue { + fn from(value: &'a str) -> TypedValue { + TypedValue::String(Rc::new(value.to_string())) + } +} + +impl From for TypedValue { + fn from(value: String) -> TypedValue { + TypedValue::String(Rc::new(value)) + } +} + +impl From for TypedValue { + fn from(value: NamespacedKeyword) -> TypedValue { + TypedValue::Keyword(Rc::new(value)) + } +} + +impl From for TypedValue { + fn from(value: u32) -> TypedValue { + TypedValue::Long(value as i64) + } +} + +impl From for TypedValue { + fn from(value: i32) -> TypedValue { + TypedValue::Long(value as i64) + } +} + +impl From for TypedValue { + fn from(value: f64) -> TypedValue { + TypedValue::Double(OrderedFloat(value)) + } +} + +impl TypedValue { + pub fn into_known_entid(self) -> Option { + match self { + TypedValue::Ref(v) => Some(KnownEntid(v)), + _ => None, + } + } + + pub fn into_entid(self) -> Option { + match self { + TypedValue::Ref(v) => Some(v), + _ => None, + } + } + + pub fn into_kw(self) -> Option> { + match self { + TypedValue::Keyword(v) => Some(v), + _ => None, + } + } + + pub fn into_boolean(self) -> Option { + match self { + TypedValue::Boolean(v) => Some(v), + _ => None, + } + } + + pub fn into_long(self) -> Option { + match self { + TypedValue::Long(v) => Some(v), + _ => None, + } + } + + pub fn into_double(self) -> Option { + match self { + TypedValue::Double(v) => Some(v.into_inner()), + _ => None, + } + } + + pub fn into_instant(self) -> Option> { + match self { + TypedValue::Instant(v) => Some(v), + _ => None, + } + } + + pub fn into_timestamp(self) -> Option { + match self { + TypedValue::Instant(v) => Some(v.timestamp()), + _ => None, + } + } + + pub fn into_string(self) -> Option> { + match self { + TypedValue::String(v) => Some(v), + _ => None, + } + } + + pub fn into_uuid(self) -> Option { + match self { + TypedValue::Uuid(v) => Some(v), + _ => None, + } + } + + pub fn into_uuid_string(self) -> Option { + match self { + TypedValue::Uuid(v) => Some(v.hyphenated().to_string()), + _ => None, + } + } +} + +#[test] +fn test_typed_value() { + assert!(TypedValue::Boolean(false).is_congruent_with(None)); + assert!(TypedValue::Boolean(false).is_congruent_with(ValueType::Boolean)); + assert!(!TypedValue::typed_string("foo").is_congruent_with(ValueType::Boolean)); + assert!(TypedValue::typed_string("foo").is_congruent_with(ValueType::String)); + assert!(TypedValue::typed_string("foo").is_congruent_with(None)); +} diff --git a/core/src/value_type_set.rs b/core/src/value_type_set.rs new file mode 100644 index 00000000..1629aa71 --- /dev/null +++ b/core/src/value_type_set.rs @@ -0,0 +1,186 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use ::enum_set::{ + EnumSet, +}; + +use ::types::{ + ValueType, +}; + +trait EnumSetExtensions { + /// Return a set containing both `x` and `y`. + fn of_both(x: T, y: T) -> EnumSet; + + /// Return a clone of `self` with `y` added. + fn with(&self, y: T) -> EnumSet; +} + +impl EnumSetExtensions for EnumSet { + /// Return a set containing both `x` and `y`. + fn of_both(x: T, y: T) -> Self { + let mut o = EnumSet::new(); + o.insert(x); + o.insert(y); + o + } + + /// Return a clone of `self` with `y` added. + fn with(&self, y: T) -> EnumSet { + let mut o = self.clone(); + o.insert(y); + o + } +} + + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ValueTypeSet(pub EnumSet); + +impl Default for ValueTypeSet { + fn default() -> ValueTypeSet { + ValueTypeSet::any() + } +} + +impl ValueTypeSet { + pub fn any() -> ValueTypeSet { + ValueTypeSet(ValueType::all_enums()) + } + + pub fn none() -> ValueTypeSet { + ValueTypeSet(EnumSet::new()) + } + + /// Return a set containing only `t`. + pub fn of_one(t: ValueType) -> ValueTypeSet { + let mut s = EnumSet::new(); + s.insert(t); + ValueTypeSet(s) + } + + /// Return a set containing `Double` and `Long`. + pub fn of_numeric_types() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Double, ValueType::Long)) + } + + /// Return a set containing `Double`, `Long`, and `Instant`. + pub fn of_numeric_and_instant_types() -> ValueTypeSet { + let mut s = EnumSet::new(); + s.insert(ValueType::Double); + s.insert(ValueType::Long); + s.insert(ValueType::Instant); + ValueTypeSet(s) + } + + /// Return a set containing `Ref` and `Keyword`. + pub fn of_keywords() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Keyword)) + } + + /// Return a set containing `Ref` and `Long`. + pub fn of_longs() -> ValueTypeSet { + ValueTypeSet(EnumSet::of_both(ValueType::Ref, ValueType::Long)) + } +} + +impl ValueTypeSet { + pub fn insert(&mut self, vt: ValueType) -> bool { + self.0.insert(vt) + } + + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns a set containing all the types in this set and `other`. + pub fn union(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0.union(other.0)) + } + + pub fn intersection(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0.intersection(other.0)) + } + + /// Returns the set difference between `self` and `other`, which is the + /// set of items in `self` that are not in `other`. + pub fn difference(&self, other: &ValueTypeSet) -> ValueTypeSet { + ValueTypeSet(self.0 - other.0) + } + + /// Return an arbitrary type that's part of this set. + /// For a set containing a single type, this will be that type. + pub fn exemplar(&self) -> Option { + self.0.iter().next() + } + + pub fn is_subset(&self, other: &ValueTypeSet) -> bool { + self.0.is_subset(&other.0) + } + + /// Returns true if `self` and `other` contain no items in common. + pub fn is_disjoint(&self, other: &ValueTypeSet) -> bool { + self.0.is_disjoint(&other.0) + } + + pub fn contains(&self, vt: ValueType) -> bool { + self.0.contains(&vt) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn is_unit(&self) -> bool { + self.0.len() == 1 + } + + pub fn iter(&self) -> ::enum_set::Iter { + self.0.iter() + } +} + +impl From for ValueTypeSet { + fn from(t: ValueType) -> Self { + ValueTypeSet::of_one(t) + } +} + +impl ValueTypeSet { + pub fn is_only_numeric(&self) -> bool { + self.is_subset(&ValueTypeSet::of_numeric_types()) + } +} + +impl IntoIterator for ValueTypeSet { + type Item = ValueType; + type IntoIter = ::enum_set::Iter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl ::std::iter::FromIterator for ValueTypeSet { + fn from_iter>(iterator: I) -> Self { + let mut ret = Self::none(); + ret.0.extend(iterator); + ret + } +} + +impl ::std::iter::Extend for ValueTypeSet { + fn extend>(&mut self, iter: I) { + for element in iter { + self.0.insert(element); + } + } +}