From 1500d4348c3578f49271daddc10e3a72ef457438 Mon Sep 17 00:00:00 2001 From: Mark Watts Date: Sun, 22 Aug 2021 17:41:50 -0400 Subject: [PATCH 1/4] add blobs via #bytes to edn --- db-traits/errors.rs | 4 ++-- db/src/cache.rs | 6 ++++-- db/src/db.rs | 2 +- db/src/internal_types.rs | 2 +- db/src/timelines.rs | 2 +- edn/Cargo.toml | 2 ++ edn/src/lib.rs | 13 ++++++++++++- edn/src/pretty_print.rs | 10 ++++------ edn/src/query.rs | 3 ++- edn/src/types.rs | 15 +++++++++++++++ edn/tests/tests.rs | 28 ++++++++++++++++++++++++++++ tolstoy/src/remote_client.rs | 2 +- tolstoy/src/syncer.rs | 20 ++++++++++---------- 13 files changed, 83 insertions(+), 26 deletions(-) diff --git a/db-traits/errors.rs b/db-traits/errors.rs index 94ccc8fd..15612f44 100644 --- a/db-traits/errors.rs +++ b/db-traits/errors.rs @@ -118,10 +118,10 @@ impl ::std::fmt::Display for InputError { match self { BadDbId => { writeln!(f, ":db/id in map notation must either not be present or be an entid, an ident, or a tempid") - }, + } BadEntityPlace => { writeln!(f, "cannot convert value place into entity place") - }, + } } } } diff --git a/db/src/cache.rs b/db/src/cache.rs index c4ac794a..2fb05695 100644 --- a/db/src/cache.rs +++ b/db/src/cache.rs @@ -61,7 +61,7 @@ use std::iter::Peekable; use failure::ResultExt; use rusqlite; -use rusqlite::{params_from_iter}; +use rusqlite::params_from_iter; use core_traits::{Binding, Entid, TypedValue}; @@ -1072,7 +1072,9 @@ impl AttributeCaches { replacing: bool, ) -> Result<()> { let mut aev_factory = AevFactory::new(); - let rows = statement.query_map(params_from_iter(&args), |row| Ok(aev_factory.row_to_aev(row)))?; + let rows = statement.query_map(params_from_iter(&args), |row| { + Ok(aev_factory.row_to_aev(row)) + })?; let aevs = AevRows { rows }; self.accumulate_into_cache( None, diff --git a/db/src/db.rs b/db/src/db.rs index 5d2747cd..cbb05b41 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -22,9 +22,9 @@ use itertools; use itertools::Itertools; use rusqlite; use 
rusqlite::limits::Limit; +use rusqlite::params_from_iter; use rusqlite::types::{ToSql, ToSqlOutput}; use rusqlite::TransactionBehavior; -use rusqlite::{params_from_iter}; use crate::bootstrap; use crate::{repeat_values, to_namespaced_keyword}; diff --git a/db/src/internal_types.rs b/db/src/internal_types.rs index d7aa5929..1b41fcc3 100644 --- a/db/src/internal_types.rs +++ b/db/src/internal_types.rs @@ -75,7 +75,7 @@ impl TransactableValue for ValueAndSpan { } } Nil | Boolean(_) | Instant(_) | BigInteger(_) | Float(_) | Uuid(_) | PlainSymbol(_) - | NamespacedSymbol(_) | Vector(_) | Set(_) | Map(_) => { + | NamespacedSymbol(_) | Vector(_) | Set(_) | Map(_) | Bytes(_) => { bail!(DbErrorKind::InputError(errors::InputError::BadEntityPlace)) } } diff --git a/db/src/timelines.rs b/db/src/timelines.rs index ae14e047..cbb35eac 100644 --- a/db/src/timelines.rs +++ b/db/src/timelines.rs @@ -81,7 +81,7 @@ fn move_transactions_to( new_timeline, crate::repeat_values(tx_ids.len(), 1) ), - params_from_iter(tx_ids.iter()) + params_from_iter(tx_ids.iter()), )?; Ok(()) } diff --git a/edn/Cargo.toml b/edn/Cargo.toml index d65dc4cf..51ce50d1 100644 --- a/edn/Cargo.toml +++ b/edn/Cargo.toml @@ -19,6 +19,8 @@ uuid = { version = "~0.8", features = ["v4", "serde"] } serde = { version = "~1.0", optional = true } serde_derive = { version = "~1.0", optional = true } peg = "~0.7" +bytes = "1.0.1" +hex = "0.4.3" [dev-dependencies] serde_test = "~1.0" diff --git a/edn/src/lib.rs b/edn/src/lib.rs index 2cbb21e2..1d1cfec2 100644 --- a/edn/src/lib.rs +++ b/edn/src/lib.rs @@ -8,7 +8,9 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +extern crate bytes; extern crate chrono; +extern crate hex; extern crate itertools; extern crate num; extern crate ordered_float; @@ -38,7 +40,9 @@ pub mod value_rc; pub use crate::value_rc::{Cloned, FromRc, ValueRc}; // Re-export the types we use. 
+use bytes::Bytes; pub use chrono::{DateTime, Utc}; +use hex::decode; pub use num::BigInt; pub use ordered_float::OrderedFloat; pub use uuid::Uuid; @@ -172,6 +176,13 @@ peg::parser!(pub grammar parse() for str { pub rule uuid() -> SpannedValue = "#uuid" whitespace()+ u:uuid_string() { SpannedValue::Uuid(u) } + rule byte_buffer() -> Bytes = + u:$( ['a'..='f' | 'A'..='F' | '0'..='9']* ) { + let b = decode(u).expect("this is a valid hex byte string"); + Bytes::copy_from_slice(&b) + } + pub rule bytes() -> SpannedValue = "#bytes" whitespace()+ u:byte_buffer() + { SpannedValue::Bytes(u) } rule namespace_divider() = "." rule namespace_separator() = "/" @@ -219,7 +230,7 @@ peg::parser!(pub grammar parse() for str { // Note: It's important that float comes before integer or the parser assumes that floats are integers and fails to parse. pub rule value() -> ValueAndSpan = - __ start:position!() v:(nil() / nan() / infinity() / boolean() / number() / inst() / uuid() / text() / keyword() / symbol() / list() / vector() / map() / set()) end:position!() __ { + __ start:position!() v:(nil() / nan() / infinity() / boolean() / number() / inst() / uuid() / text() / keyword() / symbol() / list() / vector() / map() / set() / bytes() ) end:position!() __ { ValueAndSpan { inner: v, span: Span::new(start, end) diff --git a/edn/src/pretty_print.rs b/edn/src/pretty_print.rs index 2e92a192..acaa579c 100644 --- a/edn/src/pretty_print.rs +++ b/edn/src/pretty_print.rs @@ -58,9 +58,7 @@ impl Value { let open = open.into(); let n = open.len() as isize; let i = { - let this = vs - .into_iter() - .map(|v| v.as_doc(allocator)); + let this = vs.into_iter().map(|v| v.as_doc(allocator)); let element = allocator.line(); Itertools::intersperse(this, element) }; @@ -86,9 +84,9 @@ impl Value { Value::Map(ref vs) => { let xs = { let this = vs - .iter() - .rev() - .map(|(k, v)| k.as_doc(pp).append(pp.line()).append(v.as_doc(pp)).group()); + .iter() + .rev() + .map(|(k, v)| 
k.as_doc(pp).append(pp.line()).append(v.as_doc(pp)).group()); let element = pp.line(); Itertools::intersperse(this, element) }; diff --git a/edn/src/query.rs b/edn/src/query.rs index 7cc6bcd3..a788a8f0 100644 --- a/edn/src/query.rs +++ b/edn/src/query.rs @@ -233,7 +233,7 @@ impl FromValue for FnArg { { Some(FnArg::Constant(x.clone().into())) } - Nil | NamespacedSymbol(_) | Vector(_) | List(_) | Set(_) | Map(_) => None, + Nil | NamespacedSymbol(_) | Vector(_) | List(_) | Set(_) | Map(_) | Bytes(_) => None, } } } @@ -410,6 +410,7 @@ impl FromValue for PatternValuePlace { crate::SpannedValue::List(_) => None, crate::SpannedValue::Set(_) => None, crate::SpannedValue::Vector(_) => None, + crate::SpannedValue::Bytes(_) => None, } } } diff --git a/edn/src/types.rs b/edn/src/types.rs index c1f4b078..37d4158a 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -27,6 +27,8 @@ use uuid::Uuid; use crate::symbols; +use bytes::Bytes; +use hex::encode; /// Value represents one of the allowed values in an EDN string. #[derive(PartialEq, Eq, Hash, Clone, Debug)] pub enum Value { @@ -52,6 +54,7 @@ pub enum Value { // See https://internals.rust-lang.org/t/implementing-hash-for-hashset-hashmap/3817/1 Set(BTreeSet), Map(BTreeMap), + Bytes(Bytes), } /// `SpannedValue` is the parallel to `Value` but used in `ValueAndSpan`. @@ -73,6 +76,7 @@ pub enum SpannedValue { List(LinkedList), Set(BTreeSet), Map(BTreeMap), + Bytes(Bytes), } /// Span represents the current offset (start, end) into the input string. @@ -172,6 +176,7 @@ impl From for Value { .map(|(x, y)| (x.without_spans(), y.without_spans())) .collect(), ), + SpannedValue::Bytes(b) => Value::Bytes(b), } } } @@ -328,6 +333,7 @@ macro_rules! def_common_value_methods { def_is!(is_list, $t::List(_)); def_is!(is_set, $t::Set(_)); def_is!(is_map, $t::Map(_)); + def_is!(is_bytes, $t::Bytes(_)); pub fn is_keyword(&self) -> bool { match self { @@ -360,6 +366,7 @@ macro_rules! 
def_common_value_methods { def_as_ref!(as_uuid, $t::Uuid, Uuid); def_as_ref!(as_symbol, $t::PlainSymbol, symbols::PlainSymbol); def_as_ref!(as_namespaced_symbol, $t::NamespacedSymbol, symbols::NamespacedSymbol); + def_as_ref!(as_bytes, $t::Bytes, Bytes); pub fn as_keyword(&self) -> Option<&symbols::Keyword> { match self { @@ -397,6 +404,7 @@ macro_rules! def_common_value_methods { def_into!(into_uuid, $t::Uuid, Uuid,); def_into!(into_symbol, $t::PlainSymbol, symbols::PlainSymbol,); def_into!(into_namespaced_symbol, $t::NamespacedSymbol, symbols::NamespacedSymbol,); + def_into!(into_bytes, $t::Bytes, Bytes,); pub fn into_keyword(self) -> Option { match self { @@ -467,6 +475,7 @@ macro_rules! def_common_value_methods { $t::List(_) => 13, $t::Set(_) => 14, $t::Map(_) => 15, + $t::Bytes(_) => 16, } } @@ -487,6 +496,7 @@ macro_rules! def_common_value_methods { $t::List(_) => true, $t::Set(_) => true, $t::Map(_) => true, + $t::Bytes(_) => true, } } @@ -524,6 +534,7 @@ macro_rules! def_common_value_ord { (&$t::List(ref a), &$t::List(ref b)) => b.cmp(a), (&$t::Set(ref a), &$t::Set(ref b)) => b.cmp(a), (&$t::Map(ref a), &$t::Map(ref b)) => b.cmp(a), + (&$t::Bytes(ref a), &$t::Bytes(ref b)) => b.cmp(a), _ => $value.precedence().cmp(&$other.precedence()), } }; @@ -590,6 +601,10 @@ macro_rules! def_common_value_display { } write!($f, " }}") } + $t::Bytes(ref v) => { + let s = encode(v); + write!($f, "#bytes \"{}\"", s) + } } }; } diff --git a/edn/tests/tests.rs b/edn/tests/tests.rs index 82316c29..077f3265 100644 --- a/edn/tests/tests.rs +++ b/edn/tests/tests.rs @@ -82,6 +82,7 @@ fn_parse_into_value!(vector); fn_parse_into_value!(set); fn_parse_into_value!(map); fn_parse_into_value!(value); +fn_parse_into_value!(bytes); #[test] fn test_nil() { @@ -316,6 +317,27 @@ fn test_uuid() { assert_eq!(value.to_pretty(100).unwrap(), s); } +#[test] +fn test_bytes() { + assert!(parse::bytes("#bytes01 ").is_err()); // No whitespace. 
+ assert!(parse::bytes("#bytes _ZZ").is_err()); // Payload is not hex digits. + assert!(parse::bytes("#bytes 01 ").is_err()); // Trailing whitespace. + assert!(parse::bytes("#01 ").is_err()); // Missing "bytes" tag name. + + let expected = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + let s = format!("{} {}", "#bytes", hex::encode(expected.clone())); + let actual: Value = parse::bytes(&s).expect("parse success").into(); + assert!(actual.is_bytes()); + assert_eq!(expected, actual.as_bytes().unwrap().to_vec()); + + assert_eq!( + self::bytes("#bytes 010203050403022a").unwrap(), + Value::Bytes(bytes::Bytes::copy_from_slice(&vec!( + 1, 2, 3, 5, 4, 3, 2, 42 + ))) + ); +} + #[test] fn test_inst() { assert!(parse::value("#inst\"2016-01-01T11:00:00.000Z\"").is_err()); // No whitespace. @@ -584,6 +606,12 @@ fn test_value() { value("#inst \"2017-04-28T20:23:05.187Z\"").unwrap(), Instant(Utc.timestamp(1493410985, 187000000)) ); + assert_eq!( + value("#bytes 010203050403022a").unwrap(), + Bytes(bytes::Bytes::copy_from_slice(&vec!( + 1, 2, 3, 5, 4, 3, 2, 42 + ))) + ); } #[test] diff --git a/tolstoy/src/remote_client.rs b/tolstoy/src/remote_client.rs index cfee9335..9c620716 100644 --- a/tolstoy/src/remote_client.rs +++ b/tolstoy/src/remote_client.rs @@ -10,7 +10,7 @@ #![allow(dead_code)] -use hyper::{body, header, Client, Body, Method, Request, StatusCode}; +use hyper::{body, header, Body, Client, Method, Request, StatusCode}; use hyper_tls::HttpsConnector; // TODO: https://github.com/mozilla/mentat/issues/570 // use serde_cbor; diff --git a/tolstoy/src/syncer.rs b/tolstoy/src/syncer.rs index 4ad34e96..b2c61b77 100644 --- a/tolstoy/src/syncer.rs +++ b/tolstoy/src/syncer.rs @@ -741,13 +741,14 @@ impl Syncer { // Since we've "merged" with the remote bootstrap, the "no-op" and // "local fast-forward" cases are reported as merges.
match Syncer::what_do(remote_state, local_state) { - SyncAction::NoOp => { - Ok(SyncReport::Merge(SyncFollowup::None)) - } + SyncAction::NoOp => Ok(SyncReport::Merge(SyncFollowup::None)), SyncAction::PopulateRemote => { // This is a programming error. - bail!(TolstoyError::UnexpectedState("Remote state can't be empty on first sync against non-empty remote".to_string())) + bail!(TolstoyError::UnexpectedState( + "Remote state can't be empty on first sync against non-empty remote" + .to_string() + )) } SyncAction::RemoteFastForward => { @@ -761,12 +762,11 @@ impl Syncer { SyncAction::CombineChanges => { let local_txs = Processor::process( - &ip.transaction, Some(local_metadata.root), LocalTxSet::new())?; - Syncer::merge( - ip, - incoming_txs[1..].to_vec(), - local_txs, - ) + &ip.transaction, + Some(local_metadata.root), + LocalTxSet::new(), + )?; + Syncer::merge(ip, incoming_txs[1..].to_vec(), local_txs) } } } From 179c1230611cdf26e0d418792d2258c468621ac1 Mon Sep 17 00:00:00 2001 From: Mark Watts Date: Mon, 23 Aug 2021 17:21:51 -0400 Subject: [PATCH 2/4] fix panic macro use --- ffi/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 61ebd653..14de4232 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -1499,7 +1499,7 @@ pub unsafe extern "C" fn query_builder_bind_ref_kw( let kw = kw_from_string(c_char_to_string(value)); let query_builder = &mut *query_builder; if let Some(err) = query_builder.bind_ref_from_kw(&var, kw).err() { - panic!(err); + std::panic::panic_any(err); } } From d3821432bcf2dd0862bca3cb6da2868e5371f4ca Mon Sep 17 00:00:00 2001 From: Mark Watts Date: Mon, 23 Aug 2021 17:23:09 -0400 Subject: [PATCH 3/4] fix entity parsing: bytes were classed as a collection, so they were not treated as atoms --- edn/src/lib.rs | 5 +++-- edn/src/types.rs | 4 ++-- edn/tests/tests.rs | 12 ++++++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/edn/src/lib.rs
b/edn/src/lib.rs index 1d1cfec2..dfa846ac 100644 --- a/edn/src/lib.rs +++ b/edn/src/lib.rs @@ -177,12 +177,13 @@ peg::parser!(pub grammar parse() for str { { SpannedValue::Uuid(u) } rule byte_buffer() -> Bytes = - u:$( ['a'..='f' | 'A'..='F' | '0'..='9']* ) { + u:$( hex()+ ) { let b = decode(u).expect("this is a valid hex byte string"); Bytes::copy_from_slice(&b) } pub rule bytes() -> SpannedValue = "#bytes" whitespace()+ u:byte_buffer() { SpannedValue::Bytes(u) } + rule namespace_divider() = "." rule namespace_separator() = "/" @@ -230,7 +231,7 @@ peg::parser!(pub grammar parse() for str { // Note: It's important that float comes before integer or the parser assumes that floats are integers and fails to parse. pub rule value() -> ValueAndSpan = - __ start:position!() v:(nil() / nan() / infinity() / boolean() / number() / inst() / uuid() / text() / keyword() / symbol() / list() / vector() / map() / set() / bytes() ) end:position!() __ { + __ start:position!() v:(nil() / nan() / infinity() / boolean() / number() / inst() / uuid() / bytes() / text() / keyword() / symbol() / list() / vector() / map() / set() ) end:position!() __ { ValueAndSpan { inner: v, span: Span::new(start, end) diff --git a/edn/src/types.rs b/edn/src/types.rs index 37d4158a..818b0453 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -496,7 +496,7 @@ macro_rules! def_common_value_methods { $t::List(_) => true, $t::Set(_) => true, $t::Map(_) => true, - $t::Bytes(_) => true, + $t::Bytes(_) => false, } } @@ -603,7 +603,7 @@ macro_rules! 
def_common_value_display { } $t::Bytes(ref v) => { let s = encode(v); - write!($f, "#bytes \"{}\"", s) + write!($f, "#bytes {}", s) } } }; diff --git a/edn/tests/tests.rs b/edn/tests/tests.rs index 077f3265..2baa6923 100644 --- a/edn/tests/tests.rs +++ b/edn/tests/tests.rs @@ -336,8 +336,20 @@ fn test_bytes() { 1, 2, 3, 5, 4, 3, 2, 42 ))) ); + let data = r#"[ { :test/instant #inst "2018-01-01T11:00:00Z" :test/bytes #bytes 010203050403022a } ]"#; + let result = parse::value(data).unwrap().without_spans().to_string(); + assert_eq!(data, result); } + +#[test] +fn test_entities() { + let d2 = r#"[ { :test/boolean true :test/long 33 :test/double 1.4 :test/string "foo" :test/keyword :foo/bar :test/uuid #uuid "12341234-1234-1234-1234-123412341234" :test/instant #inst "2018-01-01T11:00:00Z" :test/ref 1 :test/bytes #bytes 010203050403022a } ]"#; + let r2 = parse::entities(d2); + assert!(r2.is_ok()); +} + + #[test] fn test_inst() { assert!(parse::value("#inst\"2016-01-01T11:00:00.000Z\"").is_err()); // No whitespace. 
From 73feb622cdef707cfd0a112b4cabf65f32470c0a Mon Sep 17 00:00:00 2001 From: Mark Watts Date: Mon, 23 Aug 2021 17:25:10 -0400 Subject: [PATCH 4/4] implement bytes (aka blobs) as native type --- core-traits/Cargo.toml | 1 + core-traits/lib.rs | 24 ++++++++++++++++++++++++ core-traits/values.rs | 1 + core/src/sql_types.rs | 2 ++ db/src/db.rs | 6 ++++++ db/src/internal_types.rs | 3 ++- db/src/metadata.rs | 1 + db/src/schema.rs | 2 ++ db/tests/value_tests.rs | 5 +++++ query-algebrizer/tests/type_reqs.rs | 1 + query-projector-traits/aggregates.rs | 2 +- sql/src/lib.rs | 12 ++++++++++++ tests/query.rs | 4 +++- tools/cli/Cargo.toml | 2 ++ tools/cli/src/mentat_cli/repl.rs | 1 + 15 files changed, 64 insertions(+), 3 deletions(-) diff --git a/core-traits/Cargo.toml b/core-traits/Cargo.toml index 6829d55a..1a60bfa6 100644 --- a/core-traits/Cargo.toml +++ b/core-traits/Cargo.toml @@ -16,6 +16,7 @@ ordered-float = { version = "~2.7", features = ["serde"] } uuid = { version = "~0.8", features = ["v4", "serde"] } serde = { version = "~1.0", features = ["rc"] } serde_derive = "~1.0" +bytes = { version = "1.0.1", features = ["serde"] } [dependencies.edn] path = "../edn" diff --git a/core-traits/lib.rs b/core-traits/lib.rs index 21d7dad7..c09495fc 100644 --- a/core-traits/lib.rs +++ b/core-traits/lib.rs @@ -16,6 +16,7 @@ extern crate ordered_float; extern crate serde_derive; extern crate edn; extern crate uuid; +extern crate bytes; #[macro_use] extern crate lazy_static; @@ -33,6 +34,7 @@ use std::sync::Arc; use std::collections::BTreeMap; +use bytes::Bytes; use indexmap::IndexMap; use enum_set::EnumSet; @@ -280,6 +282,7 @@ pub enum ValueType { String, Keyword, Uuid, + Bytes, } impl ValueType { @@ -294,6 +297,7 @@ impl ValueType { s.insert(ValueType::String); s.insert(ValueType::Keyword); s.insert(ValueType::Uuid); + s.insert(ValueType::Bytes); s } } @@ -321,6 +325,7 @@ impl ValueType { ValueType::String => "string", ValueType::Keyword => "keyword", ValueType::Uuid => "uuid", + 
ValueType::Bytes => "bytes", }, ) } @@ -338,6 +343,7 @@ impl ValueType { "string" => Some(ValueType::String), "keyword" => Some(ValueType::Keyword), "uuid" => Some(ValueType::Uuid), + "bytes" => Some(ValueType::Bytes), _ => None, } } @@ -355,6 +361,7 @@ impl ValueType { ValueType::String => "string", ValueType::Keyword => "keyword", ValueType::Uuid => "uuid", + ValueType::Bytes => "bytes", }, ) } @@ -369,6 +376,7 @@ impl ValueType { ValueType::String => values::DB_TYPE_STRING.clone(), ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), ValueType::Uuid => values::DB_TYPE_UUID.clone(), + ValueType::Bytes => values::DB_TYPE_BYTES.clone(), } } @@ -391,6 +399,7 @@ impl fmt::Display for ValueType { ValueType::String => ":db.type/string", ValueType::Keyword => ":db.type/keyword", ValueType::Uuid => ":db.type/uuid", + ValueType::Bytes => ":db.type/bytes", } ) } @@ -414,6 +423,7 @@ pub enum TypedValue { String(ValueRc), Keyword(ValueRc), Uuid(Uuid), // It's only 128 bits, so this should be acceptable to clone. + Bytes(Bytes), } impl From for TypedValue { @@ -445,6 +455,7 @@ impl TypedValue { TypedValue::String(_) => ValueType::String, TypedValue::Keyword(_) => ValueType::Keyword, TypedValue::Uuid(_) => ValueType::Uuid, + TypedValue::Bytes(_) => ValueType::Bytes, } } @@ -596,6 +607,13 @@ impl TypedValue { _ => None, } } + + pub fn into_bytes(self) -> Option { + match self { + TypedValue::Bytes(b) => Some(b), + _ => None, + } + } } // We don't do From or From 'cos it's ambiguous. @@ -686,6 +704,12 @@ impl From for TypedValue { } } +impl From<&[u8]> for TypedValue { + fn from(bslice: &[u8]) -> Self { + TypedValue::Bytes(Bytes::copy_from_slice(bslice)) + } +} + trait MicrosecondPrecision { /// Truncate the provided `DateTime` to microsecond precision. 
fn microsecond_precision(self) -> Self; diff --git a/core-traits/values.rs b/core-traits/values.rs index 0cde838c..575369a6 100644 --- a/core-traits/values.rs +++ b/core-traits/values.rs @@ -58,6 +58,7 @@ lazy_static_namespaced_keyword_value!(DB_TYPE_REF, "db.type", "ref"); lazy_static_namespaced_keyword_value!(DB_TYPE_STRING, "db.type", "string"); lazy_static_namespaced_keyword_value!(DB_TYPE_URI, "db.type", "uri"); lazy_static_namespaced_keyword_value!(DB_TYPE_UUID, "db.type", "uuid"); +lazy_static_namespaced_keyword_value!(DB_TYPE_BYTES, "db.type", "bytes"); lazy_static_namespaced_keyword_value!(DB_UNIQUE, "db", "unique"); lazy_static_namespaced_keyword_value!(DB_UNIQUE_IDENTITY, "db.unique", "identity"); lazy_static_namespaced_keyword_value!(DB_UNIQUE_VALUE, "db.unique", "value"); diff --git a/core/src/sql_types.rs b/core/src/sql_types.rs index e2243256..ec89f054 100644 --- a/core/src/sql_types.rs +++ b/core/src/sql_types.rs @@ -51,6 +51,7 @@ impl SQLValueType for ValueType { ValueType::String => (10, None), ValueType::Uuid => (11, None), ValueType::Keyword => (13, None), + ValueType::Bytes => (15, Some(SQLTypeAffinity::Blob)), } } @@ -71,6 +72,7 @@ impl SQLValueType for ValueType { ValueType::String => false, Keyword => false, Uuid => false, + Bytes => false, } } } diff --git a/db/src/db.rs b/db/src/db.rs index cbb05b41..66589b4f 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -434,6 +434,9 @@ impl TypedSQLValue for TypedValue { Ok(TypedValue::Uuid(u)) } (13, rusqlite::types::Value::Text(x)) => to_namespaced_keyword(&x).map(|k| k.into()), + (15, rusqlite::types::Value::Blob(x)) => { + Ok(TypedValue::Bytes(x.into())) + } (_, value) => bail!(DbErrorKind::BadSQLValuePair(value, value_type_tag)), } } @@ -454,6 +457,7 @@ impl TypedSQLValue for TypedValue { Value::Float(ref x) => Some(TypedValue::Double(*x)), Value::Text(ref x) => Some(x.clone().into()), Value::Keyword(ref x) => Some(x.clone().into()), + Value::Bytes(b) => Some(TypedValue::Bytes(b.clone())), _ => 
None, } } @@ -470,6 +474,7 @@ impl TypedSQLValue for TypedValue { TypedValue::String(ref x) => (x.as_str().into(), 10), TypedValue::Uuid(ref u) => (u.as_bytes().to_vec().into(), 11), TypedValue::Keyword(ref x) => (x.to_string().into(), 13), + TypedValue::Bytes(b) => (b.to_vec().into(), 15), } } @@ -484,6 +489,7 @@ impl TypedSQLValue for TypedValue { TypedValue::String(ref x) => (Value::Text(x.as_ref().clone()), ValueType::String), TypedValue::Uuid(ref u) => (Value::Uuid(*u), ValueType::Uuid), TypedValue::Keyword(ref x) => (Value::Keyword(x.as_ref().clone()), ValueType::Keyword), + TypedValue::Bytes(b) => (Value::Bytes(b.clone()), ValueType::Bytes), } } } diff --git a/db/src/internal_types.rs b/db/src/internal_types.rs index 1b41fcc3..56f7fcee 100644 --- a/db/src/internal_types.rs +++ b/db/src/internal_types.rs @@ -105,7 +105,8 @@ impl TransactableValue for TypedValue { | TypedValue::Long(_) | TypedValue::Double(_) | TypedValue::Instant(_) - | TypedValue::Uuid(_) => { + | TypedValue::Uuid(_) + | TypedValue::Bytes(_) => { bail!(DbErrorKind::InputError(errors::InputError::BadEntityPlace)) } } diff --git a/db/src/metadata.rs b/db/src/metadata.rs index ac439394..9043a6a2 100644 --- a/db/src/metadata.rs +++ b/db/src/metadata.rs @@ -248,6 +248,7 @@ pub fn update_attribute_map_from_entid_triples( TypedValue::Ref(entids::DB_TYPE_REF) => { builder.value_type(ValueType::Ref); }, TypedValue::Ref(entids::DB_TYPE_STRING) => { builder.value_type(ValueType::String); }, TypedValue::Ref(entids::DB_TYPE_UUID) => { builder.value_type(ValueType::Uuid); }, + TypedValue::Ref(entids::DB_TYPE_BYTES) => { builder.value_type(ValueType::Bytes); }, _ => bail!(DbErrorKind::BadSchemaAssertion(format!("Expected [... :db/valueType :db.type/*] but got [... 
:db/valueType {:?}] for entid {} and attribute {}", value, entid, attr))) } }, diff --git a/db/src/schema.rs b/db/src/schema.rs index fdbed5ae..d9730acb 100644 --- a/db/src/schema.rs +++ b/db/src/schema.rs @@ -362,6 +362,7 @@ impl SchemaTypeChecking for Schema { (ValueType::Uuid, tv @ TypedValue::Uuid(_)) => Ok(tv), (ValueType::Instant, tv @ TypedValue::Instant(_)) => Ok(tv), (ValueType::Keyword, tv @ TypedValue::Keyword(_)) => Ok(tv), + (ValueType::Bytes, tv @ TypedValue::Bytes(_)) => Ok(tv), // Ref coerces a little: we interpret some things depending on the schema as a Ref. (ValueType::Ref, TypedValue::Long(x)) => Ok(TypedValue::Ref(x)), (ValueType::Ref, TypedValue::Keyword(ref x)) => { @@ -379,6 +380,7 @@ impl SchemaTypeChecking for Schema { | (vt @ ValueType::Uuid, _) | (vt @ ValueType::Instant, _) | (vt @ ValueType::Keyword, _) + | (vt @ ValueType::Bytes, _) | (vt @ ValueType::Ref, _) => { bail!(DbErrorKind::BadValuePair(format!("{}", value), vt)) } diff --git a/db/tests/value_tests.rs b/db/tests/value_tests.rs index 2d42cde2..08637cba 100644 --- a/db/tests/value_tests.rs +++ b/db/tests/value_tests.rs @@ -67,6 +67,11 @@ fn test_from_sql_value_pair() { .unwrap(), TypedValue::typed_ns_keyword("db", "keyword") ); + assert_eq!( + TypedValue::from_sql_value_pair(rusqlite::types::Value::Blob(vec![1,2,3,42]), 15) + .unwrap(), + TypedValue::Bytes((vec![1,2,3,42]).into()) + ); } #[test] diff --git a/query-algebrizer/tests/type_reqs.rs b/query-algebrizer/tests/type_reqs.rs index 42d2015b..fdca1e20 100644 --- a/query-algebrizer/tests/type_reqs.rs +++ b/query-algebrizer/tests/type_reqs.rs @@ -34,6 +34,7 @@ fn prepopulated_schema() -> Schema { .define_simple_attr("test", "uuid", ValueType::Uuid, false) .define_simple_attr("test", "instant", ValueType::Instant, false) .define_simple_attr("test", "ref", ValueType::Ref, false) + .define_simple_attr("test", "bytes", ValueType::Bytes, false) .schema } diff --git a/query-projector-traits/aggregates.rs 
b/query-projector-traits/aggregates.rs index e0b58312..ec1ca787 100644 --- a/query-projector-traits/aggregates.rs +++ b/query-projector-traits/aggregates.rs @@ -110,7 +110,7 @@ impl SimpleAggregationOp { String => Ok(the_type), // Unordered types. - Keyword | Ref | Uuid => { + Keyword | Ref | Uuid | Bytes => { bail!(ProjectorError::CannotApplyAggregateOperationToTypes( self, possibilities diff --git a/sql/src/lib.rs b/sql/src/lib.rs index 65dd6bc4..ca9683f4 100644 --- a/sql/src/lib.rs +++ b/sql/src/lib.rs @@ -181,6 +181,18 @@ impl QueryBuilder for SQLiteQueryBuilder { let v = Rc::new(rusqlite::types::Value::Text(s.as_ref().to_string())); self.push_static_arg(v); } + Bytes(b) => { + let bytes = b.to_vec(); + if let Some(arg) = self.byte_args.get(&bytes).cloned() { + // Why, borrow checker, why?! + self.push_named_arg(arg.as_str()); + } else { + let arg = self.next_argument_name(); + self.push_named_arg(arg.as_str()); + self.byte_args.insert(bytes, arg); + } + + }, } Ok(()) } diff --git a/tests/query.rs b/tests/query.rs index 96c82843..6a5cf616 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -835,6 +835,7 @@ fn test_type_reqs() { {:db/ident :test/uuid :db/valueType :db.type/uuid :db/cardinality :db.cardinality/one} {:db/ident :test/instant :db/valueType :db.type/instant :db/cardinality :db.cardinality/one} {:db/ident :test/ref :db/valueType :db.type/ref :db/cardinality :db.cardinality/one} + {:db/ident :test/bytes :db/valueType :db.type/bytes :db/cardinality :db.cardinality/one} ]"#, ) .unwrap(); @@ -849,7 +850,8 @@ fn test_type_reqs() { :test/keyword :foo/bar :test/uuid #uuid "12341234-1234-1234-1234-123412341234" :test/instant #inst "2018-01-01T11:00:00.000Z" - :test/ref 1} + :test/ref 1 + :test/bytes #bytes 010203050403022a } ]"#, ) .unwrap(); diff --git a/tools/cli/Cargo.toml b/tools/cli/Cargo.toml index 2afa8c0f..3edaa378 100644 --- a/tools/cli/Cargo.toml +++ b/tools/cli/Cargo.toml @@ -32,6 +32,8 @@ tabwriter = "~1.2" tempfile = "~3.2" termion = "~1.5" time 
= "~0.3" +bytes = { version = "1.0.1", features = ["serde"] } +hex = "0.4.3" [dependencies.rusqlite] version = "~0.25" diff --git a/tools/cli/src/mentat_cli/repl.rs b/tools/cli/src/mentat_cli/repl.rs index 885d53bb..d7e7c4bb 100644 --- a/tools/cli/src/mentat_cli/repl.rs +++ b/tools/cli/src/mentat_cli/repl.rs @@ -613,6 +613,7 @@ impl Repl { Ref(r) => format!("{}", r), String(ref s) => format!("{:?}", s.to_string()), Uuid(ref u) => format!("{}", u), + Bytes(b) => format!("#bytes {:?}", b.to_vec()), } } }