From 73feb622cdef707cfd0a112b4cabf65f32470c0a Mon Sep 17 00:00:00 2001 From: Mark Watts Date: Mon, 23 Aug 2021 17:25:10 -0400 Subject: [PATCH] implement bytes (aka blobs) as native type --- core-traits/Cargo.toml | 1 + core-traits/lib.rs | 24 ++++++++++++++++++++++++ core-traits/values.rs | 1 + core/src/sql_types.rs | 2 ++ db/src/db.rs | 6 ++++++ db/src/internal_types.rs | 3 ++- db/src/metadata.rs | 1 + db/src/schema.rs | 2 ++ db/tests/value_tests.rs | 5 +++++ query-algebrizer/tests/type_reqs.rs | 1 + query-projector-traits/aggregates.rs | 2 +- sql/src/lib.rs | 12 ++++++++++++ tests/query.rs | 4 +++- tools/cli/Cargo.toml | 2 ++ tools/cli/src/mentat_cli/repl.rs | 1 + 15 files changed, 64 insertions(+), 3 deletions(-) diff --git a/core-traits/Cargo.toml b/core-traits/Cargo.toml index 6829d55a..1a60bfa6 100644 --- a/core-traits/Cargo.toml +++ b/core-traits/Cargo.toml @@ -16,6 +16,7 @@ ordered-float = { version = "~2.7", features = ["serde"] } uuid = { version = "~0.8", features = ["v4", "serde"] } serde = { version = "~1.0", features = ["rc"] } serde_derive = "~1.0" +bytes = { version = "1.0.1", features = ["serde"] } [dependencies.edn] path = "../edn" diff --git a/core-traits/lib.rs b/core-traits/lib.rs index 21d7dad7..c09495fc 100644 --- a/core-traits/lib.rs +++ b/core-traits/lib.rs @@ -16,6 +16,7 @@ extern crate ordered_float; extern crate serde_derive; extern crate edn; extern crate uuid; +extern crate bytes; #[macro_use] extern crate lazy_static; @@ -33,6 +34,7 @@ use std::sync::Arc; use std::collections::BTreeMap; +use bytes::Bytes; use indexmap::IndexMap; use enum_set::EnumSet; @@ -280,6 +282,7 @@ pub enum ValueType { String, Keyword, Uuid, + Bytes, } impl ValueType { @@ -294,6 +297,7 @@ impl ValueType { s.insert(ValueType::String); s.insert(ValueType::Keyword); s.insert(ValueType::Uuid); + s.insert(ValueType::Bytes); s } } @@ -321,6 +325,7 @@ impl ValueType { ValueType::String => "string", ValueType::Keyword => "keyword", ValueType::Uuid => "uuid", + ValueType::Bytes => "bytes", }, ) } @@ -338,6 +343,7 @@ impl ValueType { "string" => Some(ValueType::String), "keyword" => Some(ValueType::Keyword), "uuid" => Some(ValueType::Uuid), + "bytes" => Some(ValueType::Bytes), _ => None, } } @@ -355,6 +361,7 @@ impl ValueType { ValueType::String => "string", ValueType::Keyword => "keyword", ValueType::Uuid => "uuid", + ValueType::Bytes => "bytes", }, ) } @@ -369,6 +376,7 @@ impl ValueType { ValueType::String => values::DB_TYPE_STRING.clone(), ValueType::Keyword => values::DB_TYPE_KEYWORD.clone(), ValueType::Uuid => values::DB_TYPE_UUID.clone(), + ValueType::Bytes => values::DB_TYPE_BYTES.clone(), } } @@ -391,6 +399,7 @@ impl fmt::Display for ValueType { ValueType::String => ":db.type/string", ValueType::Keyword => ":db.type/keyword", ValueType::Uuid => ":db.type/uuid", + ValueType::Bytes => ":db.type/bytes", } ) } @@ -414,6 +423,7 @@ pub enum TypedValue { String(ValueRc), Keyword(ValueRc), Uuid(Uuid), // It's only 128 bits, so this should be acceptable to clone. + Bytes(Bytes), } impl From for TypedValue { @@ -445,6 +455,7 @@ impl TypedValue { TypedValue::String(_) => ValueType::String, TypedValue::Keyword(_) => ValueType::Keyword, TypedValue::Uuid(_) => ValueType::Uuid, + TypedValue::Bytes(_) => ValueType::Bytes, } } @@ -596,6 +607,13 @@ impl TypedValue { _ => None, } } + + pub fn into_bytes(self) -> Option { + match self { + TypedValue::Bytes(b) => Some(b), + _ => None, + } + } } // We don't do From or From 'cos it's ambiguous. @@ -686,6 +704,12 @@ impl From for TypedValue { } } +impl From<&[u8]> for TypedValue { + fn from(bslice: &[u8]) -> Self { + TypedValue::Bytes(Bytes::copy_from_slice(bslice)) + } +} + trait MicrosecondPrecision { /// Truncate the provided `DateTime` to microsecond precision. fn microsecond_precision(self) -> Self; diff --git a/core-traits/values.rs b/core-traits/values.rs index 0cde838c..575369a6 100644 --- a/core-traits/values.rs +++ b/core-traits/values.rs @@ -58,6 +58,7 @@ lazy_static_namespaced_keyword_value!(DB_TYPE_REF, "db.type", "ref"); lazy_static_namespaced_keyword_value!(DB_TYPE_STRING, "db.type", "string"); lazy_static_namespaced_keyword_value!(DB_TYPE_URI, "db.type", "uri"); lazy_static_namespaced_keyword_value!(DB_TYPE_UUID, "db.type", "uuid"); +lazy_static_namespaced_keyword_value!(DB_TYPE_BYTES, "db.type", "bytes"); lazy_static_namespaced_keyword_value!(DB_UNIQUE, "db", "unique"); lazy_static_namespaced_keyword_value!(DB_UNIQUE_IDENTITY, "db.unique", "identity"); lazy_static_namespaced_keyword_value!(DB_UNIQUE_VALUE, "db.unique", "value"); diff --git a/core/src/sql_types.rs b/core/src/sql_types.rs index e2243256..ec89f054 100644 --- a/core/src/sql_types.rs +++ b/core/src/sql_types.rs @@ -51,6 +51,7 @@ impl SQLValueType for ValueType { ValueType::String => (10, None), ValueType::Uuid => (11, None), ValueType::Keyword => (13, None), + ValueType::Bytes => (15, Some(SQLTypeAffinity::Blob)), } } @@ -71,6 +72,7 @@ impl SQLValueType for ValueType { ValueType::String => false, Keyword => false, Uuid => false, + Bytes => false, } } } diff --git a/db/src/db.rs b/db/src/db.rs index cbb05b41..66589b4f 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -434,6 +434,9 @@ impl TypedSQLValue for TypedValue { Ok(TypedValue::Uuid(u)) } (13, rusqlite::types::Value::Text(x)) => to_namespaced_keyword(&x).map(|k| k.into()), + (15, rusqlite::types::Value::Blob(x)) => { + Ok(TypedValue::Bytes(x.into())) + } (_, value) => bail!(DbErrorKind::BadSQLValuePair(value, value_type_tag)), } } @@ -454,6 +457,7 @@ impl TypedSQLValue for TypedValue { Value::Float(ref x) => Some(TypedValue::Double(*x)), Value::Text(ref x) => Some(x.clone().into()), Value::Keyword(ref x) => Some(x.clone().into()), + Value::Bytes(b) => Some(TypedValue::Bytes(b.clone())), _ => None, } } @@ -470,6 +474,7 @@ impl TypedSQLValue for TypedValue { TypedValue::String(ref x) => (x.as_str().into(), 10), TypedValue::Uuid(ref u) => (u.as_bytes().to_vec().into(), 11), TypedValue::Keyword(ref x) => (x.to_string().into(), 13), + TypedValue::Bytes(b) => (b.to_vec().into(), 15), } } @@ -484,6 +489,7 @@ impl TypedSQLValue for TypedValue { TypedValue::String(ref x) => (Value::Text(x.as_ref().clone()), ValueType::String), TypedValue::Uuid(ref u) => (Value::Uuid(*u), ValueType::Uuid), TypedValue::Keyword(ref x) => (Value::Keyword(x.as_ref().clone()), ValueType::Keyword), + TypedValue::Bytes(b) => (Value::Bytes(b.clone()), ValueType::Bytes), } } } diff --git a/db/src/internal_types.rs b/db/src/internal_types.rs index 1b41fcc3..56f7fcee 100644 --- a/db/src/internal_types.rs +++ b/db/src/internal_types.rs @@ -105,7 +105,8 @@ impl TransactableValue for TypedValue { | TypedValue::Long(_) | TypedValue::Double(_) | TypedValue::Instant(_) - | TypedValue::Uuid(_) => { + | TypedValue::Uuid(_) + | TypedValue::Bytes(_) => { bail!(DbErrorKind::InputError(errors::InputError::BadEntityPlace)) } } diff --git a/db/src/metadata.rs b/db/src/metadata.rs index ac439394..9043a6a2 100644 --- a/db/src/metadata.rs +++ b/db/src/metadata.rs @@ -248,6 +248,7 @@ pub fn update_attribute_map_from_entid_triples( TypedValue::Ref(entids::DB_TYPE_REF) => { builder.value_type(ValueType::Ref); }, TypedValue::Ref(entids::DB_TYPE_STRING) => { builder.value_type(ValueType::String); }, TypedValue::Ref(entids::DB_TYPE_UUID) => { builder.value_type(ValueType::Uuid); }, + TypedValue::Ref(entids::DB_TYPE_BYTES) => { builder.value_type(ValueType::Bytes); }, _ => bail!(DbErrorKind::BadSchemaAssertion(format!("Expected [... :db/valueType :db.type/*] but got [... :db/valueType {:?}] for entid {} and attribute {}", value, entid, attr))) } }, diff --git a/db/src/schema.rs b/db/src/schema.rs index fdbed5ae..d9730acb 100644 --- a/db/src/schema.rs +++ b/db/src/schema.rs @@ -362,6 +362,7 @@ impl SchemaTypeChecking for Schema { (ValueType::Uuid, tv @ TypedValue::Uuid(_)) => Ok(tv), (ValueType::Instant, tv @ TypedValue::Instant(_)) => Ok(tv), (ValueType::Keyword, tv @ TypedValue::Keyword(_)) => Ok(tv), + (ValueType::Bytes, tv @ TypedValue::Bytes(_)) => Ok(tv), // Ref coerces a little: we interpret some things depending on the schema as a Ref. (ValueType::Ref, TypedValue::Long(x)) => Ok(TypedValue::Ref(x)), (ValueType::Ref, TypedValue::Keyword(ref x)) => { @@ -379,6 +380,7 @@ impl SchemaTypeChecking for Schema { | (vt @ ValueType::Uuid, _) | (vt @ ValueType::Instant, _) | (vt @ ValueType::Keyword, _) + | (vt @ ValueType::Bytes, _) | (vt @ ValueType::Ref, _) => { bail!(DbErrorKind::BadValuePair(format!("{}", value), vt)) } diff --git a/db/tests/value_tests.rs b/db/tests/value_tests.rs index 2d42cde2..08637cba 100644 --- a/db/tests/value_tests.rs +++ b/db/tests/value_tests.rs @@ -67,6 +67,11 @@ fn test_from_sql_value_pair() { .unwrap(), TypedValue::typed_ns_keyword("db", "keyword") ); + assert_eq!( + TypedValue::from_sql_value_pair(rusqlite::types::Value::Blob(vec![1,2,3,42]), 15) + .unwrap(), + TypedValue::Bytes((vec![1,2,3,42]).into()) + ); } #[test] diff --git a/query-algebrizer/tests/type_reqs.rs b/query-algebrizer/tests/type_reqs.rs index 42d2015b..fdca1e20 100644 --- a/query-algebrizer/tests/type_reqs.rs +++ b/query-algebrizer/tests/type_reqs.rs @@ -34,6 +34,7 @@ fn prepopulated_schema() -> Schema { .define_simple_attr("test", "uuid", ValueType::Uuid, false) .define_simple_attr("test", "instant", ValueType::Instant, false) .define_simple_attr("test", "ref", ValueType::Ref, false) + .define_simple_attr("test", "bytes", ValueType::Bytes, false) .schema } diff --git a/query-projector-traits/aggregates.rs b/query-projector-traits/aggregates.rs index e0b58312..ec1ca787 100644 --- a/query-projector-traits/aggregates.rs +++ b/query-projector-traits/aggregates.rs @@ -110,7 +110,7 @@ impl SimpleAggregationOp { String => Ok(the_type), // Unordered types. - Keyword | Ref | Uuid => { + Keyword | Ref | Uuid | Bytes => { bail!(ProjectorError::CannotApplyAggregateOperationToTypes( self, possibilities diff --git a/sql/src/lib.rs b/sql/src/lib.rs index 65dd6bc4..ca9683f4 100644 --- a/sql/src/lib.rs +++ b/sql/src/lib.rs @@ -181,6 +181,18 @@ impl QueryBuilder for SQLiteQueryBuilder { let v = Rc::new(rusqlite::types::Value::Text(s.as_ref().to_string())); self.push_static_arg(v); } + Bytes(b) => { + let bytes = b.to_vec(); + if let Some(arg) = self.byte_args.get(&bytes).cloned() { + // Why, borrow checker, why?! + self.push_named_arg(arg.as_str()); + } else { + let arg = self.next_argument_name(); + self.push_named_arg(arg.as_str()); + self.byte_args.insert(bytes, arg); + } + + }, } Ok(()) } diff --git a/tests/query.rs b/tests/query.rs index 96c82843..6a5cf616 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -835,6 +835,7 @@ fn test_type_reqs() { {:db/ident :test/uuid :db/valueType :db.type/uuid :db/cardinality :db.cardinality/one} {:db/ident :test/instant :db/valueType :db.type/instant :db/cardinality :db.cardinality/one} {:db/ident :test/ref :db/valueType :db.type/ref :db/cardinality :db.cardinality/one} + {:db/ident :test/bytes :db/valueType :db.type/bytes :db/cardinality :db.cardinality/one} ]"#, ) .unwrap(); @@ -849,7 +850,8 @@ fn test_type_reqs() { :test/keyword :foo/bar :test/uuid #uuid "12341234-1234-1234-1234-123412341234" :test/instant #inst "2018-01-01T11:00:00.000Z" - :test/ref 1} + :test/ref 1 + :test/bytes #bytes 010203050403022a } ]"#, ) .unwrap(); diff --git a/tools/cli/Cargo.toml b/tools/cli/Cargo.toml index 2afa8c0f..3edaa378 100644 --- a/tools/cli/Cargo.toml +++ b/tools/cli/Cargo.toml @@ -32,6 +32,8 @@ tabwriter = "~1.2" tempfile = "~3.2" termion = "~1.5" time = "~0.3" +bytes = { version = "1.0.1", features = ["serde"] } +hex = "0.4.3" [dependencies.rusqlite] version = "~0.25" diff --git a/tools/cli/src/mentat_cli/repl.rs b/tools/cli/src/mentat_cli/repl.rs index 885d53bb..d7e7c4bb 100644 --- a/tools/cli/src/mentat_cli/repl.rs +++ b/tools/cli/src/mentat_cli/repl.rs @@ -613,6 +613,7 @@ impl Repl { Ref(r) => format!("{}", r), String(ref s) => format!("{:?}", s.to_string()), Uuid(ref u) => format!("{}", u), + Bytes(b) => format!("#bytes {:?}", b.to_vec()), } } }