From ed04083ceb957651505fd9626e0e043cbc43365c Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Tue, 6 Jun 2017 18:21:07 -0700 Subject: [PATCH 1/2] Dedupe SQL arguments. This isn't perfect -- we still need to clone in a couple of cases -- but it avoids us passing duplicate strings down into SQLite whenever the same value is mentioned more than once in a query. --- query-translator/tests/translate.rs | 5 +-- sql/src/lib.rs | 64 +++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index 1342acf9..5c28c56b 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -381,7 +381,7 @@ fn test_complex_or_join() { `datoms` AS `datoms03` \ WHERE `datoms02`.a = 95 \ AND `datoms03`.a = 96 \ - AND `datoms03`.v = $v2 \ + AND `datoms03`.v = $v1 \ AND `datoms02`.v = `datoms03`.e) AS `c00`, \ `datoms` AS `datoms04`, \ `datoms` AS `datoms05` \ @@ -391,8 +391,7 @@ fn test_complex_or_join() { AND `c00`.`?page` = `datoms05`.e \ LIMIT 1"); assert_eq!(args, vec![make_arg("$v0", "http://foo.com/"), - make_arg("$v1", "Foo"), - make_arg("$v2", "Foo")]); + make_arg("$v1", "Foo")]); } #[test] diff --git a/sql/src/lib.rs b/sql/src/lib.rs index 7b926de4..270ce0e0 100644 --- a/sql/src/lib.rs +++ b/sql/src/lib.rs @@ -17,11 +17,14 @@ extern crate mentat_core; use std::rc::Rc; +use std::collections::HashMap; + use ordered_float::OrderedFloat; use mentat_core::{ ToMicros, TypedValue, + Uuid, }; pub use rusqlite::types::Value; @@ -95,7 +98,14 @@ pub struct SQLiteQueryBuilder { arg_prefix: String, arg_counter: i64, - args: Vec<(String, Rc)>, + + // We can't just use an InternSet on the rusqlite::types::Value instances, because that + // includes f64, so it's not Hash or Eq. + // Instead we track UUID and String arguments separately, mapping them to their argument name, + // in order to dedupe. We'll add these to the regular argument vector later. + uuid_args: HashMap, String>, // From value to argument name. + string_args: HashMap, String>, // From value to argument name. + args: Vec<(String, Rc)>, // (arg, value). } impl SQLiteQueryBuilder { @@ -108,13 +118,22 @@ impl SQLiteQueryBuilder { sql: String::new(), arg_prefix: prefix, arg_counter: 0, + + uuid_args: HashMap::default(), + string_args: HashMap::default(), args: vec![], } } - fn push_static_arg(&mut self, val: Rc) { + fn next_argument_name(&mut self) -> String { let arg = format!("{}{}", self.arg_prefix, self.arg_counter); self.arg_counter = self.arg_counter + 1; + arg + } + + fn push_static_arg(&mut self, val: Rc) { + // TODO: intern these, too. + let arg = self.next_argument_name(); self.push_named_arg(arg.as_str()); self.args.push((arg, val)); } @@ -147,18 +166,28 @@ impl QueryBuilder for SQLiteQueryBuilder { self.push_sql(format!("{}", dt.to_micros()).as_str()); // TODO: argument instead? }, &Uuid(ref u) => { - // Get a byte array. - let bytes = u.as_bytes().clone(); - let v = Rc::new(rusqlite::types::Value::Blob(bytes.to_vec())); - self.push_static_arg(v); + if let Some(arg) = self.uuid_args.get(u).cloned() { // Why, borrow checker, why?! + self.push_named_arg(arg.as_str()); + } else { + let arg = self.next_argument_name(); + self.push_named_arg(arg.as_str()); + self.uuid_args.insert(Rc::new(u.clone()), arg); + } }, // These are both `Rc`. Unfortunately, we can't use that fact when // turning these into rusqlite Values. + // However, we can check to see whether there's an existing var that matches… &String(ref s) => { - let v = Rc::new(rusqlite::types::Value::Text(s.as_ref().clone())); - self.push_static_arg(v); + if let Some(arg) = self.string_args.get(s).cloned() { + self.push_named_arg(arg.as_str()); + } else { + let arg = self.next_argument_name(); + self.push_named_arg(arg.as_str()); + self.string_args.insert(s.clone(), arg); + } }, &Keyword(ref s) => { + // TODO: intern. let v = Rc::new(rusqlite::types::Value::Text(s.as_ref().to_string())); self.push_static_arg(v); }, @@ -190,9 +219,26 @@ impl QueryBuilder for SQLiteQueryBuilder { } fn finish(self) -> SQLQuery { + // We collected string and UUID arguments into separate maps so that we could + // dedupe them. Now we need to turn them into rusqlite Values. + let mut args = self.args; + let string_args = self.string_args.into_iter().map(|(val, arg)| { + (arg, Rc::new(rusqlite::types::Value::Text(val.as_ref().clone()))) + }); + let uuid_args = self.uuid_args.into_iter().map(|(val, arg)| { + // Get a byte array. + let bytes = val.as_bytes().clone(); + (arg, Rc::new(rusqlite::types::Value::Blob(bytes.to_vec()))) + }); + + args.extend(string_args); + args.extend(uuid_args); + + // Get the args in the right order -- $v0, $v1… + args.sort_by(|&(ref k1, _), &(ref k2, _)| k1.cmp(k2)); SQLQuery { sql: self.sql, - args: self.args, + args: args, } } } From 2c5234699939dafc325f6fa382444a622928c4b6 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Wed, 7 Jun 2017 11:55:05 -0700 Subject: [PATCH 2/2] Review comment: generalize from Uuid SQL arguments to byte arrays. --- sql/src/lib.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/sql/src/lib.rs b/sql/src/lib.rs index 270ce0e0..b7b7ab0c 100644 --- a/sql/src/lib.rs +++ b/sql/src/lib.rs @@ -24,7 +24,6 @@ use ordered_float::OrderedFloat; use mentat_core::{ ToMicros, TypedValue, - Uuid, }; pub use rusqlite::types::Value; @@ -101,9 +100,9 @@ pub struct SQLiteQueryBuilder { // We can't just use an InternSet on the rusqlite::types::Value instances, because that // includes f64, so it's not Hash or Eq. - // Instead we track UUID and String arguments separately, mapping them to their argument name, + // Instead we track byte and String arguments separately, mapping them to their argument name, // in order to dedupe. We'll add these to the regular argument vector later. - uuid_args: HashMap, String>, // From value to argument name. + byte_args: HashMap, String>, // From value to argument name. string_args: HashMap, String>, // From value to argument name. args: Vec<(String, Rc)>, // (arg, value). } @@ -119,7 +118,7 @@ impl SQLiteQueryBuilder { arg_prefix: prefix, arg_counter: 0, - uuid_args: HashMap::default(), + byte_args: HashMap::default(), string_args: HashMap::default(), args: vec![], } @@ -166,12 +165,13 @@ impl QueryBuilder for SQLiteQueryBuilder { self.push_sql(format!("{}", dt.to_micros()).as_str()); // TODO: argument instead? }, &Uuid(ref u) => { - if let Some(arg) = self.uuid_args.get(u).cloned() { // Why, borrow checker, why?! + let bytes = u.as_bytes(); + if let Some(arg) = self.byte_args.get(bytes.as_ref()).cloned() { // Why, borrow checker, why?! self.push_named_arg(arg.as_str()); } else { let arg = self.next_argument_name(); self.push_named_arg(arg.as_str()); - self.uuid_args.insert(Rc::new(u.clone()), arg); + self.byte_args.insert(bytes.clone().to_vec(), arg); } }, // These are both `Rc`. Unfortunately, we can't use that fact when @@ -219,20 +219,18 @@ impl QueryBuilder for SQLiteQueryBuilder { } fn finish(self) -> SQLQuery { - // We collected string and UUID arguments into separate maps so that we could + // We collected string and byte arguments into separate maps so that we could // dedupe them. Now we need to turn them into rusqlite Values. let mut args = self.args; let string_args = self.string_args.into_iter().map(|(val, arg)| { (arg, Rc::new(rusqlite::types::Value::Text(val.as_ref().clone()))) }); - let uuid_args = self.uuid_args.into_iter().map(|(val, arg)| { - // Get a byte array. - let bytes = val.as_bytes().clone(); - (arg, Rc::new(rusqlite::types::Value::Blob(bytes.to_vec()))) + let byte_args = self.byte_args.into_iter().map(|(val, arg)| { + (arg, Rc::new(rusqlite::types::Value::Blob(val))) }); args.extend(string_args); - args.extend(uuid_args); + args.extend(byte_args); // Get the args in the right order -- $v0, $v1… args.sort_by(|&(ref k1, _), &(ref k2, _)| k1.cmp(k2));