diff --git a/core/src/lib.rs b/core/src/lib.rs index 6b28421b..b25ac842 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -183,8 +183,9 @@ impl fmt::Display for ValueType { } /// Represents a Mentat value in a particular value set. -// TODO: expand to include :db.type/{instant,url,uuid}. -// TODO: BigInt? +// TODO: expand to include :db.type/uri. https://github.com/mozilla/mentat/issues/201 +// TODO: JSON data type? https://github.com/mozilla/mentat/issues/31 +// TODO: BigInt? Bytes? #[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq,Serialize,Deserialize)] pub enum TypedValue { Ref(Entid), @@ -983,6 +984,10 @@ impl HasSchema for Schema { } } +pub mod intern_set; +pub mod counter; +pub mod util; + #[cfg(test)] mod test { use super::*; @@ -1125,7 +1130,3 @@ mod test { assert_eq!(expected_value, value2); } } - -pub mod intern_set; -pub mod counter; -pub mod util; diff --git a/db/Cargo.toml b/db/Cargo.toml index 634d9d1a..77c2bc70 100644 --- a/db/Cargo.toml +++ b/db/Cargo.toml @@ -5,7 +5,7 @@ workspace = ".." [dependencies] error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" } -indexmap = "0.4" +indexmap = "1" itertools = "0.7" lazy_static = "0.2" num = "0.1" diff --git a/query-projector/Cargo.toml b/query-projector/Cargo.toml index 799c4c51..95059cd5 100644 --- a/query-projector/Cargo.toml +++ b/query-projector/Cargo.toml @@ -5,7 +5,7 @@ workspace = ".." [dependencies] error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" } -indexmap = "0.4" +indexmap = "1" [dependencies.rusqlite] version = "0.13" diff --git a/query-projector/src/lib.rs b/query-projector/src/lib.rs index acd3838a..7ebfb642 100644 --- a/query-projector/src/lib.rs +++ b/query-projector/src/lib.rs @@ -66,6 +66,7 @@ use mentat_query_sql::{ mod aggregates; mod project; +mod relresult; pub mod errors; pub use aggregates::{ @@ -81,6 +82,10 @@ pub use project::{ projected_column_for_var, }; +pub use relresult::{ + RelResult, +}; + use errors::{ ErrorKind, Result, @@ -97,7 +102,7 @@ pub enum QueryResults { Scalar(Option), Tuple(Option>), Coll(Vec), - Rel(Vec>), + Rel(RelResult), } impl From for QueryResults { @@ -110,10 +115,13 @@ impl QueryOutput { pub fn empty_factory(spec: &FindSpec) -> Box QueryResults> { use self::FindSpec::*; match spec { - &FindScalar(_) => Box::new(|| QueryResults::Scalar(None)), - &FindTuple(_) => Box::new(|| QueryResults::Tuple(None)), - &FindColl(_) => Box::new(|| QueryResults::Coll(vec![])), - &FindRel(_) => Box::new(|| QueryResults::Rel(vec![])), + &FindScalar(_) => Box::new(|| QueryResults::Scalar(None)), + &FindTuple(_) => Box::new(|| QueryResults::Tuple(None)), + &FindColl(_) => Box::new(|| QueryResults::Coll(vec![])), + &FindRel(ref es) => { + let width = es.len(); + Box::new(move || QueryResults::Rel(RelResult::empty(width))) + }, } } @@ -129,10 +137,10 @@ impl QueryOutput { use self::FindSpec::*; let results = match &**spec { - &FindScalar(_) => QueryResults::Scalar(None), - &FindTuple(_) => QueryResults::Tuple(None), - &FindColl(_) => QueryResults::Coll(vec![]), - &FindRel(_) => QueryResults::Rel(vec![]), + &FindScalar(_) => QueryResults::Scalar(None), + &FindTuple(_) => QueryResults::Tuple(None), + &FindColl(_) => QueryResults::Coll(vec![]), + &FindRel(ref es) => QueryResults::Rel(RelResult::empty(es.len())), }; QueryOutput { spec: spec.clone(), @@ -181,6 +189,7 @@ impl QueryOutput { unimplemented!(); }, &FindRel(ref elements) => { + let width = elements.len(); let values = elements.iter().map(|e| match e { &Element::Variable(ref var) | &Element::Corresponding(ref var) => { @@ -192,7 +201,7 @@ impl QueryOutput { unreachable!(); }, }).collect(); - QueryResults::Rel(vec![values]) + QueryResults::Rel(RelResult { width, values }) }, } } @@ -209,7 +218,7 @@ impl QueryOutput { self.results.into_tuple() } - pub fn into_rel(self) -> Result>> { + pub fn into_rel(self) -> Result { self.results.into_rel() } } @@ -221,7 +230,7 @@ impl QueryResults { &Scalar(ref o) => if o.is_some() { 1 } else { 0 }, &Tuple(ref o) => if o.is_some() { 1 } else { 0 }, &Coll(ref v) => v.len(), - &Rel(ref v) => v.len(), + &Rel(ref r) => r.row_count(), } } @@ -231,7 +240,7 @@ impl QueryResults { &Scalar(ref o) => o.is_none(), &Tuple(ref o) => o.is_none(), &Coll(ref v) => v.is_empty(), - &Rel(ref v) => v.is_empty(), + &Rel(ref r) => r.is_empty(), } } @@ -262,7 +271,7 @@ impl QueryResults { } } - pub fn into_rel(self) -> Result>> { + pub fn into_rel(self) -> Result { match self { QueryResults::Scalar(_) => bail!(ErrorKind::UnexpectedResultsType("scalar", "rel")), QueryResults::Coll(_) => bail!(ErrorKind::UnexpectedResultsType("coll", "rel")), @@ -457,10 +466,9 @@ impl Projector for TupleProjector { } } -/// A rel projector produces a vector of vectors. -/// Each inner vector is the same size, and sourced from the same columns. -/// One inner vector is produced per `Row`. -/// Each column in the inner vector is the result of taking one or two columns from +/// A rel projector produces a RelResult, which is a striding abstraction over a vector. +/// Each stride across the vector is the same size, and sourced from the same columns. +/// Each column in each stride is the result of taking one or two columns from /// the `Row`: one for the value and optionally one for the type tag. struct RelProjector { spec: Rc, @@ -477,15 +485,20 @@ impl RelProjector { } } - fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result> { + fn collect_bindings_into<'a, 'stmt, 'out>(&self, row: Row<'a, 'stmt>, out: &mut Vec) -> Result<()> { // There will be at least as many SQL columns as Datalog columns. // gte 'cos we might be querying extra columns for ordering. // The templates will take care of ignoring columns. assert!(row.column_count() >= self.len as i32); - self.templates - .iter() - .map(|ti| ti.lookup(&row)) - .collect::>>() + let mut count = 0; + for binding in self.templates + .iter() + .map(|ti| ti.lookup(&row)) { + out.push(binding?); + count += 1; + } + assert_eq!(self.len, count); + Ok(()) } fn combine(spec: Rc, column_count: usize, elements: ProjectedElements) -> Result { @@ -509,15 +522,19 @@ impl RelProjector { impl Projector for RelProjector { fn project<'stmt>(&self, mut rows: Rows<'stmt>) -> Result { - let mut out: Vec> = vec![]; + // Allocate space for five rows to start. + // This is better than starting off by doubling the buffer a couple of times, and will + // rapidly grow to support larger query results. + let width = self.len; + let mut values: Vec<_> = Vec::with_capacity(5 * width); + while let Some(r) = rows.next() { let row = r?; - let bindings = self.collect_bindings(row)?; - out.push(bindings); + self.collect_bindings_into(row, &mut values)?; } Ok(QueryOutput { spec: self.spec.clone(), - results: QueryResults::Rel(out), + results: QueryResults::Rel(RelResult { width, values }), }) } diff --git a/query-projector/src/relresult.rs b/query-projector/src/relresult.rs new file mode 100644 index 00000000..c88a0c30 --- /dev/null +++ b/query-projector/src/relresult.rs @@ -0,0 +1,150 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use mentat_core::{ + TypedValue, +}; + +/// The result you get from a 'rel' query, like: +/// +/// ```edn +/// [:find ?person ?name +/// :where [?person :person/name ?name]] +/// ``` +/// +/// There are three ways to get data out of a `RelResult`: +/// - By iterating over rows as slices. Use `result.rows()`. This is efficient and is +/// recommended in two cases: +/// 1. If you don't need to take ownership of the resulting values (e.g., you're comparing +/// or making a modified clone). +/// 2. When the data you're retrieving is cheap to clone. All scalar values are relatively +/// cheap: they're either small values or `Rc`. +/// - By direct reference to a row by index, using `result.row(i)`. This also returns +/// a reference. +/// - By consuming the results using `into_iter`. This allocates short-lived vectors, +/// but gives you ownership of the enclosed `TypedValue`s. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct RelResult { + pub width: usize, + pub values: Vec, +} + +impl RelResult { + pub fn empty(width: usize) -> RelResult { + RelResult { + width: width, + values: Vec::new(), + } + } + + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } + + pub fn row_count(&self) -> usize { + self.values.len() / self.width + } + + pub fn rows(&self) -> ::std::slice::Chunks { + // TODO: Nightly-only API `exact_chunks`. #47115. + self.values.chunks(self.width) + } + + pub fn row(&self, index: usize) -> Option<&[TypedValue]> { + let end = self.width * (index + 1); + if end > self.values.len() { + None + } else { + let start = self.width * index; + Some(&self.values[start..end]) + } + } +} + +#[test] +fn test_rel_result() { + let empty = RelResult::empty(3); + let unit = RelResult { + width: 1, + values: vec![TypedValue::Long(5)], + }; + let two_by_two = RelResult { + width: 2, + values: vec![TypedValue::Long(5), TypedValue::Boolean(true), + TypedValue::Long(-2), TypedValue::Boolean(false)], + }; + + assert!(empty.is_empty()); + assert!(!unit.is_empty()); + assert!(!two_by_two.is_empty()); + + assert_eq!(empty.row_count(), 0); + assert_eq!(unit.row_count(), 1); + assert_eq!(two_by_two.row_count(), 2); + + assert_eq!(empty.row(0), None); + assert_eq!(unit.row(1), None); + assert_eq!(two_by_two.row(2), None); + + assert_eq!(unit.row(0), Some(vec![TypedValue::Long(5)].as_slice())); + assert_eq!(two_by_two.row(0), Some(vec![TypedValue::Long(5), TypedValue::Boolean(true)].as_slice())); + assert_eq!(two_by_two.row(1), Some(vec![TypedValue::Long(-2), TypedValue::Boolean(false)].as_slice())); + + let mut rr = two_by_two.rows(); + assert_eq!(rr.next(), Some(vec![TypedValue::Long(5), TypedValue::Boolean(true)].as_slice())); + assert_eq!(rr.next(), Some(vec![TypedValue::Long(-2), TypedValue::Boolean(false)].as_slice())); + assert_eq!(rr.next(), None); +} + +// Primarily for testing. +impl From>> for RelResult { + fn from(src: Vec>) -> Self { + if src.is_empty() { + RelResult::empty(0) + } else { + let width = src.get(0).map(|r| r.len()).unwrap_or(0); + RelResult { + width: width, + values: src.into_iter().flat_map(|r| r.into_iter()).collect(), + } + } + } +} + +pub struct SubvecIntoIterator { + width: usize, + values: ::std::vec::IntoIter, +} + +impl Iterator for SubvecIntoIterator { + // TODO: this is a good opportunity to use `SmallVec` instead: most queries + // return a handful of columns. + type Item = Vec; + fn next(&mut self) -> Option> { + let result: Vec = (&mut self.values).take(self.width).collect(); + if result.is_empty() { + None + } else { + Some(result) + } + } +} + +impl IntoIterator for RelResult { + type Item = Vec; + type IntoIter = SubvecIntoIterator; + + fn into_iter(self) -> Self::IntoIter { + SubvecIntoIterator { + width: self.width, + values: self.values.into_iter(), + } + } +} diff --git a/query-translator/tests/translate.rs b/query-translator/tests/translate.rs index dc4e84c7..90a36850 100644 --- a/query-translator/tests/translate.rs +++ b/query-translator/tests/translate.rs @@ -755,7 +755,7 @@ fn test_ground_tuple() { let constant = translate_to_constant(&schema, query); assert_eq!(constant.project_without_rows().unwrap() .into_rel().unwrap(), - vec![vec![TypedValue::Long(1), TypedValue::typed_string("yyy")]]); + vec![vec![TypedValue::Long(1), TypedValue::typed_string("yyy")]].into()); // Verify that we accept bound input constants. let query = r#"[:find [?x ?y] :in ?u ?v :where [(ground [?u ?v]) [?x ?y]]]"#; diff --git a/query/src/lib.rs b/query/src/lib.rs index 4ce03070..1eea007e 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -558,7 +558,7 @@ pub enum Limit { /// #[derive(Debug, Eq, PartialEq)] pub enum FindSpec { - /// Returns an array of arrays. + /// Returns an array of arrays, represented as a single array with length a multiple of width. FindRel(Vec), /// Returns an array of scalars, usually homogeneous. diff --git a/src/conn.rs b/src/conn.rs index c1da05c3..f9f1187b 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -1358,7 +1358,7 @@ mod tests { let end = time::PreciseTime::now(); println!("Prepared cache execution took {}µs", start.to(end).num_microseconds().unwrap()); assert_eq!(results.into_rel().expect("result"), - vec![vec![TypedValue::typed_string("Greater Duwamish")]]); + vec![vec![TypedValue::typed_string("Greater Duwamish")]].into()); } trait StoreCache { diff --git a/src/lib.rs b/src/lib.rs index bc8683f8..fb46e26b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,6 +108,7 @@ pub use query::{ QueryOutput, QueryPlanStep, QueryResults, + RelResult, Variable, q_once, }; diff --git a/src/query.rs b/src/query.rs index 1c260092..d270654e 100644 --- a/src/query.rs +++ b/src/query.rs @@ -72,6 +72,7 @@ pub use mentat_query_algebrizer::{ pub use mentat_query_projector::{ QueryOutput, // Includes the columns/find spec. QueryResults, // The results themselves. + RelResult, }; use errors::{ @@ -119,7 +120,7 @@ pub trait IntoResult { fn into_scalar_result(self) -> Result>; fn into_coll_result(self) -> Result>; fn into_tuple_result(self) -> Result>>; - fn into_rel_result(self) -> Result>>; + fn into_rel_result(self) -> Result; } impl IntoResult for QueryExecutionResult { @@ -135,7 +136,7 @@ impl IntoResult for QueryExecutionResult { self?.into_tuple().map_err(|e| e.into()) } - fn into_rel_result(self) -> Result>> { + fn into_rel_result(self) -> Result { self?.into_rel().map_err(|e| e.into()) } } diff --git a/src/query_builder.rs b/src/query_builder.rs index 54a0ff2b..87c34c41 100644 --- a/src/query_builder.rs +++ b/src/query_builder.rs @@ -25,6 +25,7 @@ use ::{ Queryable, QueryInputs, QueryOutput, + RelResult, Store, Variable, }; @@ -100,7 +101,7 @@ impl<'a> QueryBuilder<'a> { results.into_tuple().map_err(|e| e.into()) } - pub fn execute_rel(&mut self) -> Result>> { + pub fn execute_rel(&mut self) -> Result { let results = self.execute()?; results.into_rel().map_err(|e| e.into()) } diff --git a/tests/query.rs b/tests/query.rs index 22ffded0..d192d9c5 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -48,6 +48,7 @@ use mentat::{ QueryInputs, Queryable, QueryResults, + RelResult, Store, TxReport, Variable, @@ -80,8 +81,8 @@ fn test_rel() { assert_eq!(40, results.len()); // Every row is a pair of a Ref and a Keyword. - if let QueryResults::Rel(ref rel) = results { - for r in rel { + if let QueryResults::Rel(rel) = results { + for r in rel.into_iter() { assert_eq!(r.len(), 2); assert!(r[0].matches_type(ValueType::Ref)); assert!(r[1].matches_type(ValueType::Keyword)); @@ -90,7 +91,6 @@ fn test_rel() { panic!("Expected rel."); } - println!("{:?}", results); println!("Rel took {}µs", start.to(end).num_microseconds().unwrap()); } @@ -309,7 +309,7 @@ fn test_tx() { QueryResults::Rel(ref v) => { assert_eq!(*v, vec![ vec![TypedValue::Ref(t.tx_id),] - ]); + ].into()); }, _ => panic!("Expected query to work."), } @@ -346,7 +346,7 @@ fn test_tx_as_input() { QueryResults::Rel(ref v) => { assert_eq!(*v, vec![ vec![TypedValue::Uuid(Uuid::from_str("cf62d552-6569-4d1b-b667-04703041dfc4").expect("Valid UUID")),] - ]); + ].into()); }, _ => panic!("Expected query to work."), } @@ -451,7 +451,7 @@ fn test_fulltext() { vec![TypedValue::Ref(v), TypedValue::String("I've come to talk with you again".to_string().into()), ] - ]); + ].into()); }, _ => panic!("Expected query to work."), } @@ -693,21 +693,20 @@ fn test_type_reqs() { let eid_query = r#"[:find ?eid :where [?eid :test/string "foo"]]"#; let res = conn.q_once(&mut c, eid_query, None) - .expect("results") - .into(); + .into_rel_result() + .expect("results"); - let entid = match res { - QueryResults::Rel(ref vs) if vs.len() == 1 && vs[0].len() == 1 && vs[0][0].matches_type(ValueType::Ref) => - if let TypedValue::Ref(eid) = vs[0][0] { + assert_eq!(res.row_count(), 1); + assert_eq!(res.width, 1); + let entid = + match res.into_iter().next().unwrap().into_iter().next().unwrap() { + TypedValue::Ref(eid) => { eid - } else { - // Already checked this. - unreachable!(); + }, + unexpected => { + panic!("Query to get the entity id returned unexpected result {:?}", unexpected); } - unexpected => { - panic!("Query to get the entity id returned unexpected result {:?}", unexpected); - } - }; + }; let type_names = &[ "boolean", @@ -835,7 +834,7 @@ fn test_monster_head_aggregates() { vec!["Cyclops".into(), TypedValue::Long(3)], vec!["Medusa".into(), TypedValue::Long(1)], ]; - assert_eq!(vals, expected); + assert_eq!(vals, expected.into()); }, r => panic!("Unexpected result {:?}", r), }; @@ -898,7 +897,7 @@ fn test_basic_aggregates() { .into(); match r { QueryResults::Rel(vals) => { - assert_eq!(vals, vec![vec![TypedValue::Long(1)]]); + assert_eq!(vals, vec![vec![TypedValue::Long(1)]].into()); }, _ => panic!("Expected rel."), } @@ -994,7 +993,7 @@ fn test_basic_aggregates() { vec![TypedValue::Long(22), TypedValue::Long(1)], vec![TypedValue::Long(28), TypedValue::Long(1)], vec![TypedValue::Long(42), TypedValue::Long(1)], - ]); + ].into()); }, _ => panic!("Expected rel."), } @@ -1014,7 +1013,7 @@ fn test_basic_aggregates() { vec![TypedValue::Long(22), TypedValue::Long(1)], vec![TypedValue::Long(28), TypedValue::Long(2)], vec![TypedValue::Long(42), TypedValue::Long(1)], - ]); + ].into()); }, _ => panic!("Expected rel."), } @@ -1184,12 +1183,12 @@ fn test_aggregate_the() { // that corresponds to the maximum visit date. // // 'Group' in this context translates to GROUP BY in the generated SQL. - assert_eq!(2, per_title.len()); - assert_eq!(1, corresponding_title.len()); + assert_eq!(2, per_title.row_count()); + assert_eq!(1, corresponding_title.row_count()); assert_eq!(corresponding_title, vec![vec![TypedValue::Instant(DateTime::::from_str("2018-04-06T20:46:00.000Z").unwrap()), - TypedValue::typed_string("(1) Facebook")]]); + TypedValue::typed_string("(1) Facebook")]].into()); } #[test] @@ -1293,8 +1292,10 @@ fn test_aggregation_implicit_grouping() { } // Max scores for vegetarians. - assert_eq!(vec![vec!["Alice".into(), TypedValue::Long(99)], - vec!["Beli".into(), TypedValue::Long(22)]], + let expected: RelResult = + vec![vec!["Alice".into(), TypedValue::Long(99)], + vec!["Beli".into(), TypedValue::Long(22)]].into(); + assert_eq!(expected, store.q_once(r#"[:find ?name (max ?score) :where [?game :foo/score ?score] @@ -1336,7 +1337,7 @@ fn test_aggregation_implicit_grouping() { TypedValue::String("Diana".to_string().into()), TypedValue::Long(28), TypedValue::Long(2), - TypedValue::Double((33f64 / 2f64).into())]]); + TypedValue::Double((33f64 / 2f64).into())]].into()); }, x => panic!("Got unexpected results {:?}", x), } @@ -1444,7 +1445,7 @@ fn test_tx_data() { TypedValue::Instant(tx.tx_instant), TypedValue::Ref(tx.tx_id), TypedValue::Boolean(true)], - ]); + ].into()); }, x => panic!("Got unexpected results {:?}", x), } diff --git a/tests/vocabulary.rs b/tests/vocabulary.rs index 125e7ba1..a8de8f02 100644 --- a/tests/vocabulary.rs +++ b/tests/vocabulary.rs @@ -47,6 +47,7 @@ use mentat::{ NamespacedKeyword, QueryInputs, Queryable, + RelResult, Store, TypedValue, ValueType, @@ -132,7 +133,7 @@ fn test_real_world() { .into_rel_result() .expect("query succeeded"); assert_eq!(results, - vec![vec![alice, now.clone()], vec![barbara, now.clone()]]); + vec![vec![alice, now.clone()], vec![barbara, now.clone()]].into()); } #[test] @@ -1031,9 +1032,10 @@ fn test_upgrade_with_functions() { [?p :person/height ?height] [?p :person/name ?name]]"#; let r = store.q_once(q, None).into_rel_result().unwrap(); - assert_eq!(vec![vec![TypedValue::typed_string("Sam"), TypedValue::Long(162)], - vec![TypedValue::typed_string("Beth"), TypedValue::Long(172)]], - r); + let expected: RelResult = + vec![vec![TypedValue::typed_string("Sam"), TypedValue::Long(162)], + vec![TypedValue::typed_string("Beth"), TypedValue::Long(172)]].into(); + assert_eq!(expected, r); // Find foods that Upstream Color fans like. let q = r#"[:find [?food ...]