Introduce RelResult rather than Vec<Vec<TypedValue>>. (#639) r=nalexander

* Pre: clean up core/src/lib.rs.
* Pre: use indexmap 1.0 in db and query-projector.
* Change rel results to be a RelResult instance, not a Vec<Vec<TypedValue>>.

This avoids memory fragmentation and improves locality by using a single
heap-allocated vector for all bindings, rather than a separate
heap-allocated vector for each row.

We hide this abstraction behind the `RelResult` type, which tracks the
stride length (width) of each row.

* Don't allocate temporary vectors when projecting RelResults.
This commit is contained in:
Richard Newman 2018-04-24 15:04:00 -07:00 committed by GitHub
parent 0c31fc7875
commit a74a2deffc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 248 additions and 74 deletions

View file

@ -183,8 +183,9 @@ impl fmt::Display for ValueType {
}
/// Represents a Mentat value in a particular value set.
// TODO: expand to include :db.type/{instant,url,uuid}.
// TODO: BigInt?
// TODO: expand to include :db.type/uri. https://github.com/mozilla/mentat/issues/201
// TODO: JSON data type? https://github.com/mozilla/mentat/issues/31
// TODO: BigInt? Bytes?
#[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq,Serialize,Deserialize)]
pub enum TypedValue {
Ref(Entid),
@ -983,6 +984,10 @@ impl HasSchema for Schema {
}
}
pub mod intern_set;
pub mod counter;
pub mod util;
#[cfg(test)]
mod test {
use super::*;
@ -1125,7 +1130,3 @@ mod test {
assert_eq!(expected_value, value2);
}
}
pub mod intern_set;
pub mod counter;
pub mod util;

View file

@ -5,7 +5,7 @@ workspace = ".."
[dependencies]
error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" }
indexmap = "0.4"
indexmap = "1"
itertools = "0.7"
lazy_static = "0.2"
num = "0.1"

View file

@ -5,7 +5,7 @@ workspace = ".."
[dependencies]
error-chain = { git = "https://github.com/rnewman/error-chain", branch = "rnewman/sync" }
indexmap = "0.4"
indexmap = "1"
[dependencies.rusqlite]
version = "0.13"

View file

@ -66,6 +66,7 @@ use mentat_query_sql::{
mod aggregates;
mod project;
mod relresult;
pub mod errors;
pub use aggregates::{
@ -81,6 +82,10 @@ pub use project::{
projected_column_for_var,
};
pub use relresult::{
RelResult,
};
use errors::{
ErrorKind,
Result,
@ -97,7 +102,7 @@ pub enum QueryResults {
Scalar(Option<TypedValue>),
Tuple(Option<Vec<TypedValue>>),
Coll(Vec<TypedValue>),
Rel(Vec<Vec<TypedValue>>),
Rel(RelResult),
}
impl From<QueryOutput> for QueryResults {
@ -110,10 +115,13 @@ impl QueryOutput {
pub fn empty_factory(spec: &FindSpec) -> Box<Fn() -> QueryResults> {
use self::FindSpec::*;
match spec {
&FindScalar(_) => Box::new(|| QueryResults::Scalar(None)),
&FindTuple(_) => Box::new(|| QueryResults::Tuple(None)),
&FindColl(_) => Box::new(|| QueryResults::Coll(vec![])),
&FindRel(_) => Box::new(|| QueryResults::Rel(vec![])),
&FindScalar(_) => Box::new(|| QueryResults::Scalar(None)),
&FindTuple(_) => Box::new(|| QueryResults::Tuple(None)),
&FindColl(_) => Box::new(|| QueryResults::Coll(vec![])),
&FindRel(ref es) => {
let width = es.len();
Box::new(move || QueryResults::Rel(RelResult::empty(width)))
},
}
}
@ -129,10 +137,10 @@ impl QueryOutput {
use self::FindSpec::*;
let results =
match &**spec {
&FindScalar(_) => QueryResults::Scalar(None),
&FindTuple(_) => QueryResults::Tuple(None),
&FindColl(_) => QueryResults::Coll(vec![]),
&FindRel(_) => QueryResults::Rel(vec![]),
&FindScalar(_) => QueryResults::Scalar(None),
&FindTuple(_) => QueryResults::Tuple(None),
&FindColl(_) => QueryResults::Coll(vec![]),
&FindRel(ref es) => QueryResults::Rel(RelResult::empty(es.len())),
};
QueryOutput {
spec: spec.clone(),
@ -181,6 +189,7 @@ impl QueryOutput {
unimplemented!();
},
&FindRel(ref elements) => {
let width = elements.len();
let values = elements.iter().map(|e| match e {
&Element::Variable(ref var) |
&Element::Corresponding(ref var) => {
@ -192,7 +201,7 @@ impl QueryOutput {
unreachable!();
},
}).collect();
QueryResults::Rel(vec![values])
QueryResults::Rel(RelResult { width, values })
},
}
}
@ -209,7 +218,7 @@ impl QueryOutput {
self.results.into_tuple()
}
pub fn into_rel(self) -> Result<Vec<Vec<TypedValue>>> {
pub fn into_rel(self) -> Result<RelResult> {
self.results.into_rel()
}
}
@ -221,7 +230,7 @@ impl QueryResults {
&Scalar(ref o) => if o.is_some() { 1 } else { 0 },
&Tuple(ref o) => if o.is_some() { 1 } else { 0 },
&Coll(ref v) => v.len(),
&Rel(ref v) => v.len(),
&Rel(ref r) => r.row_count(),
}
}
@ -231,7 +240,7 @@ impl QueryResults {
&Scalar(ref o) => o.is_none(),
&Tuple(ref o) => o.is_none(),
&Coll(ref v) => v.is_empty(),
&Rel(ref v) => v.is_empty(),
&Rel(ref r) => r.is_empty(),
}
}
@ -262,7 +271,7 @@ impl QueryResults {
}
}
pub fn into_rel(self) -> Result<Vec<Vec<TypedValue>>> {
pub fn into_rel(self) -> Result<RelResult> {
match self {
QueryResults::Scalar(_) => bail!(ErrorKind::UnexpectedResultsType("scalar", "rel")),
QueryResults::Coll(_) => bail!(ErrorKind::UnexpectedResultsType("coll", "rel")),
@ -457,10 +466,9 @@ impl Projector for TupleProjector {
}
}
/// A rel projector produces a vector of vectors.
/// Each inner vector is the same size, and sourced from the same columns.
/// One inner vector is produced per `Row`.
/// Each column in the inner vector is the result of taking one or two columns from
/// A rel projector produces a RelResult, which is a striding abstraction over a vector.
/// Each stride across the vector is the same size, and sourced from the same columns.
/// Each column in each stride is the result of taking one or two columns from
/// the `Row`: one for the value and optionally one for the type tag.
struct RelProjector {
spec: Rc<FindSpec>,
@ -477,15 +485,20 @@ impl RelProjector {
}
}
fn collect_bindings<'a, 'stmt>(&self, row: Row<'a, 'stmt>) -> Result<Vec<TypedValue>> {
fn collect_bindings_into<'a, 'stmt, 'out>(&self, row: Row<'a, 'stmt>, out: &mut Vec<TypedValue>) -> Result<()> {
// There will be at least as many SQL columns as Datalog columns.
// gte 'cos we might be querying extra columns for ordering.
// The templates will take care of ignoring columns.
assert!(row.column_count() >= self.len as i32);
self.templates
.iter()
.map(|ti| ti.lookup(&row))
.collect::<Result<Vec<TypedValue>>>()
let mut count = 0;
for binding in self.templates
.iter()
.map(|ti| ti.lookup(&row)) {
out.push(binding?);
count += 1;
}
assert_eq!(self.len, count);
Ok(())
}
fn combine(spec: Rc<FindSpec>, column_count: usize, elements: ProjectedElements) -> Result<CombinedProjection> {
@ -509,15 +522,19 @@ impl RelProjector {
impl Projector for RelProjector {
fn project<'stmt>(&self, mut rows: Rows<'stmt>) -> Result<QueryOutput> {
let mut out: Vec<Vec<TypedValue>> = vec![];
// Allocate space for five rows to start.
// This is better than starting off by doubling the buffer a couple of times, and will
// rapidly grow to support larger query results.
let width = self.len;
let mut values: Vec<_> = Vec::with_capacity(5 * width);
while let Some(r) = rows.next() {
let row = r?;
let bindings = self.collect_bindings(row)?;
out.push(bindings);
self.collect_bindings_into(row, &mut values)?;
}
Ok(QueryOutput {
spec: self.spec.clone(),
results: QueryResults::Rel(out),
results: QueryResults::Rel(RelResult { width, values }),
})
}

View file

@ -0,0 +1,150 @@
// Copyright 2018 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use mentat_core::{
TypedValue,
};
/// The result you get from a 'rel' query, like:
///
/// ```edn
/// [:find ?person ?name
/// :where [?person :person/name ?name]]
/// ```
///
/// All rows share one flat vector of values; `width` is the number of values
/// that make up a single row.
///
/// There are three ways to get data out of a `RelResult`:
/// - By iterating over rows as slices. Use `result.rows()`. This is efficient and is
///   recommended in two cases:
///   1. If you don't need to take ownership of the resulting values (e.g., you're comparing
///      or making a modified clone).
///   2. When the data you're retrieving is cheap to clone. All scalar values are relatively
///      cheap: they're either small values or `Rc`.
/// - By direct reference to a row by index, using `result.row(i)`. This also returns
///   a reference.
/// - By consuming the results using `into_iter`. This allocates short-lived vectors,
///   but gives you ownership of the enclosed `TypedValue`s.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RelResult {
/// Number of values in each row (the stride used to split `values` into rows).
pub width: usize,
/// The values of every row, stored back to back in row order. The length is
/// expected to be a multiple of `width`.
pub values: Vec<TypedValue>,
}
impl RelResult {
    /// Creates a result that holds no rows, where each row would be `width` values wide.
    pub fn empty(width: usize) -> RelResult {
        RelResult {
            width,
            values: Vec::new(),
        }
    }

    /// Returns `true` when no values are stored at all.
    pub fn is_empty(&self) -> bool {
        self.values.is_empty()
    }

    /// Returns the number of complete rows.
    ///
    /// A zero-width result (for example one converted from an empty
    /// `Vec<Vec<TypedValue>>`) has no rows; it reports `0` rather than
    /// panicking on a division by zero.
    pub fn row_count(&self) -> usize {
        self.values.len().checked_div(self.width).unwrap_or(0)
    }

    /// Iterates over the rows as borrowed slices of `width` values each.
    ///
    /// A zero-width result yields no rows.
    pub fn rows(&self) -> ::std::slice::Chunks<TypedValue> {
        // TODO: Nightly-only API `exact_chunks`. #47115.
        if self.width == 0 {
            // `chunks(0)` panics, so chunk an empty prefix instead: the return
            // type stays the same and the iterator is immediately exhausted.
            self.values[..0].chunks(1)
        } else {
            self.values.chunks(self.width)
        }
    }

    /// Returns the row at `index` as a slice, or `None` if there is no such
    /// complete row.
    ///
    /// Zero-width results have no rows, and indices whose offset would
    /// overflow `usize` are treated as out of range instead of panicking.
    pub fn row(&self, index: usize) -> Option<&[TypedValue]> {
        if self.width == 0 {
            return None;
        }
        let start = self.width.checked_mul(index)?;
        let end = start.checked_add(self.width)?;
        // `get` performs the bounds check and returns `None` past the end.
        self.values.get(start..end)
    }
}
/// Exercises emptiness, row counting, indexed row access and row iteration
/// on an empty, a 1x1 and a 2x2 `RelResult`.
#[test]
fn test_rel_result() {
    // The two rows of the 2x2 fixture, reused as the expected slices below.
    let first = [TypedValue::Long(5), TypedValue::Boolean(true)];
    let second = [TypedValue::Long(-2), TypedValue::Boolean(false)];

    let empty = RelResult::empty(3);
    let unit = RelResult {
        width: 1,
        values: vec![TypedValue::Long(5)],
    };
    let two_by_two = RelResult {
        width: 2,
        values: first.iter().chain(second.iter()).cloned().collect(),
    };

    // Emptiness and row counts.
    for &(rel, expected_rows) in &[(&empty, 0), (&unit, 1), (&two_by_two, 2)] {
        assert_eq!(rel.is_empty(), expected_rows == 0);
        assert_eq!(rel.row_count(), expected_rows);
    }

    // One past the last row is always out of range.
    assert_eq!(empty.row(0), None);
    assert_eq!(unit.row(1), None);
    assert_eq!(two_by_two.row(2), None);

    // In-range rows come back as slices of exactly `width` values.
    assert_eq!(unit.row(0), Some(&[TypedValue::Long(5)][..]));
    assert_eq!(two_by_two.row(0), Some(&first[..]));
    assert_eq!(two_by_two.row(1), Some(&second[..]));

    // Iteration visits the same rows in order and then stops.
    let mut remaining = two_by_two.rows();
    assert_eq!(remaining.next(), Some(&first[..]));
    assert_eq!(remaining.next(), Some(&second[..]));
    assert_eq!(remaining.next(), None);
}
// Primarily for testing.
/// Flattens a vector of rows into a `RelResult`.
///
/// The width is taken from the first row (or is 0 when there are no rows).
/// Later rows are appended as-is; their lengths are not checked against it.
impl From<Vec<Vec<TypedValue>>> for RelResult {
    fn from(src: Vec<Vec<TypedValue>>) -> Self {
        // An empty input has no first row, which yields width 0 and no values.
        let width = src.first().map_or(0, |row| row.len());
        let mut values = Vec::new();
        for row in src {
            values.extend(row);
        }
        RelResult { width, values }
    }
}
/// Owning iterator over the rows of a flat value vector.
///
/// Each call to `next` drains up to `width` values from the underlying
/// iterator and hands them back as a freshly allocated `Vec`.
pub struct SubvecIntoIterator {
    /// Number of values pulled for each yielded row.
    width: usize,
    /// The remaining, not yet yielded values.
    values: ::std::vec::IntoIter<TypedValue>,
}

impl Iterator for SubvecIntoIterator {
    // TODO: this is a good opportunity to use `SmallVec` instead: most queries
    // return a handful of columns.
    type Item = Vec<TypedValue>;

    /// Returns the next row, or `None` once no values could be pulled.
    ///
    /// If fewer than `width` values remain, the final row is shorter.
    fn next(&mut self) -> Option<Vec<TypedValue>> {
        let row: Vec<TypedValue> = self.values.by_ref().take(self.width).collect();
        match row.len() {
            0 => None,
            _ => Some(row),
        }
    }
}
impl IntoIterator for RelResult {
type Item = Vec<TypedValue>;
type IntoIter = SubvecIntoIterator;
fn into_iter(self) -> Self::IntoIter {
SubvecIntoIterator {
width: self.width,
values: self.values.into_iter(),
}
}
}

View file

@ -755,7 +755,7 @@ fn test_ground_tuple() {
let constant = translate_to_constant(&schema, query);
assert_eq!(constant.project_without_rows().unwrap()
.into_rel().unwrap(),
vec![vec![TypedValue::Long(1), TypedValue::typed_string("yyy")]]);
vec![vec![TypedValue::Long(1), TypedValue::typed_string("yyy")]].into());
// Verify that we accept bound input constants.
let query = r#"[:find [?x ?y] :in ?u ?v :where [(ground [?u ?v]) [?x ?y]]]"#;

View file

@ -558,7 +558,7 @@ pub enum Limit {
///
#[derive(Debug, Eq, PartialEq)]
pub enum FindSpec {
/// Returns an array of arrays.
/// Returns an array of arrays, represented as a single array with length a multiple of width.
FindRel(Vec<Element>),
/// Returns an array of scalars, usually homogeneous.

View file

@ -1358,7 +1358,7 @@ mod tests {
let end = time::PreciseTime::now();
println!("Prepared cache execution took {}µs", start.to(end).num_microseconds().unwrap());
assert_eq!(results.into_rel().expect("result"),
vec![vec![TypedValue::typed_string("Greater Duwamish")]]);
vec![vec![TypedValue::typed_string("Greater Duwamish")]].into());
}
trait StoreCache {

View file

@ -108,6 +108,7 @@ pub use query::{
QueryOutput,
QueryPlanStep,
QueryResults,
RelResult,
Variable,
q_once,
};

View file

@ -72,6 +72,7 @@ pub use mentat_query_algebrizer::{
pub use mentat_query_projector::{
QueryOutput, // Includes the columns/find spec.
QueryResults, // The results themselves.
RelResult,
};
use errors::{
@ -119,7 +120,7 @@ pub trait IntoResult {
fn into_scalar_result(self) -> Result<Option<TypedValue>>;
fn into_coll_result(self) -> Result<Vec<TypedValue>>;
fn into_tuple_result(self) -> Result<Option<Vec<TypedValue>>>;
fn into_rel_result(self) -> Result<Vec<Vec<TypedValue>>>;
fn into_rel_result(self) -> Result<RelResult>;
}
impl IntoResult for QueryExecutionResult {
@ -135,7 +136,7 @@ impl IntoResult for QueryExecutionResult {
self?.into_tuple().map_err(|e| e.into())
}
fn into_rel_result(self) -> Result<Vec<Vec<TypedValue>>> {
fn into_rel_result(self) -> Result<RelResult> {
self?.into_rel().map_err(|e| e.into())
}
}

View file

@ -25,6 +25,7 @@ use ::{
Queryable,
QueryInputs,
QueryOutput,
RelResult,
Store,
Variable,
};
@ -100,7 +101,7 @@ impl<'a> QueryBuilder<'a> {
results.into_tuple().map_err(|e| e.into())
}
pub fn execute_rel(&mut self) -> Result<Vec<Vec<TypedValue>>> {
pub fn execute_rel(&mut self) -> Result<RelResult> {
let results = self.execute()?;
results.into_rel().map_err(|e| e.into())
}

View file

@ -48,6 +48,7 @@ use mentat::{
QueryInputs,
Queryable,
QueryResults,
RelResult,
Store,
TxReport,
Variable,
@ -80,8 +81,8 @@ fn test_rel() {
assert_eq!(40, results.len());
// Every row is a pair of a Ref and a Keyword.
if let QueryResults::Rel(ref rel) = results {
for r in rel {
if let QueryResults::Rel(rel) = results {
for r in rel.into_iter() {
assert_eq!(r.len(), 2);
assert!(r[0].matches_type(ValueType::Ref));
assert!(r[1].matches_type(ValueType::Keyword));
@ -90,7 +91,6 @@ fn test_rel() {
panic!("Expected rel.");
}
println!("{:?}", results);
println!("Rel took {}µs", start.to(end).num_microseconds().unwrap());
}
@ -309,7 +309,7 @@ fn test_tx() {
QueryResults::Rel(ref v) => {
assert_eq!(*v, vec![
vec![TypedValue::Ref(t.tx_id),]
]);
].into());
},
_ => panic!("Expected query to work."),
}
@ -346,7 +346,7 @@ fn test_tx_as_input() {
QueryResults::Rel(ref v) => {
assert_eq!(*v, vec![
vec![TypedValue::Uuid(Uuid::from_str("cf62d552-6569-4d1b-b667-04703041dfc4").expect("Valid UUID")),]
]);
].into());
},
_ => panic!("Expected query to work."),
}
@ -451,7 +451,7 @@ fn test_fulltext() {
vec![TypedValue::Ref(v),
TypedValue::String("I've come to talk with you again".to_string().into()),
]
]);
].into());
},
_ => panic!("Expected query to work."),
}
@ -693,21 +693,20 @@ fn test_type_reqs() {
let eid_query = r#"[:find ?eid :where [?eid :test/string "foo"]]"#;
let res = conn.q_once(&mut c, eid_query, None)
.expect("results")
.into();
.into_rel_result()
.expect("results");
let entid = match res {
QueryResults::Rel(ref vs) if vs.len() == 1 && vs[0].len() == 1 && vs[0][0].matches_type(ValueType::Ref) =>
if let TypedValue::Ref(eid) = vs[0][0] {
assert_eq!(res.row_count(), 1);
assert_eq!(res.width, 1);
let entid =
match res.into_iter().next().unwrap().into_iter().next().unwrap() {
TypedValue::Ref(eid) => {
eid
} else {
// Already checked this.
unreachable!();
},
unexpected => {
panic!("Query to get the entity id returned unexpected result {:?}", unexpected);
}
unexpected => {
panic!("Query to get the entity id returned unexpected result {:?}", unexpected);
}
};
};
let type_names = &[
"boolean",
@ -835,7 +834,7 @@ fn test_monster_head_aggregates() {
vec!["Cyclops".into(), TypedValue::Long(3)],
vec!["Medusa".into(), TypedValue::Long(1)],
];
assert_eq!(vals, expected);
assert_eq!(vals, expected.into());
},
r => panic!("Unexpected result {:?}", r),
};
@ -898,7 +897,7 @@ fn test_basic_aggregates() {
.into();
match r {
QueryResults::Rel(vals) => {
assert_eq!(vals, vec![vec![TypedValue::Long(1)]]);
assert_eq!(vals, vec![vec![TypedValue::Long(1)]].into());
},
_ => panic!("Expected rel."),
}
@ -994,7 +993,7 @@ fn test_basic_aggregates() {
vec![TypedValue::Long(22), TypedValue::Long(1)],
vec![TypedValue::Long(28), TypedValue::Long(1)],
vec![TypedValue::Long(42), TypedValue::Long(1)],
]);
].into());
},
_ => panic!("Expected rel."),
}
@ -1014,7 +1013,7 @@ fn test_basic_aggregates() {
vec![TypedValue::Long(22), TypedValue::Long(1)],
vec![TypedValue::Long(28), TypedValue::Long(2)],
vec![TypedValue::Long(42), TypedValue::Long(1)],
]);
].into());
},
_ => panic!("Expected rel."),
}
@ -1184,12 +1183,12 @@ fn test_aggregate_the() {
// that corresponds to the maximum visit date.
//
// 'Group' in this context translates to GROUP BY in the generated SQL.
assert_eq!(2, per_title.len());
assert_eq!(1, corresponding_title.len());
assert_eq!(2, per_title.row_count());
assert_eq!(1, corresponding_title.row_count());
assert_eq!(corresponding_title,
vec![vec![TypedValue::Instant(DateTime::<Utc>::from_str("2018-04-06T20:46:00.000Z").unwrap()),
TypedValue::typed_string("(1) Facebook")]]);
TypedValue::typed_string("(1) Facebook")]].into());
}
#[test]
@ -1293,8 +1292,10 @@ fn test_aggregation_implicit_grouping() {
}
// Max scores for vegetarians.
assert_eq!(vec![vec!["Alice".into(), TypedValue::Long(99)],
vec!["Beli".into(), TypedValue::Long(22)]],
let expected: RelResult =
vec![vec!["Alice".into(), TypedValue::Long(99)],
vec!["Beli".into(), TypedValue::Long(22)]].into();
assert_eq!(expected,
store.q_once(r#"[:find ?name (max ?score)
:where
[?game :foo/score ?score]
@ -1336,7 +1337,7 @@ fn test_aggregation_implicit_grouping() {
TypedValue::String("Diana".to_string().into()),
TypedValue::Long(28),
TypedValue::Long(2),
TypedValue::Double((33f64 / 2f64).into())]]);
TypedValue::Double((33f64 / 2f64).into())]].into());
},
x => panic!("Got unexpected results {:?}", x),
}
@ -1444,7 +1445,7 @@ fn test_tx_data() {
TypedValue::Instant(tx.tx_instant),
TypedValue::Ref(tx.tx_id),
TypedValue::Boolean(true)],
]);
].into());
},
x => panic!("Got unexpected results {:?}", x),
}

View file

@ -47,6 +47,7 @@ use mentat::{
NamespacedKeyword,
QueryInputs,
Queryable,
RelResult,
Store,
TypedValue,
ValueType,
@ -132,7 +133,7 @@ fn test_real_world() {
.into_rel_result()
.expect("query succeeded");
assert_eq!(results,
vec![vec![alice, now.clone()], vec![barbara, now.clone()]]);
vec![vec![alice, now.clone()], vec![barbara, now.clone()]].into());
}
#[test]
@ -1031,9 +1032,10 @@ fn test_upgrade_with_functions() {
[?p :person/height ?height]
[?p :person/name ?name]]"#;
let r = store.q_once(q, None).into_rel_result().unwrap();
assert_eq!(vec![vec![TypedValue::typed_string("Sam"), TypedValue::Long(162)],
vec![TypedValue::typed_string("Beth"), TypedValue::Long(172)]],
r);
let expected: RelResult =
vec![vec![TypedValue::typed_string("Sam"), TypedValue::Long(162)],
vec![TypedValue::typed_string("Beth"), TypedValue::Long(172)]].into();
assert_eq!(expected, r);
// Find foods that Upstream Color fans like.
let q = r#"[:find [?food ...]