Store a bitfield in temporary search tables and expand to bit flags in the datoms table to investigate performance difference. Fixes #226. r=nalexander (#242)

This commit is contained in:
Jordan Santell 2017-02-10 12:03:49 -08:00 committed by GitHub
parent f591c90738
commit 6ce5d526a3
3 changed files with 102 additions and 39 deletions

View file

@ -81,6 +81,17 @@ fn test_typed_value() {
assert!(TypedValue::String("foo".to_string()).is_congruent_with(None));
}
/// Bit flags used in `flags0` column in temporary tables created during search,
/// such as the `search_results`, `inexact_searches` and `exact_searches` tables.
/// When moving to a more concrete table, such as `datoms`, they are expanded out
/// via these flags and put into their own column rather than a bit field.
pub enum AttributeBitFlags {
IndexAVET = 1 << 0,
IndexVAET = 1 << 1,
IndexFulltext = 1 << 2,
UniqueValue = 1 << 3,
}
/// A Mentat schema attribute has a value type and several other flags determining how assertions
/// with the attribute are interpreted.
///
@ -126,6 +137,27 @@ pub struct Attribute {
pub component: bool,
}
impl Attribute {
/// Combine several attribute flags into a bitfield used in temporary search tables.
pub fn flags(&self) -> u8 {
let mut flags: u8 = 0;
if self.index {
flags |= AttributeBitFlags::IndexAVET as u8;
}
if self.value_type == ValueType::Ref {
flags |= AttributeBitFlags::IndexVAET as u8;
}
if self.fulltext {
flags |= AttributeBitFlags::IndexFulltext as u8;
}
if self.unique_value {
flags |= AttributeBitFlags::UniqueValue as u8;
}
flags
}
}
impl Default for Attribute {
fn default() -> Attribute {
Attribute {
@ -141,3 +173,40 @@ impl Default for Attribute {
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_attribute_flags() {
let attr1 = Attribute {
index: true,
value_type: ValueType::Ref,
fulltext: false,
unique_value: false,
multival: false,
unique_identity: false,
component: false,
};
assert!(attr1.flags() & AttributeBitFlags::IndexAVET as u8 != 0);
assert!(attr1.flags() & AttributeBitFlags::IndexVAET as u8 != 0);
assert!(attr1.flags() & AttributeBitFlags::IndexFulltext as u8 == 0);
assert!(attr1.flags() & AttributeBitFlags::UniqueValue as u8 == 0);
let attr2 = Attribute {
index: false,
value_type: ValueType::Boolean,
fulltext: true,
unique_value: true,
multival: false,
unique_identity: false,
component: false,
};
assert!(attr2.flags() & AttributeBitFlags::IndexAVET as u8 == 0);
assert!(attr2.flags() & AttributeBitFlags::IndexVAET as u8 == 0);
assert!(attr2.flags() & AttributeBitFlags::IndexFulltext as u8 != 0);
assert!(attr2.flags() & AttributeBitFlags::UniqueValue as u8 != 0);
}
}

View file

@ -26,7 +26,7 @@ use entids;
use mentat_tx::entities as entmod;
use mentat_tx::entities::{Entity, OpType};
use errors::{ErrorKind, Result, ResultExt};
use types::{Attribute, DB, Entid, IdentMap, Partition, PartitionMap, Schema, TypedValue, ValueType};
use types::{Attribute, AttributeBitFlags, DB, Entid, IdentMap, Partition, PartitionMap, Schema, TypedValue, ValueType};
pub fn new_connection<T>(uri: T) -> rusqlite::Result<rusqlite::Connection> where T: AsRef<Path> {
let conn = match uri.as_ref().to_string_lossy().len() {
@ -458,8 +458,9 @@ impl DB {
// We can't do this in one shot, since we can't prepare a batch statement.
let statements = [
r#"DROP TABLE IF EXISTS temp.exact_searches"#,
// TODO: compress bit flags into a single bit field, and expand when inserting into
// `datoms` and `transactions`.
// Note that `flags0` is a bitfield of several flags compressed via
// `AttributeBitFlags.flags()` in the temporary search tables, later
// expanded in the `datoms` insertion.
// TODO: drop tx0 entirely.
r#"CREATE TABLE temp.exact_searches (
e0 INTEGER NOT NULL,
@ -468,10 +469,7 @@ impl DB {
value_type_tag0 SMALLINT NOT NULL,
tx0 INTEGER NOT NULL,
added0 TINYINT NOT NULL,
index_avet0 TINYINT NOT NULL,
index_vaet0 TINYINT NOT NULL,
index_fulltext0 TINYINT NOT NULL,
unique_value0 TINYINT NOT NULL)"#,
flags0 TINYINT NOT NULL)"#,
// There's no real need to split exact and inexact searches, so long as we keep things
// in the correct place and performant. Splitting has the advantage of being explicit
// and slightly easier to read, so we'll do that to start.
@ -483,10 +481,7 @@ impl DB {
value_type_tag0 SMALLINT NOT NULL,
tx0 INTEGER NOT NULL,
added0 TINYINT NOT NULL,
index_avet0 TINYINT NOT NULL,
index_vaet0 TINYINT NOT NULL,
index_fulltext0 TINYINT NOT NULL,
unique_value0 TINYINT NOT NULL)"#,
flags0 TINYINT NOT NULL)"#,
r#"DROP TABLE IF EXISTS temp.search_results"#,
// TODO: don't encode search_type as a STRING. This is explicit and much easier to read
// than another flag, so we'll do it to start, and optimize later.
@ -497,10 +492,7 @@ impl DB {
value_type_tag0 SMALLINT NOT NULL,
tx0 INTEGER NOT NULL,
added0 TINYINT NOT NULL,
index_avet0 TINYINT NOT NULL,
index_vaet0 TINYINT NOT NULL,
index_fulltext0 TINYINT NOT NULL,
unique_value0 TINYINT NOT NULL,
flags0 TINYINT NOT NULL,
search_type STRING NOT NULL,
rid INTEGER,
v BLOB)"#,
@ -527,7 +519,7 @@ impl DB {
/// Eventually, the details of this approach will be captured in
/// https://github.com/mozilla/mentat/wiki/Transacting:-entity-to-SQL-translation.
fn insert_non_fts_searches<'a>(&self, conn: &rusqlite::Connection, entities: &'a [ReducedEntity], tx: Entid, search_type: SearchType) -> Result<()> {
let bindings_per_statement = 10;
let bindings_per_statement = 7;
let chunks: itertools::IntoChunks<_> = entities.into_iter().chunks(::SQLITE_MAX_VARIABLE_NUMBER / bindings_per_statement);
@ -537,31 +529,27 @@ impl DB {
// We must keep these computed values somewhere to reference them later, so we can't
// combine this map and the subsequent flat_map.
// (e0, a0, v0, value_type_tag0, added0, index_avet0, index_vaet0, index_fulltext0, unique_value0)
// (e0, a0, v0, value_type_tag0, added0, flags0)
let block: Result<Vec<(i64 /* e */, i64 /* a */,
ToSqlOutput<'a> /* value */, /* value_type_tag */ i32,
/* added0 */ bool,
/* index_avet0 */ bool,
/* index_vaet0 */ bool,
/* index_fulltext0 */ bool,
/* unique_value0 */ bool)>> = chunk.map(|&(e, a, ref typed_value, added)| {
/* flags0 */ u8)>> = chunk.map(|&(e, a, ref typed_value, added)| {
count += 1;
let attribute: &Attribute = self.schema.require_attribute_for_entid(&a)?;
// Now we can represent the typed value as an SQL value.
let (value, value_type_tag): (ToSqlOutput, i32) = typed_value.to_sql_value_pair();
let flags = attribute.flags();
Ok((e, a, value, value_type_tag,
added,
attribute.index,
attribute.value_type == ValueType::Ref,
attribute.fulltext,
attribute.unique_value))
flags))
}).collect();
let block = block?;
// `params` reference computed values in `block`.
let params: Vec<&ToSql> = block.iter().flat_map(|&(ref e, ref a, ref value, ref value_type_tag, added, index_avet, index_vaet, index_fulltext, unique_value)| {
let params: Vec<&ToSql> = block.iter().flat_map(|&(ref e, ref a, ref value, ref value_type_tag, added, ref flags)| {
// Avoid inner heap allocation.
// TODO: extract some finite length iterator to make this less indented!
once(e as &ToSql)
@ -570,18 +558,15 @@ impl DB {
.chain(once(value_type_tag as &ToSql)
.chain(once(&tx as &ToSql)
.chain(once(to_bool_ref(added) as &ToSql)
.chain(once(to_bool_ref(index_avet) as &ToSql)
.chain(once(to_bool_ref(index_vaet) as &ToSql)
.chain(once(to_bool_ref(index_fulltext) as &ToSql)
.chain(once(to_bool_ref(unique_value) as &ToSql))))))))))
.chain(once(flags as &ToSql)))))))
}).collect();
// TODO: cache this for selected values of count.
let values: String = repeat_values(bindings_per_statement, count);
let s: String = if search_type == SearchType::Exact {
format!("INSERT INTO temp.exact_searches (e0, a0, v0, value_type_tag0, tx0, added0, index_avet0, index_vaet0, index_fulltext0, unique_value0) VALUES {}", values)
format!("INSERT INTO temp.exact_searches (e0, a0, v0, value_type_tag0, tx0, added0, flags0) VALUES {}", values)
} else {
format!("INSERT INTO temp.inexact_searches (e0, a0, v0, value_type_tag0, tx0, added0, index_avet0, index_vaet0, index_fulltext0, unique_value0) VALUES {}", values)
format!("INSERT INTO temp.inexact_searches (e0, a0, v0, value_type_tag0, tx0, added0, flags0) VALUES {}", values)
};
// TODO: consider ensuring we inserted the expected number of rows.
@ -602,7 +587,7 @@ impl DB {
// Second is slower, but still only one table walk: lookup old value by ea.
let s = r#"
INSERT INTO temp.search_results
SELECT t.e0, t.a0, t.v0, t.value_type_tag0, t.tx0, t.added0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, ':db.cardinality/many', d.rowid, d.v
SELECT t.e0, t.a0, t.v0, t.value_type_tag0, t.tx0, t.added0, t.flags0, ':db.cardinality/many', d.rowid, d.v
FROM temp.exact_searches AS t
LEFT JOIN datoms AS d
ON t.e0 = d.e AND
@ -612,7 +597,7 @@ impl DB {
UNION ALL
SELECT t.e0, t.a0, t.v0, t.value_type_tag0, t.tx0, t.added0, t.index_avet0, t.index_vaet0, t.index_fulltext0, t.unique_value0, ':db.cardinality/one', d.rowid, d.v
SELECT t.e0, t.a0, t.v0, t.value_type_tag0, t.tx0, t.added0, t.flags0, ':db.cardinality/one', d.rowid, d.v
FROM temp.inexact_searches AS t
LEFT JOIN datoms AS d
ON t.e0 = d.e AND
@ -680,15 +665,23 @@ impl DB {
.map(|_c| ())
.chain_err(|| "Could not update datoms: failed to retract datoms already present")?;
// Insert datoms that were added and not already present.
let s = r#"
// Insert datoms that were added and not already present. We also must
// expand our bitfield into flags.
let s = format!(r#"
INSERT INTO datoms (e, a, v, tx, value_type_tag, index_avet, index_vaet, index_fulltext, unique_value)
SELECT e0, a0, v0, ?, value_type_tag0,
index_avet0, index_vaet0, index_fulltext0, unique_value0
flags0 & {} IS NOT 0,
flags0 & {} IS NOT 0,
flags0 & {} IS NOT 0,
flags0 & {} IS NOT 0
FROM temp.search_results
WHERE added0 IS 1 AND ((rid IS NULL) OR ((rid IS NOT NULL) AND (v0 IS NOT v)))"#;
WHERE added0 IS 1 AND ((rid IS NULL) OR ((rid IS NOT NULL) AND (v0 IS NOT v)))"#,
AttributeBitFlags::IndexAVET as u8,
AttributeBitFlags::IndexVAET as u8,
AttributeBitFlags::IndexFulltext as u8,
AttributeBitFlags::UniqueValue as u8);
let mut stmt = conn.prepare_cached(s)?;
let mut stmt = conn.prepare_cached(&s)?;
stmt.execute(&[&tx])
.map(|_c| ())
.chain_err(|| "Could not update datoms: failed to add datoms not already present")?;

View file

@ -19,6 +19,7 @@ pub use self::mentat_core::{
ValueType,
TypedValue,
Attribute,
AttributeBitFlags,
};
/// Represents one partition of the entid space.