diff --git a/db/src/db.rs b/db/src/db.rs index 38ed7613..502deedf 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -508,6 +508,7 @@ pub trait MentatStoring { // TODO: this is not a reasonable abstraction, but I don't want to really consider non-SQL storage just yet. fn insert_non_fts_searches<'a>(&self, entities: &'a [ReducedEntity], search_type: SearchType) -> Result<()>; + fn insert_fts_searches<'a>(&self, entities: &'a [ReducedEntity], search_type: SearchType) -> Result<()>; /// Finalize the underlying storage layer after a Mentat transaction. /// @@ -814,6 +815,104 @@ impl MentatStoring for rusqlite::Connection { results.map(|_| ()) } + /// Insert search rows into temporary search tables. + /// + /// Eventually, the details of this approach will be captured in + /// https://github.com/mozilla/mentat/wiki/Transacting:-entity-to-SQL-translation. + fn insert_fts_searches<'a>(&self, entities: &'a [ReducedEntity<'a>], search_type: SearchType) -> Result<()> { + let max_vars = self.limit(Limit::SQLITE_LIMIT_VARIABLE_NUMBER) as usize; + let bindings_per_statement = 6; + + let mut outer_searchid = 2000; + + let chunks: itertools::IntoChunks<_> = entities.into_iter().chunks(max_vars / bindings_per_statement); + + // We'd like to flat_map here, but it's not obvious how to flat_map across Result. + let results: Result> = chunks.into_iter().map(|chunk| -> Result<()> { + let mut count = 0; + + // We must keep these computed values somewhere to reference them later, so we can't + // combine this map and the subsequent flat_map. + // (e0, a0, v0, value_type_tag0, added0, flags0) + let block: Result /* value */, + i32 /* value_type_tag */, + bool /* added0 */, + u8 /* flags0 */, + i64 /* searchid */)>> = chunk.map(|&(e, a, ref attribute, ref typed_value, added)| { + if typed_value.value_type() != ValueType::String { + bail!("Cannot transact a fulltext assertion with a typed value that is not :db/valueType :db.type/string"); + } + + count += 1; + outer_searchid += 1; + + // Now we can represent the typed value as an SQL value. + let (value, value_type_tag): (ToSqlOutput, i32) = typed_value.to_sql_value_pair(); + + Ok((e, a, value, value_type_tag, added, attribute.flags(), outer_searchid)) + }).collect(); + let block = block?; + + // First, insert all fulltext string values. + // `fts_params` reference computed values in `block`. + let fts_params: Vec<&ToSql> = block.iter().flat_map(|&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref searchid)| { + // Avoid inner heap allocation. + once(value as &ToSql) + .chain(once(searchid as &ToSql)) + }).collect(); + + // TODO: make this maximally efficient. It's not terribly inefficient right now. + let fts_values: String = repeat_values(2, count); + let fts_s: String = format!("INSERT INTO fulltext_values_view (text, searchid) VALUES {}", fts_values); + + // TODO: consider ensuring we inserted the expected number of rows. + let mut stmt = self.prepare_cached(fts_s.as_str())?; + stmt.execute(&fts_params) + .map(|_c| ()) + .chain_err(|| "Could not insert fts values into fts table!")?; + + // Second, insert searches. + // `params` reference computed values in `block`. + let params: Vec<&ToSql> = block.iter().flat_map(|&(ref e, ref a, ref _value, ref value_type_tag, added, ref flags, ref searchid)| { + // Avoid inner heap allocation. + // TODO: extract some finite length iterator to make this less indented! + once(e as &ToSql) + .chain(once(a as &ToSql) + .chain(once(searchid as &ToSql) + .chain(once(value_type_tag as &ToSql) + .chain(once(to_bool_ref(added) as &ToSql) + .chain(once(flags as &ToSql)))))) + }).collect(); + + // TODO: cache this for selected values of count. + assert!(bindings_per_statement * count < max_vars, "Too many values: {} * {} >= {}", bindings_per_statement, count, max_vars); + let inner = "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)".to_string(); + // Like "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?), (?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)". + let fts_values: String = repeat(inner).take(count).join(", "); + let s: String = if search_type == SearchType::Exact { + format!("INSERT INTO temp.exact_searches (e0, a0, v0, value_type_tag0, added0, flags0) VALUES {}", fts_values) + } else { + format!("INSERT INTO temp.inexact_searches (e0, a0, v0, value_type_tag0, added0, flags0) VALUES {}", fts_values) + }; + + // TODO: consider ensuring we inserted the expected number of rows. + let mut stmt = self.prepare_cached(s.as_str())?; + stmt.execute(¶ms) + .map(|_c| ()) + .chain_err(|| "Could not insert fts statements into temporary search table!") + }).collect::>>(); + + // Finally, clean up temporary searchids. + let mut stmt = self.prepare_cached("UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL")?; + stmt.execute(&[]) + .map(|_c| ()) + .chain_err(|| "Could not drop fts search ids!")?; + + results.map(|_| ()) + } + fn commit_transaction(&self, tx_id: Entid) -> Result<()> { search(&self)?; insert_transaction(&self, tx_id)?; @@ -979,6 +1078,9 @@ mod tests { use bootstrap; use debug; use edn; + use mentat_core::{ + attribute, + }; use mentat_tx_parser; use rusqlite; use std::collections::{ @@ -1064,11 +1166,15 @@ mod tests { } fn last_transaction(&self) -> edn::Value { - debug::transactions_after(&self.sqlite, &self.schema, self.last_tx_id() - 1).unwrap().0[0].into_edn() + debug::transactions_after(&self.sqlite, &self.schema, self.last_tx_id() - 1).expect("last_transaction").0[0].into_edn() } fn datoms(&self) -> edn::Value { - debug::datoms_after(&self.sqlite, &self.schema, bootstrap::TX0).unwrap().into_edn() + debug::datoms_after(&self.sqlite, &self.schema, bootstrap::TX0).expect("datoms").into_edn() + } + + fn fulltext_values(&self) -> edn::Value { + debug::fulltext_values(&self.sqlite).expect("fulltext_values").into_edn() } } @@ -1555,4 +1661,178 @@ mod tests { [:db/add :test/ident :db/unique :db.unique/value] [:db/add :db.part/db :db.alter/attribute 100]]"); } + + /// Verify that we can't alter :db/fulltext schema characteristics at all. + #[test] + fn test_db_alter_fulltext() { + let mut conn = TestConn::default(); + + // Start by installing a :db/fulltext true and a :db/fulltext unset attribute. + assert_transact!(conn, "[[:db/add 111 :db/ident :test/fulltext] + [:db/add 111 :db/valueType :db.type/string] + [:db/add 111 :db/unique :db.unique/identity] + [:db/add 111 :db/index true] + [:db/add 111 :db/fulltext true] + [:db/add 222 :db/ident :test/string] + [:db/add 222 :db/cardinality :db.cardinality/one] + [:db/add 222 :db/valueType :db.type/string] + [:db/add 222 :db/index true]]"); + + assert_transact!(conn, + "[[:db/retract 111 :db/fulltext true]]", + Err("not yet implemented: Retracting metadata attribute assertions not yet implemented: retracted [e a] pairs [[111 12]]")); + + assert_transact!(conn, + "[[:db/add 222 :db/fulltext true]]", + Err("bad schema assertion: Schema alteration for existing attribute with entid 222 is not valid")); + } + + #[test] + fn test_db_fulltext() { + let mut conn = TestConn::default(); + + // Start by installing a few :db/fulltext true attributes. + assert_transact!(conn, "[[:db/add 111 :db/ident :test/fulltext] + [:db/add 111 :db/valueType :db.type/string] + [:db/add 111 :db/unique :db.unique/identity] + [:db/add 111 :db/index true] + [:db/add 111 :db/fulltext true] + [:db/add 222 :db/ident :test/other] + [:db/add 222 :db/cardinality :db.cardinality/one] + [:db/add 222 :db/valueType :db.type/string] + [:db/add 222 :db/index true] + [:db/add 222 :db/fulltext true]]"); + + // Let's check we actually have the schema characteristics we expect. + let fulltext = conn.schema.attribute_for_entid(111).cloned().expect(":test/fulltext"); + assert_eq!(fulltext.value_type, ValueType::String); + assert_eq!(fulltext.fulltext, true); + assert_eq!(fulltext.multival, false); + assert_eq!(fulltext.unique, Some(attribute::Unique::Identity)); + + let other = conn.schema.attribute_for_entid(222).cloned().expect(":test/other"); + assert_eq!(other.value_type, ValueType::String); + assert_eq!(other.fulltext, true); + assert_eq!(other.multival, false); + assert_eq!(other.unique, None); + + // We can add fulltext indexed datoms. + assert_transact!(conn, "[[:db/add 301 :test/fulltext \"test this\"]]"); + // value column is rowid into fulltext table. + assert_matches!(conn.fulltext_values(), + "[[1 \"test this\"]]"); + assert_matches!(conn.last_transaction(), + "[[301 :test/fulltext 1 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(conn.datoms(), + "[[111 :db/ident :test/fulltext] + [111 :db/valueType :db.type/string] + [111 :db/unique :db.unique/identity] + [111 :db/index true] + [111 :db/fulltext true] + [222 :db/ident :test/other] + [222 :db/valueType :db.type/string] + [222 :db/cardinality :db.cardinality/one] + [222 :db/index true] + [222 :db/fulltext true] + [301 :test/fulltext 1]]"); + + // We can replace existing fulltext indexed datoms. + assert_transact!(conn, "[[:db/add 301 :test/fulltext \"alternate thing\"]]"); + // value column is rowid into fulltext table. + assert_matches!(conn.fulltext_values(), + "[[1 \"test this\"] + [2 \"alternate thing\"]]"); + assert_matches!(conn.last_transaction(), + "[[301 :test/fulltext 1 ?tx false] + [301 :test/fulltext 2 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(conn.datoms(), + "[[111 :db/ident :test/fulltext] + [111 :db/valueType :db.type/string] + [111 :db/unique :db.unique/identity] + [111 :db/index true] + [111 :db/fulltext true] + [222 :db/ident :test/other] + [222 :db/valueType :db.type/string] + [222 :db/cardinality :db.cardinality/one] + [222 :db/index true] + [222 :db/fulltext true] + [301 :test/fulltext 2]]"); + + // We can upsert keyed by fulltext indexed datoms. + assert_transact!(conn, "[[:db/add \"t\" :test/fulltext \"alternate thing\"] + [:db/add \"t\" :test/other \"other\"]]"); + // value column is rowid into fulltext table. + assert_matches!(conn.fulltext_values(), + "[[1 \"test this\"] + [2 \"alternate thing\"] + [3 \"other\"]]"); + assert_matches!(conn.last_transaction(), + "[[301 :test/other 3 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(conn.datoms(), + "[[111 :db/ident :test/fulltext] + [111 :db/valueType :db.type/string] + [111 :db/unique :db.unique/identity] + [111 :db/index true] + [111 :db/fulltext true] + [222 :db/ident :test/other] + [222 :db/valueType :db.type/string] + [222 :db/cardinality :db.cardinality/one] + [222 :db/index true] + [222 :db/fulltext true] + [301 :test/fulltext 2] + [301 :test/other 3]]"); + + // We can re-use fulltext values; they won't be added to the fulltext values table twice. + assert_transact!(conn, "[[:db/add 302 :test/other \"alternate thing\"]]"); + // value column is rowid into fulltext table. + assert_matches!(conn.fulltext_values(), + "[[1 \"test this\"] + [2 \"alternate thing\"] + [3 \"other\"]]"); + assert_matches!(conn.last_transaction(), + "[[302 :test/other 2 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(conn.datoms(), + "[[111 :db/ident :test/fulltext] + [111 :db/valueType :db.type/string] + [111 :db/unique :db.unique/identity] + [111 :db/index true] + [111 :db/fulltext true] + [222 :db/ident :test/other] + [222 :db/valueType :db.type/string] + [222 :db/cardinality :db.cardinality/one] + [222 :db/index true] + [222 :db/fulltext true] + [301 :test/fulltext 2] + [301 :test/other 3] + [302 :test/other 2]]"); + + // We can retract fulltext indexed datoms. The underlying fulltext value remains -- indeed, + // it might still be in use. + assert_transact!(conn, "[[:db/retract 302 :test/other \"alternate thing\"]]"); + // value column is rowid into fulltext table. + assert_matches!(conn.fulltext_values(), + "[[1 \"test this\"] + [2 \"alternate thing\"] + [3 \"other\"]]"); + assert_matches!(conn.last_transaction(), + "[[302 :test/other 2 ?tx false] + [?tx :db/txInstant ?ms ?tx true]]"); + assert_matches!(conn.datoms(), + "[[111 :db/ident :test/fulltext] + [111 :db/valueType :db.type/string] + [111 :db/unique :db.unique/identity] + [111 :db/index true] + [111 :db/fulltext true] + [222 :db/ident :test/other] + [222 :db/valueType :db.type/string] + [222 :db/cardinality :db.cardinality/one] + [222 :db/index true] + [222 :db/fulltext true] + [301 :test/fulltext 2] + [301 :test/other 3]]"); + } } diff --git a/db/src/debug.rs b/db/src/debug.rs index d2bfee69..2db3d54e 100644 --- a/db/src/debug.rs +++ b/db/src/debug.rs @@ -21,13 +21,20 @@ use rusqlite::types::{ToSql}; use tabwriter::TabWriter; use bootstrap; +use db::TypedSQLValue; use edn; use entids; -use mentat_core::TypedValue; -use mentat_tx::entities::{Entid}; -use db::TypedSQLValue; -use types::Schema; use errors::Result; +use mentat_core::{ + SQLValueType, + TypedValue, + ValueType, +}; +use mentat_tx::entities::{Entid}; +use schema::{ + SchemaBuilding, +}; +use types::Schema; /// Represents a *datom* (assertion) in the store. #[derive(Clone,Debug,Eq,Hash,Ord,PartialOrd,PartialEq)] @@ -55,6 +62,9 @@ pub struct Datoms(pub Vec); /// retracted assertions appear before added assertions. pub struct Transactions(pub Vec); +/// Represents the fulltext values in the store. +pub struct FulltextValues(pub Vec<(i64, String)>); + impl Datom { pub fn into_edn(&self) -> edn::Value { let f = |entid: &Entid| -> edn::Value { @@ -86,6 +96,12 @@ impl Transactions { } } +impl FulltextValues { + pub fn into_edn(&self) -> edn::Value { + edn::Value::Vector((&self.0).into_iter().map(|&(x, ref y)| edn::Value::Vector(vec![edn::Value::Integer(x), edn::Value::Text(y.clone())])).collect()) + } +} + /// Turn TypedValue::Ref into TypedValue::Keyword when it is possible. trait ToIdent { fn map_ident(self, schema: &Schema) -> Self; @@ -132,6 +148,9 @@ pub fn datoms_after>(conn: &rusqlite::Connection, schema: &S, let v: rusqlite::types::Value = row.get_checked(2)?; let value_type_tag: i32 = row.get_checked(3)?; + let attribute = borrowed_schema.require_attribute_for_entid(a)?; + let value_type_tag = if !attribute.fulltext { value_type_tag } else { ValueType::Long.value_type_tag() }; + let typed_value = TypedValue::from_sql_value_pair(v, value_type_tag)?.map_ident(borrowed_schema); let (value, _) = typed_value.to_edn_value_pair(); @@ -165,6 +184,9 @@ pub fn transactions_after>(conn: &rusqlite::Connection, schema let v: rusqlite::types::Value = row.get_checked(2)?; let value_type_tag: i32 = row.get_checked(3)?; + let attribute = borrowed_schema.require_attribute_for_entid(a)?; + let value_type_tag = if !attribute.fulltext { value_type_tag } else { ValueType::Long.value_type_tag() }; + let typed_value = TypedValue::from_sql_value_pair(v, value_type_tag)?.map_ident(borrowed_schema); let (value, _) = typed_value.to_edn_value_pair(); @@ -185,6 +207,19 @@ pub fn transactions_after>(conn: &rusqlite::Connection, schema Ok(Transactions(r)) } +/// Return the set of fulltext values in the store, ordered by rowid. +pub fn fulltext_values(conn: &rusqlite::Connection) -> Result { + let mut stmt: rusqlite::Statement = conn.prepare("SELECT rowid, text FROM fulltext_values ORDER BY rowid")?; + + let r: Result> = stmt.query_and_then(&[], |row| { + let rowid: i64 = row.get_checked(0)?; + let text: String = row.get_checked(1)?; + Ok((rowid, text)) + })?.collect(); + + r.map(FulltextValues) +} + /// Execute the given `sql` query with the given `params` and format the results as a /// tab-and-newline formatted string suitable for debug printing. /// diff --git a/db/src/tx.rs b/db/src/tx.rs index 162b3b3c..f2b811c0 100644 --- a/db/src/tx.rs +++ b/db/src/tx.rs @@ -330,16 +330,23 @@ impl<'conn, 'a> Tx<'conn, 'a> { // store. let mut tx_might_update_metadata = false; -{ + let final_terms: Vec = [final_populations.resolved, + final_populations.allocated, + inert_terms.into_iter().map(|term| term.unwrap()).collect()].concat(); + + { // TODO: Don't use this block to scope borrowing the schema; instead, extract a helper function. + /// Assertions that are :db.cardinality/one and not :db.fulltext. let mut non_fts_one: Vec = vec![]; /// Assertions that are :db.cardinality/many and not :db.fulltext. let mut non_fts_many: Vec = vec![]; - let final_terms: Vec = [final_populations.resolved, - final_populations.allocated, - inert_terms.into_iter().map(|term| term.unwrap()).collect()].concat(); + /// Assertions that are :db.cardinality/one and :db.fulltext. + let mut fts_one: Vec = vec![]; + + /// Assertions that are :db.cardinality/many and :db.fulltext. + let mut fts_many: Vec = vec![]; // Pipeline stage 4: final terms (after rewriting) -> DB insertions. // Collect into non_fts_*. @@ -348,19 +355,17 @@ impl<'conn, 'a> Tx<'conn, 'a> { match term { Term::AddOrRetract(op, e, a, v) => { let attribute: &Attribute = self.schema.require_attribute_for_entid(a)?; - if attribute.fulltext { - bail!(ErrorKind::NotYetImplemented(format!("Transacting :db/fulltext entities is not yet implemented"))) // TODO: reference original input. Difficult! - } - if entids::might_update_metadata(a) { tx_might_update_metadata = true; } let added = op == OpType::Add; - if attribute.multival { - non_fts_many.push((e, a, attribute, v, added)); - } else { - non_fts_one.push((e, a, attribute, v, added)); + let reduced = (e, a, attribute, v, added); + match (attribute.fulltext, attribute.multival) { + (false, true) => non_fts_many.push(reduced), + (false, false) => non_fts_one.push(reduced), + (true, false) => fts_one.push(reduced), + (true, true) => fts_many.push(reduced), } }, } @@ -382,6 +387,14 @@ impl<'conn, 'a> Tx<'conn, 'a> { self.store.insert_non_fts_searches(&non_fts_many[..], db::SearchType::Exact)?; } + if !fts_one.is_empty() { + self.store.insert_fts_searches(&fts_one[..], db::SearchType::Inexact)?; + } + + if !fts_many.is_empty() { + self.store.insert_fts_searches(&fts_many[..], db::SearchType::Exact)?; + } + self.store.commit_transaction(self.tx_id)?; }