Allow two datoms in the same transaction to have the same fulltext string. (#565) r=emily
This commit is contained in:
parent
ae91603bd0
commit
df3cdb5db6
1 changed file with 44 additions and 17 deletions
55
db/src/db.rs
55
db/src/db.rs
|
@ -12,6 +12,9 @@
|
||||||
|
|
||||||
use std::borrow::Borrow;
|
use std::borrow::Borrow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::collections::hash_map::{
|
||||||
|
Entry,
|
||||||
|
};
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::iter::{once, repeat};
|
use std::iter::{once, repeat};
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
@ -863,44 +866,68 @@ impl MentatStoring for rusqlite::Connection {
|
||||||
|
|
||||||
let chunks: itertools::IntoChunks<_> = entities.into_iter().chunks(max_vars / bindings_per_statement);
|
let chunks: itertools::IntoChunks<_> = entities.into_iter().chunks(max_vars / bindings_per_statement);
|
||||||
|
|
||||||
|
// From string to (searchid, value_type_tag).
|
||||||
|
let mut seen: HashMap<Rc<String>, (i64, i32)> = HashMap::with_capacity(entities.len());
|
||||||
|
|
||||||
// We'd like to flat_map here, but it's not obvious how to flat_map across Result.
|
// We'd like to flat_map here, but it's not obvious how to flat_map across Result.
|
||||||
let results: Result<Vec<()>> = chunks.into_iter().map(|chunk| -> Result<()> {
|
let results: Result<Vec<()>> = chunks.into_iter().map(|chunk| -> Result<()> {
|
||||||
let mut count = 0;
|
let mut datom_count = 0;
|
||||||
|
let mut string_count = 0;
|
||||||
|
|
||||||
// We must keep these computed values somewhere to reference them later, so we can't
|
// We must keep these computed values somewhere to reference them later, so we can't
|
||||||
// combine this map and the subsequent flat_map.
|
// combine this map and the subsequent flat_map.
|
||||||
// (e0, a0, v0, value_type_tag0, added0, flags0)
|
// (e0, a0, v0, value_type_tag0, added0, flags0)
|
||||||
let block: Result<Vec<(i64 /* e */,
|
let block: Result<Vec<(i64 /* e */,
|
||||||
i64 /* a */,
|
i64 /* a */,
|
||||||
ToSqlOutput<'a> /* value */,
|
Option<ToSqlOutput<'a>> /* value */,
|
||||||
i32 /* value_type_tag */,
|
i32 /* value_type_tag */,
|
||||||
bool /* added0 */,
|
bool /* added0 */,
|
||||||
u8 /* flags0 */,
|
u8 /* flags0 */,
|
||||||
i64 /* searchid */)>> = chunk.map(|&(e, a, ref attribute, ref typed_value, added)| {
|
i64 /* searchid */)>> = chunk.map(|&(e, a, ref attribute, ref typed_value, added)| {
|
||||||
if typed_value.value_type() != ValueType::String {
|
match typed_value {
|
||||||
bail!("Cannot transact a fulltext assertion with a typed value that is not :db/valueType :db.type/string");
|
&TypedValue::String(ref rc) => {
|
||||||
}
|
datom_count += 1;
|
||||||
|
let entry = seen.entry(rc.clone());
|
||||||
count += 1;
|
match entry {
|
||||||
|
Entry::Occupied(entry) => {
|
||||||
|
let &(searchid, value_type_tag) = entry.get();
|
||||||
|
Ok((e, a, None, value_type_tag, added, attribute.flags(), searchid))
|
||||||
|
},
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
outer_searchid += 1;
|
outer_searchid += 1;
|
||||||
|
string_count += 1;
|
||||||
|
|
||||||
// Now we can represent the typed value as an SQL value.
|
// Now we can represent the typed value as an SQL value.
|
||||||
let (value, value_type_tag): (ToSqlOutput, i32) = typed_value.to_sql_value_pair();
|
let (value, value_type_tag): (ToSqlOutput, i32) = typed_value.to_sql_value_pair();
|
||||||
|
entry.insert((outer_searchid, value_type_tag));
|
||||||
|
|
||||||
|
Ok((e, a, Some(value), value_type_tag, added, attribute.flags(), outer_searchid))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
bail!("Cannot transact a fulltext assertion with a typed value that is not :db/valueType :db.type/string");
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Ok((e, a, value, value_type_tag, added, attribute.flags(), outer_searchid))
|
|
||||||
}).collect();
|
}).collect();
|
||||||
let block = block?;
|
let block = block?;
|
||||||
|
|
||||||
// First, insert all fulltext string values.
|
// First, insert all fulltext string values.
|
||||||
// `fts_params` reference computed values in `block`.
|
// `fts_params` reference computed values in `block`.
|
||||||
let fts_params: Vec<&ToSql> = block.iter().flat_map(|&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref searchid)| {
|
let fts_params: Vec<&ToSql> = block.iter()
|
||||||
|
.filter(|&&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref _searchid)| {
|
||||||
|
value.is_some()
|
||||||
|
})
|
||||||
|
.flat_map(|&(ref _e, ref _a, ref value, ref _value_type_tag, _added, ref _flags, ref searchid)| {
|
||||||
// Avoid inner heap allocation.
|
// Avoid inner heap allocation.
|
||||||
once(value as &ToSql)
|
once(value as &ToSql)
|
||||||
.chain(once(searchid as &ToSql))
|
.chain(once(searchid as &ToSql))
|
||||||
}).collect();
|
}).collect();
|
||||||
|
|
||||||
// TODO: make this maximally efficient. It's not terribly inefficient right now.
|
// TODO: make this maximally efficient. It's not terribly inefficient right now.
|
||||||
let fts_values: String = repeat_values(2, count);
|
let fts_values: String = repeat_values(2, string_count);
|
||||||
let fts_s: String = format!("INSERT INTO fulltext_values_view (text, searchid) VALUES {}", fts_values);
|
let fts_s: String = format!("INSERT INTO fulltext_values_view (text, searchid) VALUES {}", fts_values);
|
||||||
|
|
||||||
// TODO: consider ensuring we inserted the expected number of rows.
|
// TODO: consider ensuring we inserted the expected number of rows.
|
||||||
|
@ -923,10 +950,10 @@ impl MentatStoring for rusqlite::Connection {
|
||||||
}).collect();
|
}).collect();
|
||||||
|
|
||||||
// TODO: cache this for selected values of count.
|
// TODO: cache this for selected values of count.
|
||||||
assert!(bindings_per_statement * count < max_vars, "Too many values: {} * {} >= {}", bindings_per_statement, count, max_vars);
|
assert!(bindings_per_statement * datom_count < max_vars, "Too many values: {} * {} >= {}", bindings_per_statement, datom_count, max_vars);
|
||||||
let inner = "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)".to_string();
|
let inner = "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)".to_string();
|
||||||
// Like "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?), (?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)".
|
// Like "(?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?), (?, ?, (SELECT rowid FROM fulltext_values WHERE searchid = ?), ?, ?, ?)".
|
||||||
let fts_values: String = repeat(inner).take(count).join(", ");
|
let fts_values: String = repeat(inner).take(datom_count).join(", ");
|
||||||
let s: String = if search_type == SearchType::Exact {
|
let s: String = if search_type == SearchType::Exact {
|
||||||
format!("INSERT INTO temp.exact_searches (e0, a0, v0, value_type_tag0, added0, flags0) VALUES {}", fts_values)
|
format!("INSERT INTO temp.exact_searches (e0, a0, v0, value_type_tag0, added0, flags0) VALUES {}", fts_values)
|
||||||
} else {
|
} else {
|
||||||
|
@ -937,14 +964,14 @@ impl MentatStoring for rusqlite::Connection {
|
||||||
let mut stmt = self.prepare_cached(s.as_str())?;
|
let mut stmt = self.prepare_cached(s.as_str())?;
|
||||||
stmt.execute(&params)
|
stmt.execute(&params)
|
||||||
.map(|_c| ())
|
.map(|_c| ())
|
||||||
.chain_err(|| "Could not insert fts statements into temporary search table!")
|
.chain_err(|| "Could not insert FTS statements into temporary search table!")
|
||||||
}).collect::<Result<Vec<()>>>();
|
}).collect::<Result<Vec<()>>>();
|
||||||
|
|
||||||
// Finally, clean up temporary searchids.
|
// Finally, clean up temporary searchids.
|
||||||
let mut stmt = self.prepare_cached("UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL")?;
|
let mut stmt = self.prepare_cached("UPDATE fulltext_values SET searchid = NULL WHERE searchid IS NOT NULL")?;
|
||||||
stmt.execute(&[])
|
stmt.execute(&[])
|
||||||
.map(|_c| ())
|
.map(|_c| ())
|
||||||
.chain_err(|| "Could not drop fts search ids!")?;
|
.chain_err(|| "Could not drop FTS search ids!")?;
|
||||||
|
|
||||||
results.map(|_| ())
|
results.map(|_| ())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue