Don't allow callers to specify arbitrary new entity IDs. (#447) r=nalexander

This commit adds a check to the partition map that a provided entity ID
has been mentioned (i.e., is present in the start:index range of one of
our partitions).

We introduce a newtype for known entity IDs, using this internally in
the tx expander to track user-provided entids that have passed the above
check (and IDs that we allocate as part of tempid processing). This
newtype is stripped prior to tx assertion.

In order that DB tests can continue to write

  [:db/add 111 :foo/bar 222]

we add an additional fake partition to our test connections, ranging
from 100 to 1000.
This commit is contained in:
Richard Newman 2017-05-07 20:00:04 -07:00 committed by Richard Newman
parent 5c5818069f
commit 9a12ced317
7 changed files with 139 additions and 36 deletions

View file

@ -1064,6 +1064,7 @@ SELECT EXISTS
pub trait PartitionMapping {
fn allocate_entid<S: ?Sized + Ord + Display>(&mut self, partition: &S) -> i64 where String: Borrow<S>;
fn allocate_entids<S: ?Sized + Ord + Display>(&mut self, partition: &S, n: usize) -> Range<i64> where String: Borrow<S>;
fn contains_entid(&self, entid: Entid) -> bool;
}
impl PartitionMapping for PartitionMap {
@ -1084,6 +1085,10 @@ impl PartitionMapping for PartitionMap {
None => panic!("Cannot allocate entid from unknown partition: {}", partition),
}
}
fn contains_entid(&self, entid: Entid) -> bool {
self.values().any(|partition| partition.contains_entid(entid))
}
}
#[cfg(test)]
@ -1208,10 +1213,20 @@ mod tests {
assert_eq!(transactions.0.len(), 1);
assert_eq!(transactions.0[0].0.len(), 77);
let mut parts = db.partition_map;
// Add a fake partition to allow tests to do things like
// [:db/add 111 :foo/bar 222]
{
let fake_partition = Partition { start: 100, index: 1000 };
parts.insert(":db.part/fake".into(), fake_partition);
}
let test_conn = TestConn {
sqlite: conn,
partition_map: db.partition_map,
schema: db.schema };
partition_map: parts,
schema: db.schema,
};
// Verify that we've created the materialized views during bootstrapping.
test_conn.assert_materialized_views();

View file

@ -76,9 +76,11 @@ error_chain! {
}
/// An entid->ident mapping failed.
/// We also use this error if you try to transact an entid that we didn't allocate,
/// in part because we blow the stack in error_chain if we define a new enum!
UnrecognizedEntid(entid: Entid) {
description("no ident found for entid")
display("no ident found for entid: {}", entid)
description("unrecognized or no ident found for entid")
display("unrecognized or no ident found for entid: {}", entid)
}
}
}

View file

@ -62,11 +62,15 @@ impl<L, R> Either<L, R> {
use self::Either::*;
pub type EntidOr<T> = Either<Entid, T>;
/// An entid that's either already in the store, or newly allocated to a tempid.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)]
pub struct KnownEntid(pub Entid);
pub type KnownEntidOr<T> = Either<KnownEntid, T>;
pub type TypedValueOr<T> = Either<TypedValue, T>;
pub type TempIdHandle = Rc<TempId>;
pub type TempIdMap = HashMap<TempIdHandle, Entid>;
pub type TempIdMap = HashMap<TempIdHandle, KnownEntid>;
pub type LookupRef = Rc<AVPair>;
@ -79,9 +83,9 @@ pub enum LookupRefOrTempId {
TempId(TempIdHandle)
}
pub type TermWithTempIdsAndLookupRefs = Term<EntidOr<LookupRefOrTempId>, TypedValueOr<LookupRefOrTempId>>;
pub type TermWithTempIds = Term<EntidOr<TempIdHandle>, TypedValueOr<TempIdHandle>>;
pub type TermWithoutTempIds = Term<Entid, TypedValue>;
pub type TermWithTempIdsAndLookupRefs = Term<KnownEntidOr<LookupRefOrTempId>, TypedValueOr<LookupRefOrTempId>>;
pub type TermWithTempIds = Term<KnownEntidOr<TempIdHandle>, TypedValueOr<TempIdHandle>>;
pub type TermWithoutTempIds = Term<KnownEntid, TypedValue>;
pub type Population = Vec<TermWithTempIds>;
impl TermWithTempIds {
@ -96,7 +100,7 @@ impl TermWithTempIds {
}
}
/// Given an `EntidOr` or a `TypedValueOr`, replace any internal `LookupRef` with the entid from
/// Given a `KnownEntidOr` or a `TypedValueOr`, replace any internal `LookupRef` with the entid from
/// the given map. Fail if any `LookupRef` cannot be replaced.
///
/// `lift` allows to specify how the entid found is mapped into the output type. (This could

View file

@ -58,11 +58,15 @@ use db::{
MentatStoring,
PartitionMapping,
};
use edn::{
NamespacedKeyword,
};
use entids;
use errors::{ErrorKind, Result};
use internal_types::{
Either,
EntidOr,
KnownEntid,
KnownEntidOr,
LookupRef,
LookupRefOrTempId,
TempIdHandle,
@ -175,13 +179,13 @@ impl<'conn, 'a> Tx<'conn, 'a> {
let mut temp_id_map: TempIdMap = TempIdMap::default();
for &(ref temp_id, ref av_pair) in temp_id_avs {
if let Some(n) = av_map.get(&av_pair) {
if let Some(previous_n) = temp_id_map.get(&*temp_id) {
if n != previous_n {
if let Some(&KnownEntid(previous_n)) = temp_id_map.get(&*temp_id) {
if *n != previous_n {
// Conflicting upsert! TODO: collect conflicts and give more details on what failed this transaction.
bail!(ErrorKind::NotYetImplemented(format!("Conflicting upsert: tempid '{}' resolves to more than one entid: {:?}, {:?}", temp_id, previous_n, n))) // XXX
}
}
temp_id_map.insert(temp_id.clone(), *n);
temp_id_map.insert(temp_id.clone(), KnownEntid(*n));
}
}
@ -195,6 +199,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
/// interned handle sets so that consumers can ensure all handles are used appropriately.
fn entities_into_terms_with_temp_ids_and_lookup_refs<I>(&self, entities: I) -> Result<(Vec<TermWithTempIdsAndLookupRefs>, intern_set::InternSet<TempId>, intern_set::InternSet<AVPair>)> where I: IntoIterator<Item=Entity> {
struct InProcess<'a> {
partition_map: &'a PartitionMap,
schema: &'a Schema,
mentat_id_count: i64,
temp_ids: intern_set::InternSet<TempId>,
@ -202,15 +207,29 @@ impl<'conn, 'a> Tx<'conn, 'a> {
}
impl<'a> InProcess<'a> {
fn with_schema(schema: &'a Schema) -> InProcess<'a> {
fn with_schema_and_partition_map(schema: &'a Schema, partition_map: &'a PartitionMap) -> InProcess<'a> {
InProcess {
schema: schema,
partition_map,
schema,
mentat_id_count: 0,
temp_ids: intern_set::InternSet::new(),
lookup_refs: intern_set::InternSet::new(),
}
}
fn ensure_entid_exists(&self, e: Entid) -> Result<KnownEntid> {
if self.partition_map.contains_entid(e) {
Ok(KnownEntid(e))
} else {
bail!(ErrorKind::UnrecognizedEntid(e))
}
}
fn ensure_ident_exists(&self, e: &NamespacedKeyword) -> Result<KnownEntid> {
let entid = self.schema.require_entid(e)?;
Ok(KnownEntid(entid))
}
fn intern_lookup_ref(&mut self, lookup_ref: &entmod::LookupRef) -> Result<LookupRef> {
let lr_a: i64 = match lookup_ref.a {
entmod::Entid::Entid(ref a) => *a,
@ -237,12 +256,12 @@ impl<'conn, 'a> Tx<'conn, 'a> {
entmod::EntidOrLookupRefOrTempId::TempId(TempId::Internal(self.mentat_id_count))
}
fn entity_e_into_term_e(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result<EntidOr<LookupRefOrTempId>> {
fn entity_e_into_term_e(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result<KnownEntidOr<LookupRefOrTempId>> {
match x {
entmod::EntidOrLookupRefOrTempId::Entid(e) => {
let e: i64 = match e {
entmod::Entid::Entid(ref e) => *e,
entmod::Entid::Ident(ref e) => self.schema.require_entid(&e)?,
let e = match e {
entmod::Entid::Entid(ref e) => self.ensure_entid_exists(*e)?,
entmod::Entid::Ident(ref e) => self.ensure_ident_exists(&e)?,
};
Ok(Either::Left(e))
},
@ -266,10 +285,10 @@ impl<'conn, 'a> Tx<'conn, 'a> {
}
fn entity_e_into_term_v(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result<TypedValueOr<LookupRefOrTempId>> {
self.entity_e_into_term_e(x).map(|r| r.map_left(TypedValue::Ref))
self.entity_e_into_term_e(x).map(|r| r.map_left(|ke| TypedValue::Ref(ke.0)))
}
fn entity_v_into_term_e(&mut self, x: entmod::AtomOrLookupRefOrVectorOrMapNotation, backward_a: &entmod::Entid) -> Result<EntidOr<LookupRefOrTempId>> {
fn entity_v_into_term_e(&mut self, x: entmod::AtomOrLookupRefOrVectorOrMapNotation, backward_a: &entmod::Entid) -> Result<KnownEntidOr<LookupRefOrTempId>> {
match backward_a.unreversed() {
None => {
bail!(ErrorKind::NotYetImplemented(format!("Cannot explode map notation value in :attr/_reversed notation for forward attribute")));
@ -290,7 +309,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
Ok(Either::Right(LookupRefOrTempId::TempId(self.intern_temp_id(TempId::External(text.clone())))))
} else {
if let TypedValue::Ref(entid) = self.schema.to_typed_value(&v.clone().without_spans(), ValueType::Ref)? {
Ok(Either::Left(entid))
Ok(Either::Left(KnownEntid(entid)))
} else {
// The given value is expected to be :db.type/ref, so this shouldn't happen.
bail!(ErrorKind::NotYetImplemented(format!("Cannot use :attr/_reversed notation for attribute {} with value that is not :db.valueType :db.type/ref", forward_a)))
@ -312,7 +331,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
}
}
let mut in_process = InProcess::with_schema(&self.schema);
let mut in_process = InProcess::with_schema_and_partition_map(&self.schema, &self.partition_map);
// We want to handle entities in the order they're given to us, while also "exploding" some
// entities into many. We therefore push the initial entities onto the back of the deque,
@ -475,7 +494,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
terms.into_iter().map(|term: TermWithTempIdsAndLookupRefs| -> Result<TermWithTempIds> {
match term {
Term::AddOrRetract(op, e, a, v) => {
let e = replace_lookup_ref(&lookup_ref_map, e, |x| x)?;
let e = replace_lookup_ref(&lookup_ref_map, e, |x| KnownEntid(x))?;
let v = replace_lookup_ref(&lookup_ref_map, v, |x| TypedValue::Ref(x))?;
Ok(Term::AddOrRetract(op, e, a, v))
},
@ -489,7 +508,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// TODO: move this to the transactor layer.
pub fn transact_entities<I>(&mut self, entities: I) -> Result<TxReport> where I: IntoIterator<Item=Entity> {
// TODO: push these into an internal transaction report?
let mut tempids: BTreeMap<TempId, Entid> = BTreeMap::default();
let mut tempids: BTreeMap<TempId, KnownEntid> = BTreeMap::default();
// Pipeline stage 1: entities -> terms with tempids and lookup refs.
let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?;
@ -533,7 +552,9 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// TODO: track partitions for temporary IDs.
let entids = self.partition_map.allocate_entids(":db.part/user", unresolved_temp_ids.len());
let temp_id_allocations: TempIdMap = unresolved_temp_ids.into_iter().zip(entids).collect();
let temp_id_allocations: TempIdMap = unresolved_temp_ids.into_iter()
.zip(entids.map(|e| KnownEntid(e)))
.collect();
let final_populations = generation.into_final_populations(&temp_id_allocations)?;
@ -552,7 +573,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
// Any internal tempid has been allocated by the system and is a private implementation
// detail; it shouldn't be exposed in the final transaction report.
let tempids = tempids.into_iter().filter_map(|(tempid, e)| tempid.into_external().map(|s| (s, e))).collect();
let tempids = tempids.into_iter().filter_map(|(tempid, e)| tempid.into_external().map(|s| (s, e.0))).collect();
// A transaction might try to add or retract :db/ident assertions or other metadata mutating
// assertions , but those assertions might not make it to the store. If we see a possible
@ -579,6 +600,9 @@ impl<'conn, 'a> Tx<'conn, 'a> {
/// Assertions that are :db.cardinality/many and :db.fulltext.
let mut fts_many: Vec<db::ReducedEntity> = vec![];
// We need to ensure that callers can't blindly transact entities that haven't been
// allocated by this store.
// Pipeline stage 4: final terms (after rewriting) -> DB insertions.
// Collect into non_fts_*.
// TODO: use something like Clojure's group_by to do this.
@ -591,7 +615,7 @@ impl<'conn, 'a> Tx<'conn, 'a> {
}
let added = op == OpType::Add;
let reduced = (e, a, attribute, v, added);
let reduced = (e.0, a, attribute, v, added);
match (attribute.fulltext, attribute.multival) {
(false, true) => non_fts_many.push(reduced),
(false, false) => non_fts_one.push(reduced),

View file

@ -40,6 +40,10 @@ impl Partition {
assert!(start <= next, "A partition represents a monotonic increasing sequence of entids.");
Partition { start: start, index: next }
}
pub fn contains_entid(&self, e: i64) -> bool {
(e >= self.start) && (e < self.index)
}
}
/// Map partition names to `Partition` instances.

View file

@ -153,8 +153,8 @@ impl Generation {
for UpsertEV(t1, a, t2) in self.upserts_ev {
match (temp_id_map.get(&*t1), temp_id_map.get(&*t2)) {
(Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(OpType::Add, n1, a, TypedValue::Ref(n2))),
(None, Some(&n2)) => next.upserts_e.push(UpsertE(t1, a, TypedValue::Ref(n2))),
(Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(OpType::Add, n1, a, TypedValue::Ref(n2.0))),
(None, Some(&n2)) => next.upserts_e.push(UpsertE(t1, a, TypedValue::Ref(n2.0))),
(Some(&n1), None) => next.allocations.push(Term::AddOrRetract(OpType::Add, Left(n1), a, Right(t2))),
(None, None) => next.allocations.push(Term::AddOrRetract(OpType::Add, Right(t1), a, Right(t2))),
}
@ -168,8 +168,8 @@ impl Generation {
match term {
Term::AddOrRetract(op, Right(t1), a, Right(t2)) => {
match (temp_id_map.get(&*t1), temp_id_map.get(&*t2)) {
(Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2))),
(None, Some(&n2)) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Left(TypedValue::Ref(n2)))),
(Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2.0))),
(None, Some(&n2)) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Left(TypedValue::Ref(n2.0)))),
(Some(&n1), None) => next.allocations.push(Term::AddOrRetract(op, Left(n1), a, Right(t2))),
(None, None) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Right(t2))),
}
@ -182,7 +182,7 @@ impl Generation {
},
Term::AddOrRetract(op, Left(e), a, Right(t)) => {
match temp_id_map.get(&*t) {
Some(&n) => next.resolved.push(Term::AddOrRetract(op, e, a, TypedValue::Ref(n))),
Some(&n) => next.resolved.push(Term::AddOrRetract(op, e, a, TypedValue::Ref(n.0))),
None => next.allocations.push(Term::AddOrRetract(op, Left(e), a, Right(t))),
}
},
@ -253,7 +253,7 @@ impl Generation {
// TODO: consider require implementing require on temp_id_map.
Term::AddOrRetract(op, Right(t1), a, Right(t2)) => {
match (op, temp_id_map.get(&*t1), temp_id_map.get(&*t2)) {
(op, Some(&n1), Some(&n2)) => Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2)),
(op, Some(&n1), Some(&n2)) => Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2.0)),
(OpType::Add, _, _) => unreachable!(), // This is a coding error -- every tempid in a :db/add entity should resolve or be allocated.
(OpType::Retract, _, _) => bail!(ErrorKind::NotYetImplemented(format!("[:db/retract ...] entity referenced tempid that did not upsert: one of {}, {}", t1, t2))),
}
@ -267,7 +267,7 @@ impl Generation {
},
Term::AddOrRetract(op, Left(e), a, Right(t)) => {
match (op, temp_id_map.get(&*t)) {
(op, Some(&n)) => Term::AddOrRetract(op, e, a, TypedValue::Ref(n)),
(op, Some(&n)) => Term::AddOrRetract(op, e, a, TypedValue::Ref(n.0)),
(OpType::Add, _) => unreachable!(), // This is a coding error.
(OpType::Retract, _) => bail!(ErrorKind::NotYetImplemented(format!("[:db/retract ...] entity referenced tempid that did not upsert: {}", t))),
}

View file

@ -178,6 +178,60 @@ mod tests {
extern crate mentat_parser_utils;
#[test]
fn test_transact_does_not_collide_existing_entids() {
let mut sqlite = db::new_connection("").unwrap();
let mut conn = Conn::connect(&mut sqlite).unwrap();
// Let's find out the next ID that'll be allocated. We're going to try to collide with it
// a bit later.
let next = conn.metadata.lock().expect("metadata")
.partition_map[":db.part/user"].index;
let t = format!("[[:db/add {} :db.schema/attribute \"tempid\"]]", next + 1);
match conn.transact(&mut sqlite, t.as_str()).unwrap_err() {
Error(ErrorKind::DbError(::mentat_db::errors::ErrorKind::UnrecognizedEntid(e)), _) => {
assert_eq!(e, next + 1);
},
x => panic!("expected transact error, got {:?}", x),
}
// Transact two more tempids.
let t = "[[:db/add \"one\" :db.schema/attribute \"more\"]]";
let report = conn.transact(&mut sqlite, t)
.expect("transact succeeded");
assert_eq!(report.tempids["more"], next);
assert_eq!(report.tempids["one"], next + 1);
}
#[test]
fn test_transact_does_not_collide_new_entids() {
let mut sqlite = db::new_connection("").unwrap();
let mut conn = Conn::connect(&mut sqlite).unwrap();
// Let's find out the next ID that'll be allocated. We're going to try to collide with it.
let next = conn.metadata.lock().expect("metadata").partition_map[":db.part/user"].index;
// If this were to be resolved, we'd get [:db/add 65537 :db.schema/attribute 65537], but
// we should reject this, because the first ID was provided by the user!
let t = format!("[[:db/add {} :db.schema/attribute \"tempid\"]]", next);
match conn.transact(&mut sqlite, t.as_str()).unwrap_err() {
Error(ErrorKind::DbError(::mentat_db::errors::ErrorKind::UnrecognizedEntid(e)), _) => {
// All this, despite this being the ID we were about to allocate!
assert_eq!(e, next);
},
x => panic!("expected transact error, got {:?}", x),
}
// And if we subsequently transact in a way that allocates one ID, we _will_ use that one.
// Note that `10` is a bootstrapped entid; we use it here as a known-good value.
let t = "[[:db/add 10 :db.schema/attribute \"temp\"]]";
let report = conn.transact(&mut sqlite, t)
.expect("transact succeeded");
assert_eq!(report.tempids["temp"], next);
}
#[test]
fn test_transact_errors() {
let mut sqlite = db::new_connection("").unwrap();