From 9a12ced31742d39610087092308d94559a08b57c Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Sun, 7 May 2017 20:00:04 -0700 Subject: [PATCH] Don't allow callers to specify arbitrary new entity IDs. (#447) r=nalexander This commit adds a check to the partition map that a provided entity ID has been mentioned (i.e., is present in the start:index range of one of our partitions). We introduce a newtype for known entity IDs, using this internally in the tx expander to track user-provided entids that have passed the above check (and IDs that we allocate as part of tempid processing). This newtype is stripped prior to tx assertion. In order that DB tests can continue to write [:db/add 111 :foo/bar 222] we add an additional fake partition to our test connections, ranging from 100 to 1000. --- db/src/db.rs | 19 ++++++++++-- db/src/errors.rs | 6 ++-- db/src/internal_types.rs | 16 ++++++---- db/src/tx.rs | 62 +++++++++++++++++++++++++------------ db/src/types.rs | 4 +++ db/src/upsert_resolution.rs | 14 ++++----- src/conn.rs | 54 ++++++++++++++++++++++++++++++++ 7 files changed, 139 insertions(+), 36 deletions(-) diff --git a/db/src/db.rs b/db/src/db.rs index 7df269ab..5397b717 100644 --- a/db/src/db.rs +++ b/db/src/db.rs @@ -1064,6 +1064,7 @@ SELECT EXISTS pub trait PartitionMapping { fn allocate_entid(&mut self, partition: &S) -> i64 where String: Borrow; fn allocate_entids(&mut self, partition: &S, n: usize) -> Range where String: Borrow; + fn contains_entid(&self, entid: Entid) -> bool; } impl PartitionMapping for PartitionMap { @@ -1084,6 +1085,10 @@ impl PartitionMapping for PartitionMap { None => panic!("Cannot allocate entid from unknown partition: {}", partition), } } + + fn contains_entid(&self, entid: Entid) -> bool { + self.values().any(|partition| partition.contains_entid(entid)) + } } #[cfg(test)] @@ -1208,10 +1213,20 @@ mod tests { assert_eq!(transactions.0.len(), 1); assert_eq!(transactions.0[0].0.len(), 77); + let mut parts = db.partition_map; + + // Add a fake partition to allow tests to do things like + // [:db/add 111 :foo/bar 222] + { + let fake_partition = Partition { start: 100, index: 1000 }; + parts.insert(":db.part/fake".into(), fake_partition); + } + let test_conn = TestConn { sqlite: conn, - partition_map: db.partition_map, - schema: db.schema }; + partition_map: parts, + schema: db.schema, + }; // Verify that we've created the materialized views during bootstrapping. test_conn.assert_materialized_views(); diff --git a/db/src/errors.rs b/db/src/errors.rs index f25709bc..62a263d0 100644 --- a/db/src/errors.rs +++ b/db/src/errors.rs @@ -76,9 +76,11 @@ error_chain! { } /// An entid->ident mapping failed. + /// We also use this error if you try to transact an entid that we didn't allocate, + /// in part because we blow the stack in error_chain if we define a new enum! UnrecognizedEntid(entid: Entid) { - description("no ident found for entid") - display("no ident found for entid: {}", entid) + description("unrecognized or no ident found for entid") + display("unrecognized or no ident found for entid: {}", entid) } } } diff --git a/db/src/internal_types.rs b/db/src/internal_types.rs index 724a1cf4..f97f1b0a 100644 --- a/db/src/internal_types.rs +++ b/db/src/internal_types.rs @@ -62,11 +62,15 @@ impl Either { use self::Either::*; -pub type EntidOr = Either; +/// An entid that's either already in the store, or newly allocated to a tempid. +#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Ord, PartialOrd)] +pub struct KnownEntid(pub Entid); + +pub type KnownEntidOr = Either; pub type TypedValueOr = Either; pub type TempIdHandle = Rc; -pub type TempIdMap = HashMap; +pub type TempIdMap = HashMap; pub type LookupRef = Rc; @@ -79,9 +83,9 @@ pub enum LookupRefOrTempId { TempId(TempIdHandle) } -pub type TermWithTempIdsAndLookupRefs = Term, TypedValueOr>; -pub type TermWithTempIds = Term, TypedValueOr>; -pub type TermWithoutTempIds = Term; +pub type TermWithTempIdsAndLookupRefs = Term, TypedValueOr>; +pub type TermWithTempIds = Term, TypedValueOr>; +pub type TermWithoutTempIds = Term; pub type Population = Vec; impl TermWithTempIds { @@ -96,7 +100,7 @@ impl TermWithTempIds { } } -/// Given an `EntidOr` or a `TypedValueOr`, replace any internal `LookupRef` with the entid from +/// Given a `KnownEntidOr` or a `TypedValueOr`, replace any internal `LookupRef` with the entid from /// the given map. Fail if any `LookupRef` cannot be replaced. /// /// `lift` allows to specify how the entid found is mapped into the output type. (This could diff --git a/db/src/tx.rs b/db/src/tx.rs index 1f6fc37c..9ad0f496 100644 --- a/db/src/tx.rs +++ b/db/src/tx.rs @@ -58,11 +58,15 @@ use db::{ MentatStoring, PartitionMapping, }; +use edn::{ + NamespacedKeyword, +}; use entids; use errors::{ErrorKind, Result}; use internal_types::{ Either, - EntidOr, + KnownEntid, + KnownEntidOr, LookupRef, LookupRefOrTempId, TempIdHandle, @@ -175,13 +179,13 @@ impl<'conn, 'a> Tx<'conn, 'a> { let mut temp_id_map: TempIdMap = TempIdMap::default(); for &(ref temp_id, ref av_pair) in temp_id_avs { if let Some(n) = av_map.get(&av_pair) { - if let Some(previous_n) = temp_id_map.get(&*temp_id) { - if n != previous_n { + if let Some(&KnownEntid(previous_n)) = temp_id_map.get(&*temp_id) { + if *n != previous_n { // Conflicting upsert! TODO: collect conflicts and give more details on what failed this transaction. bail!(ErrorKind::NotYetImplemented(format!("Conflicting upsert: tempid '{}' resolves to more than one entid: {:?}, {:?}", temp_id, previous_n, n))) // XXX } } - temp_id_map.insert(temp_id.clone(), *n); + temp_id_map.insert(temp_id.clone(), KnownEntid(*n)); } } @@ -195,6 +199,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { /// interned handle sets so that consumers can ensure all handles are used appropriately. fn entities_into_terms_with_temp_ids_and_lookup_refs(&self, entities: I) -> Result<(Vec, intern_set::InternSet, intern_set::InternSet)> where I: IntoIterator { struct InProcess<'a> { + partition_map: &'a PartitionMap, schema: &'a Schema, mentat_id_count: i64, temp_ids: intern_set::InternSet, @@ -202,15 +207,29 @@ impl<'conn, 'a> Tx<'conn, 'a> { } impl<'a> InProcess<'a> { - fn with_schema(schema: &'a Schema) -> InProcess<'a> { + fn with_schema_and_partition_map(schema: &'a Schema, partition_map: &'a PartitionMap) -> InProcess<'a> { InProcess { - schema: schema, + partition_map, + schema, mentat_id_count: 0, temp_ids: intern_set::InternSet::new(), lookup_refs: intern_set::InternSet::new(), } } + fn ensure_entid_exists(&self, e: Entid) -> Result { + if self.partition_map.contains_entid(e) { + Ok(KnownEntid(e)) + } else { + bail!(ErrorKind::UnrecognizedEntid(e)) + } + } + + fn ensure_ident_exists(&self, e: &NamespacedKeyword) -> Result { + let entid = self.schema.require_entid(e)?; + Ok(KnownEntid(entid)) + } + fn intern_lookup_ref(&mut self, lookup_ref: &entmod::LookupRef) -> Result { let lr_a: i64 = match lookup_ref.a { entmod::Entid::Entid(ref a) => *a, @@ -237,12 +256,12 @@ impl<'conn, 'a> Tx<'conn, 'a> { entmod::EntidOrLookupRefOrTempId::TempId(TempId::Internal(self.mentat_id_count)) } - fn entity_e_into_term_e(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result> { + fn entity_e_into_term_e(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result> { match x { entmod::EntidOrLookupRefOrTempId::Entid(e) => { - let e: i64 = match e { - entmod::Entid::Entid(ref e) => *e, - entmod::Entid::Ident(ref e) => self.schema.require_entid(&e)?, + let e = match e { + entmod::Entid::Entid(ref e) => self.ensure_entid_exists(*e)?, + entmod::Entid::Ident(ref e) => self.ensure_ident_exists(&e)?, }; Ok(Either::Left(e)) }, @@ -266,10 +285,10 @@ impl<'conn, 'a> Tx<'conn, 'a> { } fn entity_e_into_term_v(&mut self, x: entmod::EntidOrLookupRefOrTempId) -> Result> { - self.entity_e_into_term_e(x).map(|r| r.map_left(TypedValue::Ref)) + self.entity_e_into_term_e(x).map(|r| r.map_left(|ke| TypedValue::Ref(ke.0))) } - fn entity_v_into_term_e(&mut self, x: entmod::AtomOrLookupRefOrVectorOrMapNotation, backward_a: &entmod::Entid) -> Result> { + fn entity_v_into_term_e(&mut self, x: entmod::AtomOrLookupRefOrVectorOrMapNotation, backward_a: &entmod::Entid) -> Result> { match backward_a.unreversed() { None => { bail!(ErrorKind::NotYetImplemented(format!("Cannot explode map notation value in :attr/_reversed notation for forward attribute"))); @@ -290,7 +309,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { Ok(Either::Right(LookupRefOrTempId::TempId(self.intern_temp_id(TempId::External(text.clone()))))) } else { if let TypedValue::Ref(entid) = self.schema.to_typed_value(&v.clone().without_spans(), ValueType::Ref)? { - Ok(Either::Left(entid)) + Ok(Either::Left(KnownEntid(entid))) } else { // The given value is expected to be :db.type/ref, so this shouldn't happen. bail!(ErrorKind::NotYetImplemented(format!("Cannot use :attr/_reversed notation for attribute {} with value that is not :db.valueType :db.type/ref", forward_a))) @@ -312,7 +331,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { } } - let mut in_process = InProcess::with_schema(&self.schema); + let mut in_process = InProcess::with_schema_and_partition_map(&self.schema, &self.partition_map); // We want to handle entities in the order they're given to us, while also "exploding" some // entities into many. We therefore push the initial entities onto the back of the deque, @@ -475,7 +494,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { terms.into_iter().map(|term: TermWithTempIdsAndLookupRefs| -> Result { match term { Term::AddOrRetract(op, e, a, v) => { - let e = replace_lookup_ref(&lookup_ref_map, e, |x| x)?; + let e = replace_lookup_ref(&lookup_ref_map, e, |x| KnownEntid(x))?; let v = replace_lookup_ref(&lookup_ref_map, v, |x| TypedValue::Ref(x))?; Ok(Term::AddOrRetract(op, e, a, v)) }, @@ -489,7 +508,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { // TODO: move this to the transactor layer. pub fn transact_entities(&mut self, entities: I) -> Result where I: IntoIterator { // TODO: push these into an internal transaction report? - let mut tempids: BTreeMap = BTreeMap::default(); + let mut tempids: BTreeMap = BTreeMap::default(); // Pipeline stage 1: entities -> terms with tempids and lookup refs. let (terms_with_temp_ids_and_lookup_refs, tempid_set, lookup_ref_set) = self.entities_into_terms_with_temp_ids_and_lookup_refs(entities)?; @@ -533,7 +552,9 @@ impl<'conn, 'a> Tx<'conn, 'a> { // TODO: track partitions for temporary IDs. let entids = self.partition_map.allocate_entids(":db.part/user", unresolved_temp_ids.len()); - let temp_id_allocations: TempIdMap = unresolved_temp_ids.into_iter().zip(entids).collect(); + let temp_id_allocations: TempIdMap = unresolved_temp_ids.into_iter() + .zip(entids.map(|e| KnownEntid(e))) + .collect(); let final_populations = generation.into_final_populations(&temp_id_allocations)?; @@ -552,7 +573,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { // Any internal tempid has been allocated by the system and is a private implementation // detail; it shouldn't be exposed in the final transaction report. - let tempids = tempids.into_iter().filter_map(|(tempid, e)| tempid.into_external().map(|s| (s, e))).collect(); + let tempids = tempids.into_iter().filter_map(|(tempid, e)| tempid.into_external().map(|s| (s, e.0))).collect(); // A transaction might try to add or retract :db/ident assertions or other metadata mutating // assertions , but those assertions might not make it to the store. If we see a possible @@ -579,6 +600,9 @@ impl<'conn, 'a> Tx<'conn, 'a> { /// Assertions that are :db.cardinality/many and :db.fulltext. let mut fts_many: Vec = vec![]; + // We need to ensure that callers can't blindly transact entities that haven't been + // allocated by this store. + // Pipeline stage 4: final terms (after rewriting) -> DB insertions. // Collect into non_fts_*. // TODO: use something like Clojure's group_by to do this. @@ -591,7 +615,7 @@ impl<'conn, 'a> Tx<'conn, 'a> { } let added = op == OpType::Add; - let reduced = (e, a, attribute, v, added); + let reduced = (e.0, a, attribute, v, added); match (attribute.fulltext, attribute.multival) { (false, true) => non_fts_many.push(reduced), (false, false) => non_fts_one.push(reduced), diff --git a/db/src/types.rs b/db/src/types.rs index ab6dfd8f..6ede0acd 100644 --- a/db/src/types.rs +++ b/db/src/types.rs @@ -40,6 +40,10 @@ impl Partition { assert!(start <= next, "A partition represents a monotonic increasing sequence of entids."); Partition { start: start, index: next } } + + pub fn contains_entid(&self, e: i64) -> bool { + (e >= self.start) && (e < self.index) + } } /// Map partition names to `Partition` instances. diff --git a/db/src/upsert_resolution.rs b/db/src/upsert_resolution.rs index 2f4736d2..03cbf971 100644 --- a/db/src/upsert_resolution.rs +++ b/db/src/upsert_resolution.rs @@ -153,8 +153,8 @@ impl Generation { for UpsertEV(t1, a, t2) in self.upserts_ev { match (temp_id_map.get(&*t1), temp_id_map.get(&*t2)) { - (Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(OpType::Add, n1, a, TypedValue::Ref(n2))), - (None, Some(&n2)) => next.upserts_e.push(UpsertE(t1, a, TypedValue::Ref(n2))), + (Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(OpType::Add, n1, a, TypedValue::Ref(n2.0))), + (None, Some(&n2)) => next.upserts_e.push(UpsertE(t1, a, TypedValue::Ref(n2.0))), (Some(&n1), None) => next.allocations.push(Term::AddOrRetract(OpType::Add, Left(n1), a, Right(t2))), (None, None) => next.allocations.push(Term::AddOrRetract(OpType::Add, Right(t1), a, Right(t2))), } @@ -168,8 +168,8 @@ impl Generation { match term { Term::AddOrRetract(op, Right(t1), a, Right(t2)) => { match (temp_id_map.get(&*t1), temp_id_map.get(&*t2)) { - (Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2))), - (None, Some(&n2)) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Left(TypedValue::Ref(n2)))), + (Some(&n1), Some(&n2)) => next.resolved.push(Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2.0))), + (None, Some(&n2)) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Left(TypedValue::Ref(n2.0)))), (Some(&n1), None) => next.allocations.push(Term::AddOrRetract(op, Left(n1), a, Right(t2))), (None, None) => next.allocations.push(Term::AddOrRetract(op, Right(t1), a, Right(t2))), } @@ -182,7 +182,7 @@ impl Generation { }, Term::AddOrRetract(op, Left(e), a, Right(t)) => { match temp_id_map.get(&*t) { - Some(&n) => next.resolved.push(Term::AddOrRetract(op, e, a, TypedValue::Ref(n))), + Some(&n) => next.resolved.push(Term::AddOrRetract(op, e, a, TypedValue::Ref(n.0))), None => next.allocations.push(Term::AddOrRetract(op, Left(e), a, Right(t))), } }, @@ -253,7 +253,7 @@ impl Generation { // TODO: consider require implementing require on temp_id_map. Term::AddOrRetract(op, Right(t1), a, Right(t2)) => { match (op, temp_id_map.get(&*t1), temp_id_map.get(&*t2)) { - (op, Some(&n1), Some(&n2)) => Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2)), + (op, Some(&n1), Some(&n2)) => Term::AddOrRetract(op, n1, a, TypedValue::Ref(n2.0)), (OpType::Add, _, _) => unreachable!(), // This is a coding error -- every tempid in a :db/add entity should resolve or be allocated. (OpType::Retract, _, _) => bail!(ErrorKind::NotYetImplemented(format!("[:db/retract ...] entity referenced tempid that did not upsert: one of {}, {}", t1, t2))), } @@ -267,7 +267,7 @@ impl Generation { }, Term::AddOrRetract(op, Left(e), a, Right(t)) => { match (op, temp_id_map.get(&*t)) { - (op, Some(&n)) => Term::AddOrRetract(op, e, a, TypedValue::Ref(n)), + (op, Some(&n)) => Term::AddOrRetract(op, e, a, TypedValue::Ref(n.0)), (OpType::Add, _) => unreachable!(), // This is a coding error. (OpType::Retract, _) => bail!(ErrorKind::NotYetImplemented(format!("[:db/retract ...] entity referenced tempid that did not upsert: {}", t))), } diff --git a/src/conn.rs b/src/conn.rs index abd348ab..d6820fc0 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -178,6 +178,60 @@ mod tests { extern crate mentat_parser_utils; + #[test] + fn test_transact_does_not_collide_existing_entids() { + let mut sqlite = db::new_connection("").unwrap(); + let mut conn = Conn::connect(&mut sqlite).unwrap(); + + // Let's find out the next ID that'll be allocated. We're going to try to collide with it + // a bit later. + let next = conn.metadata.lock().expect("metadata") + .partition_map[":db.part/user"].index; + let t = format!("[[:db/add {} :db.schema/attribute \"tempid\"]]", next + 1); + + match conn.transact(&mut sqlite, t.as_str()).unwrap_err() { + Error(ErrorKind::DbError(::mentat_db::errors::ErrorKind::UnrecognizedEntid(e)), _) => { + assert_eq!(e, next + 1); + }, + x => panic!("expected transact error, got {:?}", x), + } + + // Transact two more tempids. + let t = "[[:db/add \"one\" :db.schema/attribute \"more\"]]"; + let report = conn.transact(&mut sqlite, t) + .expect("transact succeeded"); + assert_eq!(report.tempids["more"], next); + assert_eq!(report.tempids["one"], next + 1); + } + + #[test] + fn test_transact_does_not_collide_new_entids() { + let mut sqlite = db::new_connection("").unwrap(); + let mut conn = Conn::connect(&mut sqlite).unwrap(); + + // Let's find out the next ID that'll be allocated. We're going to try to collide with it. + let next = conn.metadata.lock().expect("metadata").partition_map[":db.part/user"].index; + + // If this were to be resolved, we'd get [:db/add 65537 :db.schema/attribute 65537], but + // we should reject this, because the first ID was provided by the user! + let t = format!("[[:db/add {} :db.schema/attribute \"tempid\"]]", next); + + match conn.transact(&mut sqlite, t.as_str()).unwrap_err() { + Error(ErrorKind::DbError(::mentat_db::errors::ErrorKind::UnrecognizedEntid(e)), _) => { + // All this, despite this being the ID we were about to allocate! + assert_eq!(e, next); + }, + x => panic!("expected transact error, got {:?}", x), + } + + // And if we subsequently transact in a way that allocates one ID, we _will_ use that one. + // Note that `10` is a bootstrapped entid; we use it here as a known-good value. + let t = "[[:db/add 10 :db.schema/attribute \"temp\"]]"; + let report = conn.transact(&mut sqlite, t) + .expect("transact succeeded"); + assert_eq!(report.tempids["temp"], next); + } + #[test] fn test_transact_errors() { let mut sqlite = db::new_connection("").unwrap();