From b22b29679b36f506c6b409d2d00fd54784285156 Mon Sep 17 00:00:00 2001
From: Grisha Kruglov
Date: Fri, 7 Sep 2018 19:18:20 -0700
Subject: [PATCH] Basic sync support (#563) r=nalexander

* Pre: remove remnants of 'open_empty'

* Pre: Cleanup 'datoms' table after a timeline move

Since timeline move operations use a transactor, they generate a "phantom" 'tx' and a 'txInstant' assertion. It is "phantom" in the sense that it was never present in the 'transactions' table, and is entirely synthetic as far as our database is concerned. It is an implementation artifact, and we were not cleaning it up.

It becomes a problem when we start inserting transactions after a move. Once the transactor clashes with the phantom 'tx', it will retract the phantom 'txInstant' value, leaving the transactions log in an incorrect state.

This patch adds a test for this scenario and elects the easy way out: simply remove the offending 'txInstant' datom.

* Part 1: Sync without support for side-effects

A "side-effect" is defined here as a mutation of remote state as part of the sync. If, during a sync, we determine that remote state needs to be changed, bail out. This generally supports different variations of "baton-passing" syncing, where clients will succeed at syncing as long as each change is non-conflicting.

* Part 2: Support basic "side-effects" syncing

This patch introduces the concept of a follow-up sync. If a sync generated a "merge transaction" (a regular transaction that contains the assertions necessary for the local and remote transaction logs to converge), then this transaction needs to be uploaded in a follow-up sync. The generated SyncReport indicates whether a follow-up sync is required. A follow-up sync is itself just a regular sync: if the remote state did not change, it will result in a simple RemoteFastForward; otherwise, we'll continue merging and requesting follow-ups.

Schema alterations are explicitly not supported.
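For illustration only (not part of this patch): a minimal sketch of how a consumer built with the "syncable" feature might drive a sync and inspect the resulting reports. It assumes Store, SyncReport and SyncResult are importable from the mentat crate root (SyncResult may need to be imported from mentat_tolstoy directly), and the server URI and user UUID below are placeholders.

    use mentat::{Store, SyncReport, SyncResult};

    fn sync_once(store: &mut Store) {
        // Placeholder values; a real consumer supplies its own Tolstoy server URI and user UUID.
        let server = "https://sync.example.com".to_string();
        let user = "00000000-0000-0000-0000-000000000000".to_string();

        // Store::sync keeps going while a pass reports Merge(FullSync), so the caller sees
        // either a single report or the whole sequence of per-pass reports.
        match store.sync(&server, &user).expect("sync to complete") {
            SyncResult::Atomic(SyncReport::RemoteFastForward) =>
                println!("uploaded local transactions"),
            SyncResult::Atomic(SyncReport::LocalFastForward) =>
                println!("downloaded remote transactions"),
            SyncResult::Atomic(report) =>
                println!("single-pass sync: {:?}", report),
            SyncResult::NonAtomic(reports) => {
                // A merge produced a transaction that had to be uploaded in follow-up
                // passes; each pass was committed on its own.
                println!("multi-pass sync: {:?}", reports);
            },
        }
    }

A SyncResult::NonAtomic value simply means the sync needed more than one pass before local and remote converged.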
As local transactions are rebased on top of remote ones, the following changes happen:
- entids are changed into tempids, letting the transactor upsert :db/unique values
- entids for retractions are changed into lookup-refs if we're confident they'll succeed -- otherwise, retractions are dropped on the floor

* Post: use a macro for more readable tests

* Tolstoy README
---
 Cargo.toml | 1 +
 db/src/timelines.rs | 102 ++-
 ffi/src/lib.rs | 1 -
 public-traits/Cargo.toml | 11 +-
 public-traits/errors.rs | 60 +-
 public-traits/lib.rs | 11 +-
 src/conn.rs | 15 -
 src/lib.rs | 14 +-
 src/store.rs | 64 +-
 src/sync.rs | 46 ++
 tests/tolstoy.rs | 1286 ++++++++++++++++++++++++++++-
 tolstoy-traits/errors.rs | 25 +-
 tolstoy/Cargo.toml | 6 +
 tolstoy/README.md | 138 ++++
 tolstoy/src/bootstrap.rs | 90 ++
 tolstoy/src/datoms.rs | 67 ++
 tolstoy/src/debug.rs | 105 +++
 tolstoy/src/lib.rs | 40 +-
 tolstoy/src/logger.rs | 32 +
 tolstoy/src/metadata.rs | 178 +++-
 tolstoy/src/remote_client.rs | 341 ++++++++
 tolstoy/src/schema.rs | 96 ++-
 tolstoy/src/syncer.rs | 1170 +++++++++++++++++---------
 tolstoy/src/tx_mapper.rs | 50 +-
 tolstoy/src/tx_processor.rs | 62 +-
 tolstoy/src/tx_uploader.rs | 220 +++++
 tolstoy/src/types.rs | 101 +++
 tools/cli/src/mentat_cli/repl.rs | 9 +-
 transaction/src/entity_builder.rs | 1 +
 transaction/src/lib.rs | 15 +
 30 files changed, 3758 insertions(+), 599 deletions(-)
 create mode 100644 src/sync.rs
 create mode 100644 tolstoy/README.md
 create mode 100644 tolstoy/src/bootstrap.rs
 create mode 100644 tolstoy/src/datoms.rs
 create mode 100644 tolstoy/src/debug.rs
 create mode 100644 tolstoy/src/logger.rs
 create mode 100644 tolstoy/src/remote_client.rs
 create mode 100644 tolstoy/src/tx_uploader.rs
 create mode 100644 tolstoy/src/types.rs
diff --git a/Cargo.toml b/Cargo.toml index 4b3a5e79..02555492 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ chrono = "0.4" failure = "0.1.1" lazy_static = "0.2" time = "0.1" +log = "0.4" uuid = { version = "0.5", features = ["v4", "serde"] } [dependencies.rusqlite] diff --git a/db/src/timelines.rs b/db/src/timelines.rs index 935748a7..f5aba66d 100644 --- a/db/src/timelines.rs +++ b/db/src/timelines.rs @@ -98,6 +98,11 @@ fn move_transactions_to(conn: &rusqlite::Connection, tx_ids: &[Entid], new_timel Ok(()) } +fn remove_tx_from_datoms(conn: &rusqlite::Connection, tx_id: Entid) -> Result<()> { + conn.execute("DELETE FROM datoms WHERE e = ?", &[&tx_id])?; + Ok(()) +} + fn is_timeline_empty(conn: &rusqlite::Connection, timeline: Entid) -> Result { let mut stmt = conn.prepare("SELECT timeline FROM timelined_transactions WHERE timeline = ? GROUP BY timeline")?; let rows = stmt.query_and_then(&[&timeline], |row| -> Result { @@ -152,11 +157,22 @@ pub fn move_from_main_timeline(conn: &rusqlite::Connection, schema: &Schema, let reversed_terms = reversed_terms_for(conn, *tx_id)?; // Rewind schema and datoms. - let (_, _, new_schema, _) = transact_terms_with_action( + let (report, _, new_schema, _) = transact_terms_with_action( conn, partition_map.clone(), schema, schema, NullWatcher(), reversed_terms.into_iter().map(|t| t.rewrap()), InternSet::new(), TransactorAction::Materialize )?; + + // Rewind operation generated a 'tx' and a 'txInstant' assertion, which got + // inserted into the 'datoms' table (due to TransactorAction::Materialize). + // This is problematic. If we transact a few more times, the transactor will + // generate the same 'tx', but with a different 'txInstant'.
+ // The end result will be a transaction which has a phantom + // retraction of a txInstant, since transactor operates against the state of + // 'datoms', and not against the 'transactions' table. + // A quick workaround is to just remove the bad txInstant datom. + // See test_clashing_tx_instants test case. + remove_tx_from_datoms(conn, report.tx_id)?; last_schema = new_schema; } @@ -191,7 +207,7 @@ mod tests { }; conn.partition_map = pmap.clone(); } - + #[test] fn test_pop_simple() { let mut conn = TestConn::default(); @@ -284,7 +300,85 @@ mod tests { "#); } - + #[test] + fn test_clashing_tx_instants() { + let mut conn = TestConn::default(); + conn.sanitized_partition_map(); + + // Transact a basic schema. + assert_transact!(conn, r#" + [{:db/ident :person/name :db/valueType :db.type/string :db/cardinality :db.cardinality/one :db/unique :db.unique/identity :db/index true}] + "#); + + // Make an assertion against our schema. + assert_transact!(conn, r#"[{:person/name "Vanya"}]"#); + + // Move that assertion away from the main timeline. + let (new_schema, new_partition_map) = move_from_main_timeline( + &conn.sqlite, &conn.schema, conn.partition_map.clone(), + conn.last_tx_id().., 1 + ).expect("moved single tx"); + update_conn(&mut conn, &new_schema, &new_partition_map); + + // Assert that our datoms are now just the schema. + assert_matches!(conn.datoms(), " + [[?e :db/ident :person/name] + [?e :db/valueType :db.type/string] + [?e :db/cardinality :db.cardinality/one] + [?e :db/unique :db.unique/identity] + [?e :db/index true]]"); + // Same for transactions. + assert_matches!(conn.transactions(), " + [[[?e :db/ident :person/name ?tx true] + [?e :db/valueType :db.type/string ?tx true] + [?e :db/cardinality :db.cardinality/one ?tx true] + [?e :db/unique :db.unique/identity ?tx true] + [?e :db/index true ?tx true] + [?tx :db/txInstant ?ms ?tx true]]]"); + + // Re-assert our initial fact against our schema. + assert_transact!(conn, r#" + [[:db/add "tempid" :person/name "Vanya"]]"#); + + // Now, change that fact. This is the "clashing" transaction, if we're + // performing a timeline move using the transactor. + assert_transact!(conn, r#" + [[:db/add (lookup-ref :person/name "Vanya") :person/name "Ivan"]]"#); + + // Assert that our datoms are now the schema and the final assertion. + assert_matches!(conn.datoms(), r#" + [[?e1 :db/ident :person/name] + [?e1 :db/valueType :db.type/string] + [?e1 :db/cardinality :db.cardinality/one] + [?e1 :db/unique :db.unique/identity] + [?e1 :db/index true] + [?e2 :person/name "Ivan"]] + "#); + + // Assert that we have three correct looking transactions. + // This will fail if we're not cleaning up the 'datoms' table + // after the timeline move. 
+ assert_matches!(conn.transactions(), r#" + [[ + [?e1 :db/ident :person/name ?tx1 true] + [?e1 :db/valueType :db.type/string ?tx1 true] + [?e1 :db/cardinality :db.cardinality/one ?tx1 true] + [?e1 :db/unique :db.unique/identity ?tx1 true] + [?e1 :db/index true ?tx1 true] + [?tx1 :db/txInstant ?ms1 ?tx1 true] + ] + [ + [?e2 :person/name "Vanya" ?tx2 true] + [?tx2 :db/txInstant ?ms2 ?tx2 true] + ] + [ + [?e2 :person/name "Ivan" ?tx3 true] + [?e2 :person/name "Vanya" ?tx3 false] + [?tx3 :db/txInstant ?ms3 ?tx3 true] + ]] + "#); + } + #[test] fn test_pop_schema() { let mut conn = TestConn::default(); @@ -432,7 +526,7 @@ mod tests { assert_matches!(conn.datoms(), "[]"); assert_matches!(conn.transactions(), "[]"); assert_eq!(conn.partition_map, partition_map0); - + // Assert all of schema's components individually, for some guidance in case of failures: assert_eq!(conn.schema.entid_map, schema0.entid_map); assert_eq!(conn.schema.ident_map, schema0.ident_map); diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 5d2c532e..c1fb63b7 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -107,7 +107,6 @@ pub use mentat::{ QueryResults, RelResult, Store, - Syncable, TypedValue, TxObserver, TxReport, diff --git a/public-traits/Cargo.toml b/public-traits/Cargo.toml index 92bc255d..3b8440fc 100644 --- a/public-traits/Cargo.toml +++ b/public-traits/Cargo.toml @@ -10,11 +10,12 @@ path = "lib.rs" [features] default = ["syncable"] sqlcipher = ["rusqlite/sqlcipher"] -syncable = ["tolstoy_traits"] +syncable = ["tolstoy_traits", "hyper", "serde_json"] [dependencies] failure = "0.1.1" failure_derive = "0.1.1" +uuid = "0.5" [dependencies.rusqlite] version = "0.13" @@ -44,3 +45,11 @@ path = "../sql-traits" [dependencies.tolstoy_traits] path = "../tolstoy-traits" optional = true + +[dependencies.hyper] +version = "0.11" +optional = true + +[dependencies.serde_json] +version = "1.0" +optional = true diff --git a/public-traits/errors.rs b/public-traits/errors.rs index 5f212ab6..ff57d337 100644 --- a/public-traits/errors.rs +++ b/public-traits/errors.rs @@ -13,8 +13,10 @@ use std; // To refer to std::result::Result. use std::collections::BTreeSet; +use std::error::Error; use rusqlite; +use uuid; use edn; @@ -44,6 +46,12 @@ use tolstoy_traits::errors::{ TolstoyError, }; +#[cfg(feature = "syncable")] +use hyper; + +#[cfg(feature = "syncable")] +use serde_json; + pub type Result = std::result::Result; #[derive(Debug, Fail)] @@ -97,8 +105,8 @@ pub enum MentatError { // It would be better to capture the underlying `rusqlite::Error`, but that type doesn't // implement many useful traits, including `Clone`, `Eq`, and `PartialEq`. 
- #[fail(display = "SQL error: {}", _0)] - RusqliteError(String), + #[fail(display = "SQL error: {}, cause: {}", _0, _1)] + RusqliteError(String, String), #[fail(display = "{}", _0)] EdnParseError(#[cause] edn::ParseError), @@ -118,9 +126,24 @@ pub enum MentatError { #[fail(display = "{}", _0)] SQLError(#[cause] SQLError), + #[fail(display = "{}", _0)] + UuidError(#[cause] uuid::ParseError), + #[cfg(feature = "syncable")] #[fail(display = "{}", _0)] TolstoyError(#[cause] TolstoyError), + + #[cfg(feature = "syncable")] + #[fail(display = "{}", _0)] + NetworkError(#[cause] hyper::Error), + + #[cfg(feature = "syncable")] + #[fail(display = "{}", _0)] + UriError(#[cause] hyper::error::UriError), + + #[cfg(feature = "syncable")] + #[fail(display = "{}", _0)] + SerializationError(#[cause] serde_json::Error), } impl From for MentatError { @@ -131,7 +154,17 @@ impl From for MentatError { impl From for MentatError { fn from(error: rusqlite::Error) -> MentatError { - MentatError::RusqliteError(error.to_string()) + let cause = match error.cause() { + Some(e) => e.to_string(), + None => "".to_string() + }; + MentatError::RusqliteError(error.to_string(), cause) + } +} + +impl From for MentatError { + fn from(error: uuid::ParseError) -> MentatError { + MentatError::UuidError(error) } } @@ -177,3 +210,24 @@ impl From for MentatError { MentatError::TolstoyError(error) } } + +#[cfg(feature = "syncable")] +impl From for MentatError { + fn from(error: serde_json::Error) -> MentatError { + MentatError::SerializationError(error) + } +} + +#[cfg(feature = "syncable")] +impl From for MentatError { + fn from(error: hyper::Error) -> MentatError { + MentatError::NetworkError(error) + } +} + +#[cfg(feature = "syncable")] +impl From for MentatError { + fn from(error: hyper::error::UriError) -> MentatError { + MentatError::UriError(error) + } +} diff --git a/public-traits/lib.rs b/public-traits/lib.rs index 7cba001f..15598803 100644 --- a/public-traits/lib.rs +++ b/public-traits/lib.rs @@ -20,7 +20,16 @@ extern crate db_traits; extern crate query_pull_traits; extern crate query_projector_traits; extern crate query_algebrizer_traits; -extern crate tolstoy_traits; extern crate sql_traits; +extern crate uuid; + +#[cfg(feature = "syncable")] +extern crate tolstoy_traits; + +#[cfg(feature = "syncable")] +extern crate hyper; + +#[cfg(feature = "syncable")] +extern crate serde_json; pub mod errors; diff --git a/src/conn.rs b/src/conn.rs index 1462f73f..f8d28fe1 100644 --- a/src/conn.rs +++ b/src/conn.rs @@ -118,10 +118,6 @@ pub struct Conn { pub(crate) tx_observer_service: Mutex, } -pub trait Syncable { - fn sync(&mut self, server_uri: &String, user_uuid: &String) -> Result<()>; -} - impl Conn { // Intentionally not public. fn new(partition_map: PartitionMap, schema: Schema) -> Conn { @@ -131,17 +127,6 @@ impl Conn { } } - /// Prepare the provided SQLite handle for use as a Mentat store. Creates tables but - /// _does not_ write the bootstrap schema. This constructor should only be used by - /// consumers that expect to populate raw transaction data themselves. 
- - pub(crate) fn empty(sqlite: &mut rusqlite::Connection) -> Result { - let (tx, db) = db::create_empty_current_version(sqlite)?; - tx.commit()?; - Ok(Conn::new(db.partition_map, db.schema)) - } - - pub fn connect(sqlite: &mut rusqlite::Connection) -> Result { let db = db::ensure_current_version(sqlite)?; Ok(Conn::new(db.partition_map, db.schema)) diff --git a/src/lib.rs b/src/lib.rs index a0e0b0c8..c4e72814 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -178,13 +178,25 @@ pub mod query_builder; pub mod store; pub mod vocabulary; +#[cfg(feature = "syncable")] +mod sync; + +#[cfg(feature = "syncable")] +pub use sync::{ + Syncable, +}; + +#[cfg(feature = "syncable")] +pub use mentat_tolstoy::{ + SyncReport, +}; + pub use query_builder::{ QueryBuilder, }; pub use conn::{ Conn, - Syncable, }; pub use mentat_transaction::{ diff --git a/src/store.rs b/src/store.rs index 591b9fad..bd5a5389 100644 --- a/src/store.rs +++ b/src/store.rs @@ -37,9 +37,6 @@ use mentat_db::{ TxObserver, }; -#[cfg(feature = "syncable")] -use mentat_tolstoy::Syncer; - use mentat_transaction::{ CacheAction, CacheDirection, @@ -57,11 +54,6 @@ use public_traits::errors::{ Result, }; -#[cfg(feature = "syncable")] -use public_traits::errors::{ - MentatError, -}; - use mentat_transaction::query::{ PreparedResult, QueryExplanation, @@ -69,6 +61,18 @@ use mentat_transaction::query::{ QueryOutput, }; +#[cfg(feature = "syncable")] +use mentat_tolstoy::{ + SyncReport, + SyncResult, + SyncFollowup, +}; + +#[cfg(feature = "syncable")] +use sync::{ + Syncable, +}; + /// A convenience wrapper around a single SQLite connection and a Conn. This is suitable /// for applications that don't require complex connection management. pub struct Store { @@ -93,6 +97,32 @@ impl Store { ip.commit()?; Ok(report) } + + #[cfg(feature = "syncable")] + pub fn sync(&mut self, server_uri: &String, user_uuid: &String) -> Result { + let mut reports = vec![]; + loop { + let mut ip = self.begin_transaction()?; + let report = ip.sync(server_uri, user_uuid)?; + ip.commit()?; + + match report { + SyncReport::Merge(SyncFollowup::FullSync) => { + reports.push(report); + continue + }, + _ => { + reports.push(report); + break + }, + } + } + if reports.len() == 1 { + Ok(SyncResult::Atomic(reports[0].clone())) + } else { + Ok(SyncResult::NonAtomic(reports)) + } + } } #[cfg(feature = "sqlcipher")] @@ -209,26 +239,14 @@ impl Pullable for Store { } } -#[cfg(feature = "syncable")] -use uuid::Uuid; - -#[cfg(feature = "syncable")] -use conn::Syncable; - -#[cfg(feature = "syncable")] -impl Syncable for Store { - fn sync(&mut self, server_uri: &String, user_uuid: &String) -> Result<()> { - let uuid = Uuid::parse_str(&user_uuid).map_err(|_| MentatError::BadUuid(user_uuid.clone()))?; - Ok(Syncer::flow(&mut self.sqlite, server_uri, &uuid)?) - } -} - #[cfg(test)] mod tests { use super::*; extern crate time; + use uuid::Uuid; + use std::collections::{ BTreeSet, }; @@ -244,8 +262,6 @@ mod tests { Duration, }; - use uuid::Uuid; - use mentat_db::cache::{ SQLiteAttributeCache, }; diff --git a/src/sync.rs b/src/sync.rs new file mode 100644 index 00000000..0b5a1d30 --- /dev/null +++ b/src/sync.rs @@ -0,0 +1,46 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. 
You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use uuid::Uuid; + +use mentat_transaction::{ + InProgress, +}; + +use errors::{ + Result, +}; + +use mentat_tolstoy::{ + Syncer, + RemoteClient, + SyncReport, +}; + +pub trait Syncable { + fn sync(&mut self, server_uri: &String, user_uuid: &String) -> Result; +} + +impl<'a, 'c> Syncable for InProgress<'a, 'c> { + fn sync(&mut self, server_uri: &String, user_uuid: &String) -> Result { + // Syncer behaves as if it's part of InProgress. + // This split into a separate crate is segment synchronization functionality + // in a single crate which can be easily disabled by consumers, + // and to separate concerns. + // But for all intents and purposes, Syncer operates over a "mentat transaction", + // which is exactly what InProgress represents. + let mut remote_client = RemoteClient::new( + server_uri.to_string(), + Uuid::parse_str(&user_uuid)? + ); + Syncer::sync(self, &mut remote_client) + .map_err(|e| e.into()) + } +} diff --git a/tests/tolstoy.rs b/tests/tolstoy.rs index 34ef0361..0d958961 100644 --- a/tests/tolstoy.rs +++ b/tests/tolstoy.rs @@ -8,8 +8,14 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +extern crate uuid; extern crate mentat; +extern crate edn; extern crate core_traits; +extern crate public_traits; + +extern crate log; +#[macro_use] extern crate mentat_db; #[cfg(feature = "syncable")] extern crate mentat_tolstoy; @@ -17,68 +23,95 @@ extern crate mentat_tolstoy; #[cfg(feature = "syncable")] extern crate tolstoy_traits; +// Run with 'cargo test tolstoy_tests' from top-level. 
#[cfg(feature = "syncable")] -mod tests { +mod tolstoy_tests { + use std::collections::HashMap; use std::collections::BTreeMap; + use std::collections::hash_map::Entry; + + use std::borrow::Borrow; + + use edn; + + use uuid::Uuid; + use mentat::conn::Conn; use mentat::new_connection; + + use mentat_db::TX0; + + use mentat_tolstoy::{ + Tx, + TxPart, + GlobalTransactionLog, + SyncReport, + SyncFollowup, + Syncer, + }; + + use mentat_tolstoy::debug::{ + parts_to_datoms, + txs_after, + }; + use mentat_tolstoy::tx_processor::{ Processor, TxReceiver, - TxPart, }; - use tolstoy_traits::errors::Result; use core_traits::{ Entid, TypedValue, ValueType, }; + use public_traits::errors::{ + Result, + MentatError, + }; + use tolstoy_traits::errors::{ + TolstoyError, + }; struct TxCountingReceiver { - pub tx_count: usize, - pub is_done: bool, + tx_count: usize, } impl TxCountingReceiver { fn new() -> TxCountingReceiver { TxCountingReceiver { tx_count: 0, - is_done: false, } } } - impl TxReceiver for TxCountingReceiver { + impl TxReceiver for TxCountingReceiver { fn tx(&mut self, _tx_id: Entid, _d: &mut T) -> Result<()> where T: Iterator { self.tx_count = self.tx_count + 1; Ok(()) } - fn done(&mut self) -> Result<()> { - self.is_done = true; - Ok(()) + fn done(self) -> usize { + self.tx_count } } #[derive(Debug)] struct TestingReceiver { - pub txes: BTreeMap>, - pub is_done: bool, + txes: BTreeMap>, } impl TestingReceiver { fn new() -> TestingReceiver { TestingReceiver { txes: BTreeMap::new(), - is_done: false, } } } - impl TxReceiver for TestingReceiver { + impl TxReceiver>> for TestingReceiver { fn tx(&mut self, tx_id: Entid, d: &mut T) -> Result<()> where T: Iterator { let datoms = self.txes.entry(tx_id).or_insert(vec![]); @@ -86,16 +119,135 @@ mod tests { Ok(()) } - fn done(&mut self) -> Result<()> { - self.is_done = true; + fn done(self) -> BTreeMap> { + self.txes + } + } + + fn assert_tx_datoms_count(txes: &BTreeMap>, tx_num: usize, expected_datoms: usize) { + let tx = txes.keys().nth(tx_num).expect("first tx"); + let datoms = txes.get(tx).expect("datoms"); + assert_eq!(expected_datoms, datoms.len()); + } + + #[derive(Debug)] + struct TestRemoteClient { + pub head: Uuid, + pub chunks: HashMap, + pub transactions: HashMap>, + // Keep transactions in order: + pub tx_rowid: HashMap, + pub rowid_tx: Vec, + } + + impl TestRemoteClient { + fn new() -> TestRemoteClient { + TestRemoteClient { + head: Uuid::nil(), + chunks: HashMap::default(), + transactions: HashMap::default(), + tx_rowid: HashMap::default(), + rowid_tx: vec![], + } + } + } + + impl GlobalTransactionLog for TestRemoteClient { + fn head(&self) -> Result { + Ok(self.head) + } + + fn transactions_after(&self, tx: &Uuid) -> Result> { + let rowid_range; + if tx == &Uuid::nil() { + rowid_range = 0..; + } else { + rowid_range = self.tx_rowid[tx] + 1 ..; + } + + let mut txs = vec![]; + for tx_uuid in &self.rowid_tx[rowid_range] { + txs.push(Tx { + tx: tx_uuid.clone(), + parts: self.transactions.get(tx_uuid).unwrap().clone(), + }); + } + Ok(txs) + } + + fn set_head(&mut self, tx: &Uuid) -> Result<()> { + self.head = tx.clone(); + Ok(()) + } + + fn put_chunk(&mut self, tx: &Uuid, payload: &TxPart) -> Result<()> { + match self.chunks.entry(tx.clone()) { + Entry::Occupied(_) => panic!("trying to overwrite chunk"), + Entry::Vacant(entry) => { + entry.insert(payload.clone()); + () + }, + } + Ok(()) + } + + fn put_transaction(&mut self, tx: &Uuid, _parent_tx: &Uuid, chunk_txs: &Vec) -> Result<()> { + let mut parts = vec![]; + for chunk_tx in chunk_txs { + 
parts.push(self.chunks.get(chunk_tx).unwrap().clone()); + } + self.transactions.insert(tx.clone(), parts); + self.rowid_tx.push(tx.clone()); + self.tx_rowid.insert(tx.clone(), self.rowid_tx.len() - 1); Ok(()) } } - fn assert_tx_datoms_count(receiver: &TestingReceiver, tx_num: usize, expected_datoms: usize) { - let tx = receiver.txes.keys().nth(tx_num).expect("first tx"); - let datoms = receiver.txes.get(tx).expect("datoms"); - assert_eq!(expected_datoms, datoms.len()); + macro_rules! assert_sync { + ( $report: pat, $conn: expr, $sqlite: expr, $remote: expr ) => {{ + let mut ip = $conn.begin_transaction(&mut $sqlite).expect("begun successfully"); + match Syncer::sync(&mut ip, &mut $remote).expect("sync report") { + $report => (), + wr => panic!("Wrong sync report: {:?}", wr), + } + ip.commit().expect("committed"); + }}; + ( error => $error: pat, $conn: expr, $sqlite: expr, $remote: expr ) => {{ + let mut ip = $conn.begin_transaction(&mut $sqlite).expect("begun successfully"); + match Syncer::sync(&mut ip, &mut $remote).expect_err("expected sync to fail, but did not") { + $error => (), + we => panic!("Failed with wrong error: {:?}", we), + } + }}; + } + + macro_rules! assert_transactions { + ($sqlite:expr, $conn:expr, $($tx:expr),+) => { + let txs = txs_after(&$sqlite, &$conn.current_schema(), TX0); + + let mut index = 1; + $( + assert_matches!(parts_to_datoms(&$conn.current_schema(), &txs[index].parts), $tx); + index = index + 1; + )* + + assert_eq!(index, txs.len()); + }; + + ($sqlite:expr, $conn:expr, schema => $schema:expr, $($tx:expr),*) => { + let txs = txs_after(&$sqlite, &$conn.current_schema(), TX0); + + // Schema assumed to be first transaction. + assert_matches!(parts_to_datoms(&$conn.current_schema(), &txs[0].parts), $schema); + + let mut index = 1; + $( + assert_matches!(parts_to_datoms(&$conn.current_schema(), &txs[index].parts), $tx); + index = index + 1; + )* + + assert_eq!(index, txs.len()); + }; } #[test] @@ -104,12 +256,10 @@ mod tests { let mut conn = Conn::connect(&mut c).expect("Couldn't open DB."); { let db_tx = c.transaction().expect("db tx"); - // Don't inspect the bootstrap transaction, but we'd like to see it's there. - let mut receiver = TxCountingReceiver::new(); - assert_eq!(false, receiver.is_done); - Processor::process(&db_tx, None, &mut receiver).expect("processor"); - assert_eq!(true, receiver.is_done); - assert_eq!(1, receiver.tx_count); + // Ensure that we see a bootstrap transaction. + assert_eq!(1, Processor::process( + &db_tx, None, TxCountingReceiver::new() + ).expect("processor")); } let ids = conn.transact(&mut c, r#"[ @@ -119,19 +269,25 @@ mod tests { ]"#).expect("successful transaction").tempids; let numba_entity_id = ids.get("s").unwrap(); - let bootstrap_tx; + let ids = conn.transact(&mut c, r#"[ + [:db/add "b" :foo/numba 123] + ]"#).expect("successful transaction").tempids; + let _asserted_e = ids.get("b").unwrap(); + + let first_tx; { let db_tx = c.transaction().expect("db tx"); // Expect to see one more transaction of four parts (one for tx datom itself). - let mut receiver = TestingReceiver::new(); - Processor::process(&db_tx, None, &mut receiver).expect("processor"); + let receiver = TestingReceiver::new(); + let txes = Processor::process(&db_tx, None, receiver).expect("processor"); - println!("{:#?}", receiver); + println!("{:#?}", txes); - assert_eq!(2, receiver.txes.keys().count()); - assert_tx_datoms_count(&receiver, 1, 4); + // Three transactions: bootstrap, vocab, assertion. 
+ assert_eq!(3, txes.keys().count()); + assert_tx_datoms_count(&txes, 2, 2); - bootstrap_tx = Some(*receiver.txes.keys().nth(0).expect("bootstrap tx")); + first_tx = txes.keys().nth(1).expect("first non-bootstrap tx").clone(); } let ids = conn.transact(&mut c, r#"[ @@ -143,17 +299,19 @@ mod tests { let db_tx = c.transaction().expect("db tx"); // Expect to see a single two part transaction - let mut receiver = TestingReceiver::new(); + let receiver = TestingReceiver::new(); - // Note that we're asking for the bootstrap tx to be skipped by the processor. - Processor::process(&db_tx, bootstrap_tx, &mut receiver).expect("processor"); + // Note that we're asking for the first transacted tx to be skipped by the processor. + let txes = Processor::process(&db_tx, Some(first_tx), receiver).expect("processor"); - assert_eq!(2, receiver.txes.keys().count()); - assert_tx_datoms_count(&receiver, 1, 2); + // Vocab, assertion. + assert_eq!(2, txes.keys().count()); + // Assertion datoms. + assert_tx_datoms_count(&txes, 1, 2); - // Inspect the transaction part. - let tx_id = receiver.txes.keys().nth(1).expect("tx"); - let datoms = receiver.txes.get(tx_id).expect("datoms"); + // Inspect the assertion. + let tx_id = txes.keys().nth(1).expect("tx"); + let datoms = txes.get(tx_id).expect("datoms"); let part = datoms.iter().find(|&part| &part.e == asserted_e).expect("to find asserted datom"); assert_eq!(numba_entity_id, &part.a); @@ -163,4 +321,1050 @@ mod tests { } } + #[test] + fn test_bootstrap_upload() { + let mut sqlite = new_connection("").unwrap(); + let mut conn = Conn::connect(&mut sqlite).unwrap(); + let mut remote_client = TestRemoteClient::new(); + + // Fast forward empty remote with a bootstrap transaction. + assert_sync!(SyncReport::RemoteFastForward, conn, sqlite, remote_client); + + let bootstrap_tx_parts = remote_client.transactions.get(&remote_client.rowid_tx[0]).unwrap(); + + assert_matches!(parts_to_datoms(&conn.current_schema(), &bootstrap_tx_parts), "[ + [:db.schema/core :db.schema/attribute 1 ?tx true] + [:db.schema/core :db.schema/attribute 3 ?tx true] + [:db.schema/core :db.schema/attribute 4 ?tx true] + [:db.schema/core :db.schema/attribute 5 ?tx true] + [:db.schema/core :db.schema/attribute 6 ?tx true] + [:db.schema/core :db.schema/attribute 7 ?tx true] + [:db.schema/core :db.schema/attribute 8 ?tx true] + [:db.schema/core :db.schema/attribute 9 ?tx true] + [:db.schema/core :db.schema/attribute 10 ?tx true] + [:db.schema/core :db.schema/attribute 11 ?tx true] + [:db.schema/core :db.schema/attribute 12 ?tx true] + [:db.schema/core :db.schema/attribute 13 ?tx true] + [:db.schema/core :db.schema/attribute 22 ?tx true] + [:db.schema/core :db.schema/attribute 37 ?tx true] + [:db.schema/core :db.schema/attribute 38 ?tx true] + [:db.schema/core :db.schema/attribute 39 ?tx true] + [:db/ident :db/ident :db/ident ?tx true] + [:db.part/db :db/ident :db.part/db ?tx true] + [:db/txInstant :db/ident :db/txInstant ?tx true] + [:db.install/partition :db/ident :db.install/partition ?tx true] + [:db.install/valueType :db/ident :db.install/valueType ?tx true] + [:db.install/attribute :db/ident :db.install/attribute ?tx true] + [:db/valueType :db/ident :db/valueType ?tx true] + [:db/cardinality :db/ident :db/cardinality ?tx true] + [:db/unique :db/ident :db/unique ?tx true] + [:db/isComponent :db/ident :db/isComponent ?tx true] + [:db/index :db/ident :db/index ?tx true] + [:db/fulltext :db/ident :db/fulltext ?tx true] + [:db/noHistory :db/ident :db/noHistory ?tx true] + [:db/add :db/ident :db/add ?tx 
true] + [:db/retract :db/ident :db/retract ?tx true] + [:db.part/user :db/ident :db.part/user ?tx true] + [:db.part/tx :db/ident :db.part/tx ?tx true] + [:db/excise :db/ident :db/excise ?tx true] + [:db.excise/attrs :db/ident :db.excise/attrs ?tx true] + [:db.excise/beforeT :db/ident :db.excise/beforeT ?tx true] + [:db.excise/before :db/ident :db.excise/before ?tx true] + [:db.alter/attribute :db/ident :db.alter/attribute ?tx true] + [:db.type/ref :db/ident :db.type/ref ?tx true] + [:db.type/keyword :db/ident :db.type/keyword ?tx true] + [:db.type/long :db/ident :db.type/long ?tx true] + [:db.type/double :db/ident :db.type/double ?tx true] + [:db.type/string :db/ident :db.type/string ?tx true] + [:db.type/uuid :db/ident :db.type/uuid ?tx true] + [:db.type/uri :db/ident :db.type/uri ?tx true] + [:db.type/boolean :db/ident :db.type/boolean ?tx true] + [:db.type/instant :db/ident :db.type/instant ?tx true] + [:db.type/bytes :db/ident :db.type/bytes ?tx true] + [:db.cardinality/one :db/ident :db.cardinality/one ?tx true] + [:db.cardinality/many :db/ident :db.cardinality/many ?tx true] + [:db.unique/value :db/ident :db.unique/value ?tx true] + [:db.unique/identity :db/ident :db.unique/identity ?tx true] + [:db/doc :db/ident :db/doc ?tx true] + [:db.schema/version :db/ident :db.schema/version ?tx true] + [:db.schema/attribute :db/ident :db.schema/attribute ?tx true] + [:db.schema/core :db/ident :db.schema/core ?tx true] + [?tx :db/txInstant ?ms ?tx true] + [:db/ident :db/valueType 24 ?tx true] + [:db/txInstant :db/valueType 31 ?tx true] + [:db.install/partition :db/valueType 23 ?tx true] + [:db.install/valueType :db/valueType 23 ?tx true] + [:db.install/attribute :db/valueType 23 ?tx true] + [:db/valueType :db/valueType 23 ?tx true] + [:db/cardinality :db/valueType 23 ?tx true] + [:db/unique :db/valueType 23 ?tx true] + [:db/isComponent :db/valueType 30 ?tx true] + [:db/index :db/valueType 30 ?tx true] + [:db/fulltext :db/valueType 30 ?tx true] + [:db/noHistory :db/valueType 30 ?tx true] + [:db.alter/attribute :db/valueType 23 ?tx true] + [:db/doc :db/valueType 27 ?tx true] + [:db.schema/version :db/valueType 25 ?tx true] + [:db.schema/attribute :db/valueType 23 ?tx true] + [:db/ident :db/cardinality 33 ?tx true] + [:db/txInstant :db/cardinality 33 ?tx true] + [:db.install/partition :db/cardinality 34 ?tx true] + [:db.install/valueType :db/cardinality 34 ?tx true] + [:db.install/attribute :db/cardinality 34 ?tx true] + [:db/valueType :db/cardinality 33 ?tx true] + [:db/cardinality :db/cardinality 33 ?tx true] + [:db/unique :db/cardinality 33 ?tx true] + [:db/isComponent :db/cardinality 33 ?tx true] + [:db/index :db/cardinality 33 ?tx true] + [:db/fulltext :db/cardinality 33 ?tx true] + [:db/noHistory :db/cardinality 33 ?tx true] + [:db.alter/attribute :db/cardinality 34 ?tx true] + [:db/doc :db/cardinality 33 ?tx true] + [:db.schema/version :db/cardinality 33 ?tx true] + [:db.schema/attribute :db/cardinality 34 ?tx true] + [:db/ident :db/unique 36 ?tx true] + [:db.schema/attribute :db/unique 35 ?tx true] + [:db/ident :db/index true ?tx true] + [:db/txInstant :db/index true ?tx true] + [:db.schema/attribute :db/index true ?tx true] + [:db.schema/core :db.schema/version 1 ?tx true]]"); + } + + #[test] + fn test_against_bootstrap() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = 
TestRemoteClient::new(); + + // Fast forward empty remote with a bootstrap transaction from 1. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge 1 and 2 bootstrap transactions. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // Assert that nothing besides a bootstrap transaction is present after a sync on 2. + let synced_txs_2 = txs_after(&sqlite_2, &conn_2.current_schema(), TX0); + assert_eq!(0, synced_txs_2.len()); + + // Assert that 1's sync didn't affect remote. + assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + + // Assert that nothing besides a bootstrap transaction is present after a sync on 1. + let synced_txs_1 = txs_after(&sqlite_1, &conn_1.current_schema(), TX0); + assert_eq!(0, synced_txs_1.len()); + } + + #[test] + fn test_empty_merge() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions from 1. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // Assert that we end up with the same schema on 2 as we had on 1. + assert_transactions!(sqlite_2, conn_2, + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + ); + + // Assert that 2's sync didn't affect remote state. + assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + } + + #[test] + fn test_non_conflicting_merge_exact() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both 1 and 2 define the same schema. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // Assert that 2's schema didn't change after sync. + assert_transactions!(sqlite_2, conn_2, + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + ); + + // Assert that 2's sync didn't change remote state. 
+ assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + } + + #[test] + fn test_non_conflicting_merge_subset() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both 1 and 2 define the same schema. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // But 1 also has an assertion against its schema. + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + assert_transactions!(sqlite_2, conn_2, + // Assert that 2's schema is the same as before the sync. + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + // Assert that 2 has an additional transaction from 1 (name=Ivan). + r#"[[?e :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms1 ?tx true]]"# + ); + + // Assert that 2's sync didn't change remote state. + assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + } + + #[test] + fn test_schema_merge() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // 1 defines a richer schema than 2. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/age + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // Assert that 2's schema has been augmented with 1's. + assert_transactions!(sqlite_2, conn_2, + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + ); + + // Assert that 2's sync didn't change remote state. 
+ assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + } + + #[test] + fn test_entity_merge_unique_identity() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both have the same schema with a unique/identity attribute. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + // Both have the same assertion against the schema. + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + assert_transactions!(sqlite_2, conn_2, + // Assert that 2's schema is unchanged. + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/name :db/unique :db.unique/identity ?tx true] + [:person/name :db/index true ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + // Assert that 2's unique entity got smushed with 1's. + r#"[[?e :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + + // Assert that 2's sync didn't change remote state. + assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + } + + #[test] + fn test_entity_merge_unique_identity_conflict() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both start off with the same schema (a single unique/identity attribute) and an assertion. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // First removes the entity. 
+ conn_1.transact(&mut sqlite_1, r#"[ + [:db/retract (lookup-ref :person/name "Ivan") :person/name "Ivan"]]"#).expect("transacted"); + + // Second changes the entitiy. + conn_2.transact(&mut sqlite_2, r#"[ + {:db/id (lookup-ref :person/name "Ivan") :person/name "Vanya"} + ]"#).expect("transacted"); + + // First syncs first. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // And now, merge! + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + + // We currently have a primitive conflict resolution strategy, + // ending up with a new "Vanya" entity. + assert_transactions!(sqlite_2, conn_2, + // These hard-coded entids are brittle but deterministic. + // They signify that we end up with a new entity Vanya, separate from the one + // that was renamed. + r#"[[65537 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65537 :person/name "Ivan" ?tx false] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65538 :person/name "Vanya" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_entity_merge_unique_identity_conflict_reversed() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both start off with the same schema (a single unique/identity attribute) and an assertion. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // First removes the entity. + conn_1.transact(&mut sqlite_1, r#"[ + [:db/retract (lookup-ref :person/name "Ivan") :person/name "Ivan"]]"#).expect("transacted"); + + // Second changes the entitiy. + conn_2.transact(&mut sqlite_2, r#"[ + [:db/add (lookup-ref :person/name "Ivan") :person/name "Vanya"] + ]"#).expect("transacted"); + + // Second syncs first. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // And now, merge! + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_1, sqlite_1, remote_client); + + // Deletion of "Ivan" will be dropped on the floor, since there's no such + // entity anymore (it's "Vanya"). + assert_transactions!(sqlite_1, conn_1, + // These hard-coded entids are brittle but deterministic. 
+ r#"[[65537 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65537 :person/name "Ivan" ?tx false] + [65537 :person/name "Vanya" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_entity_merge_unique_identity_conflict_simple() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both start off with the same schema (a single unique/identity attribute) and an assertion. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_2, sqlite_2, remote_client); + + // First renames the entity. + conn_1.transact(&mut sqlite_1, r#"[ + [:db/add (lookup-ref :person/name "Ivan") :person/name "Vanechka"]]"#).expect("transacted"); + + // Second also renames the entitiy. + conn_2.transact(&mut sqlite_2, r#"[ + [:db/add (lookup-ref :person/name "Ivan") :person/name "Vanya"] + ]"#).expect("transacted"); + + // Second syncs first. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // And now, merge! + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_1, sqlite_1, remote_client); + + // These hard-coded entids are brittle but deterministic. + // They signify that we end up with a new entity Vanechka, separate from the one + // that was renamed. + assert_transactions!(sqlite_1, conn_1, + r#"[[65537 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65537 :person/name "Ivan" ?tx false] + [65537 :person/name "Vanya" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + // A new entity is created for the second rename. 
+ r#"[[65538 :person/name "Vanechka" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_conflicting_schema() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many}]").expect("transacted"); + + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + assert_sync!( + error => MentatError::TolstoyError(TolstoyError::NotYetImplemented(_)), + conn_2, sqlite_2, remote_client); + } + + + #[test] + fn test_schema_with_non_matching_entids() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // :person/name will be e=65536. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // This entity will be e=65537. + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // :person/name will be e=65536, :person/age will be e=65537 (NB conflict w/ above entity). + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/age + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + assert_transactions!(sqlite_1, conn_1, + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + // Assert that 2's unique entity got smushed with 1's. + r#"[[?e :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + // Assert that 2's extra vocabulary is present. + "[[:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]" + ); + } + + #[test] + fn test_entity_merge_non_unique_entity_conflict() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both start off with the same schema (a single unique/identity attribute) and an assertion. 
+ conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + // Will result in two Ivans. + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + // Upload the second Ivan. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // Get the second Ivan. + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + // These entids are determenistic. We can't use lookup-refs because :person/name is + // a non-unique attribute. + // First removes an Ivan. + conn_1.transact(&mut sqlite_1, r#"[ + [:db/retract 65537 :person/name "Ivan"]]"#).expect("transacted"); + + // Second renames an Ivan. + conn_2.transact(&mut sqlite_2, r#"[ + {:db/id 65537 :person/name "Vanya"}]"#).expect("transacted"); + + // First syncs first. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // And now, merge! + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + + // We currently have a primitive conflict resolution strategy, + // ending up with a new "Vanya" entity. + + // These hard-coded entids are brittle but deterministic. + // They signify that we end up with a new entity Vanya, separate from the one + // that was renamed. + assert_transactions!(sqlite_2, conn_2, + r#"[[65537 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65538 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65537 :person/name "Ivan" ?tx false] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65538 :person/name "Ivan" ?tx false] + [65538 :person/name "Vanya" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_entity_merge_non_unique_entity_conflict_reversed() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // Both start off with the same schema (a single unique/identity attribute) and an assertion. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + // Merge will result in two Ivans. 
+ assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + // Upload the second Ivan. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // Get the second Ivan. + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + // These entids are determenistic. We can't use lookup-refs because :person/name is + // a non-unique attribute. + // First removes an Ivan. + conn_1.transact(&mut sqlite_1, r#"[ + [:db/retract 65537 :person/name "Ivan"]]"#).expect("transacted"); + + // Second renames an Ivan. + conn_2.transact(&mut sqlite_2, r#"[ + [:db/add 65537 :person/name "Vanya"]]"#).expect("transacted"); + + // Second wins the sync race. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // First merges its changes with second's. + assert_sync!(SyncReport::Merge(SyncFollowup::None), conn_1, sqlite_1, remote_client); + + // We currently have a primitive conflict resolution strategy, + // ending up dropping first's removal of "Ivan". + // Internally that happens because :person/name is not :db/unique. + + // These hard-coded entids are brittle but deterministic. + // They signify that we end up with a new entity Vanya, separate from the one + // that was renamed. + assert_transactions!(sqlite_1, conn_1, + r#"[[65537 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[65538 :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + // Just the rename left, removal is dropped on the floor. + r#"[[65537 :person/name "Ivan" ?tx false] + [65537 :person/name "Vanya" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_entity_merge_non_unique() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // 1 defines the same schema as 2. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :world/city + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Vancouver, BC + conn_1.transact(&mut sqlite_1, r#"[{:world/city "Vancouver"}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :world/city + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Vancouver, WA + conn_2.transact(&mut sqlite_2, r#"[{:world/city "Vancouver"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Since :world/city is not unique, we elect not to smush these entities. + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + + assert_transactions!(sqlite_2, conn_2, + schema => + "[[?e :db/ident :world/city ?tx true] + [?e :db/valueType :db.type/string ?tx true] + [?e :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + // Assert that we didn't try smushing non-unique entities. + r#"[[?e :world/city "Vancouver" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[?e :world/city "Vancouver" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + + // Since follow-up must be manually triggered, 1 shouldn't observe any changes yet. 
+ assert_sync!(SyncReport::NoChanges, conn_1, sqlite_1, remote_client); + + // Follow-up sync. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // 2 should now observe merge results from 1. + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + assert_transactions!(sqlite_1, conn_1, + // Assert that 1's schema is unchanged. + schema => + "[[?e :db/ident :world/city ?tx true] + [?e :db/valueType :db.type/string ?tx true] + [?e :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + // Assert that we didn't try smushing non-unique entities. + r#"[[?e :world/city "Vancouver" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[?e :world/city "Vancouver" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_schema_with_assertions_merge() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // 1 defines a richer schema than 2. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/age + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_1.transact(&mut sqlite_1, r#"[{:person/name "Ivan" :person/age 28}]"#).expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, r#"[{:person/name "Ivan"}]"#).expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + + assert_transactions!(sqlite_2, conn_2, + // Assert that 2's schema has been augmented with 1's. + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + r#"[[?e :person/name "Ivan" ?tx true] + [?e :person/age 28 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[?e :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + + // Follow-up sync after merge. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // Assert that 2's sync fast-forwarded remote. + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + assert_transactions!(sqlite_1, conn_1, + // Assert that 1's schema remains the same, and it sees the extra Ivan. 
+ schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + r#"[[?e :person/name "Ivan" ?tx true] + [?e :person/age 28 ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"#, + + r#"[[?e :person/name "Ivan" ?tx true] + [?tx :db/txInstant ?ms ?tx true]]"# + ); + } + + #[test] + fn test_non_subset_merge() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // 1 and 2 define different schemas. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/age + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/sin + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!(SyncReport::Merge(SyncFollowup::FullSync), conn_2, sqlite_2, remote_client); + + assert_transactions!(sqlite_2, conn_2, + schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + "[[:person/sin :db/ident :person/sin ?tx true] + [:person/sin :db/valueType :db.type/long ?tx true] + [:person/sin :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]" + ); + + // Follow-up sync after merge. + assert_sync!(SyncReport::RemoteFastForward, conn_2, sqlite_2, remote_client); + + // Assert that 2's sync moved forward the remote state. + assert_sync!(SyncReport::LocalFastForward, conn_1, sqlite_1, remote_client); + + assert_transactions!(sqlite_1, conn_1, + // Assert that 1's schema is intact, and has been augmented with 2's. 
+ schema => + "[[:person/name :db/ident :person/name ?tx true] + [:person/name :db/valueType :db.type/string ?tx true] + [:person/name :db/cardinality :db.cardinality/one ?tx true] + [:person/age :db/ident :person/age ?tx true] + [:person/age :db/valueType :db.type/long ?tx true] + [:person/age :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]", + + "[[:person/sin :db/ident :person/sin ?tx true] + [:person/sin :db/valueType :db.type/long ?tx true] + [:person/sin :db/cardinality :db.cardinality/one ?tx true] + [?tx :db/txInstant ?ms ?tx true]]" + ); + } + + #[test] + fn test_merge_schema_with_different_attribute_definitions() { + let mut sqlite_1 = new_connection("").unwrap(); + let mut sqlite_2 = new_connection("").unwrap(); + + let mut conn_1 = Conn::connect(&mut sqlite_1).unwrap(); + let mut conn_2 = Conn::connect(&mut sqlite_2).unwrap(); + + let mut remote_client = TestRemoteClient::new(); + + // 1 and 2 define same idents but with different cardinality. + conn_1.transact(&mut sqlite_1, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/bff + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one}]").expect("transacted"); + + conn_2.transact(&mut sqlite_2, "[ + {:db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one} + {:db/ident :person/bff + :db/valueType :db.type/string + :db/cardinality :db.cardinality/many}]").expect("transacted"); + + // Fast forward empty remote with a bootstrap and schema transactions. + assert_sync!(SyncReport::RemoteFastForward, conn_1, sqlite_1, remote_client); + + // Merge bootstrap+schema transactions from 1 into 2. + assert_sync!( + error => MentatError::TolstoyError(TolstoyError::NotYetImplemented(_)), + conn_2, sqlite_2, remote_client + ); + } } diff --git a/tolstoy-traits/errors.rs b/tolstoy-traits/errors.rs index 6f9bcff9..5d6739f6 100644 --- a/tolstoy-traits/errors.rs +++ b/tolstoy-traits/errors.rs @@ -9,19 +9,24 @@ // specific language governing permissions and limitations under the License. use std; +use std::error::Error; use rusqlite; use uuid; use hyper; use serde_json; -use db_traits::errors::DbError; - -pub type Result = ::std::result::Result; +use db_traits::errors::{ + DbError, +}; #[derive(Debug, Fail)] pub enum TolstoyError { - #[fail(display = "Received bad response from the server: {}", _0)] - BadServerResponse(String), + #[fail(display = "Received bad response from the remote: {}", _0)] + BadRemoteResponse(String), + + // TODO expand this into concrete error types + #[fail(display = "Received bad remote state: {}", _0)] + BadRemoteState(String), #[fail(display = "encountered more than one metadata value for key: {}", _0)] DuplicateMetadata(String), @@ -46,8 +51,8 @@ pub enum TolstoyError { // It would be better to capture the underlying `rusqlite::Error`, but that type doesn't // implement many useful traits, including `Clone`, `Eq`, and `PartialEq`. 
- #[fail(display = "SQL error: {}", _0)] - RusqliteError(String), + #[fail(display = "SQL error: {}, cause: {}", _0, _1)] + RusqliteError(String, String), #[fail(display = "{}", _0)] IoError(#[cause] std::io::Error), @@ -76,7 +81,11 @@ impl From for TolstoyError { impl From for TolstoyError { fn from(error: rusqlite::Error) -> TolstoyError { - TolstoyError::RusqliteError(error.to_string()) + let cause = match error.cause() { + Some(e) => e.to_string(), + None => "".to_string() + }; + TolstoyError::RusqliteError(error.to_string(), cause) } } diff --git a/tolstoy/Cargo.toml b/tolstoy/Cargo.toml index 12e9e9db..e44de45f 100644 --- a/tolstoy/Cargo.toml +++ b/tolstoy/Cargo.toml @@ -39,6 +39,12 @@ path = "../db-traits" [dependencies.tolstoy_traits] path = "../tolstoy-traits" +[dependencies.public_traits] +path = "../public-traits" + +[dependencies.mentat_transaction] +path = "../transaction" + [dependencies.rusqlite] version = "0.13" features = ["limits"] diff --git a/tolstoy/README.md b/tolstoy/README.md new file mode 100644 index 00000000..63c0bd0e --- /dev/null +++ b/tolstoy/README.md @@ -0,0 +1,138 @@ +# Tolstoy, a Mentat Sync implementation + +## Current state +This work is partially a proof-of-concept, partially an alpha implementation of how a generic sync might operate on top of a log-oriented storage layer a la Mentat. + +## Overview +### Very briefly +Tolstoy will synchronize a local Mentat database against a remote server, modifying local state if necessary, and uploading changes to the server if necessary. Schema additions are allowed (adding vocabulary). Schema mutations are currently not implemented (changing vocabulary). Mentat's core schema must be the same on all participating clients (i.e. core schema alterations are unsupported). + +**Basic example:** + +Client 1 knows about `name` and `age` of a person. +``` +[ + { + :db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + } + { + :db/ident :person/age + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one + } + {:person/name "Grisha" :person/age 30} +] +``` + +Client 2 doesn't know about `age`, but knows about `ssn`: +``` +[ + { + :db/ident :person/name + :db/valueType :db.type/string + :db/cardinality :db.cardinality/one + } + { + :db/ident :person/ssn + :db/valueType :db.type/long + :db/cardinality :db.cardinality/one + :db/unique :db.unique/identity + :db/index true + } + {:person/name "Grisha" :person/ssn 123} +] +``` +Sync Client 1, then Client 2, then Client 1 again. + +Entity `Grisha` will be "duplicated", since `:person/name` is not defined as unique. +``` +[ + [:person/name :db/ident :person/name] + [:person/name :db/valueType :db.type/string] + [:person/name :db/cardinality :db.cardinality/one] + + [:person/age :db/ident :person/age] + [:person/age :db/valueType :db.type/long] + [:person/age :db/cardinality :db.cardinality/one] + + [:person/ssn :db/ident :person/ssn] + [:person/ssn :db/valueType :db.type/long] + [:person/ssn :db/cardinality :db.cardinality/one] + [:person/ssn :db/unique :db.unique/identity] + [:person/ssn :db/index true] + + [?grisha_one :person/name "Grisha"] + [?grisha_one :person/age 30] + + [?grisha_two :person/name "Grisha"] + [?grisha_two :person/ssn 123] +] +``` + +If, instead, `:person/name` was defined as `:db/unique :db.unique/identity`, then our final state will be: +``` +[ + [...schema datoms...] 
+
+ [?united_grisha :person/name "Grisha"]
+ [?united_grisha :person/age 30]
+ [?united_grisha :person/ssn 123]
+]
+```
+
+Note that in the above examples, symbols such as `?grisha_one` are meant to expand to some internal entity id (e.g. 65536).
+
+### Try it via the CLI
+In the Mentat CLI, a `.sync` operation exposes Tolstoy's functionality. Basic usage: `.sync http://path-to-server account-uuid`. Authentication, etc., is not implemented.
+
+### In more detail...
+Syncing is defined in terms of coming to an agreement between local and remote states. A local state is what's currently present on this instance. A remote state is what's currently present on a server.
+
+Mentat is a log-oriented store, and so "local" and "remote" are really just two transaction logs.
+
+Internally, Tolstoy tracks the "locally known remote HEAD" and the "last-synced local transaction", which gives us three basic primitives:
+- a shared root, a state which is at the root of both local and remote logs
+- incoming changes - what remote changed on top of the shared root
+- outgoing changes - what local changed on top of the shared root.
+
+Thus, four positive-case outcomes are possible during a sync:
+- a no-op - there are no incoming changes and local didn't change
+- a local fast-forward - there are remote changes, but no local changes
+- a remote fast-forward - there are local changes, but no remote changes
+- a merge - there are both local and remote changes.
+
+The first three cases are "trivial" - we either do nothing, or we download and transact remote transactions, or we upload local transactions and advance the remote HEAD.
+
+The merge case is a little more complicated. During a merging sync, a kind of rebase is performed:
+1. local transactions are moved off of the main timeline
+2. remote transactions are transacted on top of the shared root
+3. local transactions are transacted
+
+Generally, intuition about the transactor's behaviour applies to reasoning about Tolstoy's sync as well. If a transaction "makes sense", it will be applied.
+
+Remote transactions are applied "as-is", with the exception of the `txInstant` - it must be preserved, and so the datom describing it is re-written prior to application to use the `(transaction-tx)` transaction function.
+
+Local transactions are rewritten to use tempids instead of their entids if they are assertions, and to use the `(lookup-ref a v)` form in the case of retractions - but only if the `lookup-ref` is guaranteed to succeed; otherwise, retractions are dropped on the floor. Cases where local retractions are dropped:
+- we're retracting an entity which isn't `:db/unique`
+- we're retracting an entity which was already retracted by one of the `remote` transactions.
+
+### Sync report
+A sync operation produces either a single sync report or multiple sync reports.
+
+A single report - internally referred to as an atomic sync report - indicates that sync was able to finish within a single local database transaction.
+
+Alternatively, a non-atomic report is produced. It's a series of regular atomic reports. This indicates that sync required multiple "passes" to complete - e.g. a merge first, then a remote fast-forward - and each step was performed within a separate local database transaction.
+
+## Explicitly not supported - will abort with a NotYetImplemented
+This alpha implementation doesn't support some cases, but it recognizes them and gracefully aborts (leaving local and remote states untouched):
+- Syncing against a Mentat instance which uses a different core schema version.
+- Syncing with schema mutations. Schema additions are fine, but transactions which change a set of attributes that define a user-defined `:db/ident` will cause sync to abort. + +## Misc operational properties +- All sync operations happen in a context of an `InProgress` - an internal Mentat transaction representation. If sync succeeds, all necessary operations are comitted to the underlying database in a single SQLite transaction. Similarly, an aborting sync will simply drop an uncomitted transaction. +- "Follow-up" syncing is currently supported in a basic manner: if there are local changes arising from a merge operation, they are comitted to the local store, and a full sync is requested which is expected to fast-forward remote state in an optimal case, and if we lost the race to the server - to merge the local "merged state" with further remote changes. + +## Server +Tolstoy operates against an instance of [Mentat Sync Prototype Server](https://github.com/rfk/mentat-sync-prototype/tree/480d43d7001cd92455fdbbd374255db458e18b6c). That repository defines a transaction-oriented API, which is all that Tolstoy expects of the server. diff --git a/tolstoy/src/bootstrap.rs b/tolstoy/src/bootstrap.rs new file mode 100644 index 00000000..9920edbb --- /dev/null +++ b/tolstoy/src/bootstrap.rs @@ -0,0 +1,90 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use mentat_core::{ + Keyword, +}; + +use mentat_db::{ + CORE_SCHEMA_VERSION, +}; + +use public_traits::errors::{ + Result, +}; + +use tolstoy_traits::errors::{ + TolstoyError, +}; + +use datoms::{ + DatomsHelper, +}; + +use types::{ + Tx, +}; + +pub struct BootstrapHelper<'a> { + parts: DatomsHelper<'a> +} + +impl<'a> BootstrapHelper<'a> { + pub fn new(assumed_bootstrap_tx: &Tx) -> BootstrapHelper { + BootstrapHelper { + parts: DatomsHelper::new(&assumed_bootstrap_tx.parts), + } + } + + // TODO we could also iterate through our own bootstrap schema definition and check that everything matches + // "version" is used here as a proxy for doing that work + pub fn is_compatible(&self) -> Result { + Ok(self.core_schema_version()? == CORE_SCHEMA_VERSION as i64) + } + + pub fn core_schema_version(&self) -> Result { + match self.parts.ea_lookup( + Keyword::namespaced("db.schema", "core"), + Keyword::namespaced("db.schema", "version"), + ) { + Some(v) => { + // TODO v is just a type tag and a Copy value, we shouldn't need to clone. 
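+                // (`ea_lookup` returns a borrowed `TypedValue`, and `into_long()` consumes
+                // self, hence the clone for now.)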
+ match v.clone().into_long() { + Some(v) => Ok(v), + None => bail!(TolstoyError::BadRemoteState("incorrect type for core schema version".to_string())) + } + }, + None => bail!(TolstoyError::BadRemoteState("missing core schema version".to_string())) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use mentat_db::debug::{ + TestConn, + }; + + use debug::txs_after; + + #[test] + fn test_bootstrap_version() { + let remote = TestConn::default(); + + let remote_txs = txs_after(&remote.sqlite, &remote.schema, remote.last_tx_id() - 1); + + assert_eq!(1, remote_txs.len()); + + let bh = BootstrapHelper::new(&remote_txs[0]); + assert_eq!(1, bh.core_schema_version().expect("schema version")); + } +} diff --git a/tolstoy/src/datoms.rs b/tolstoy/src/datoms.rs new file mode 100644 index 00000000..b610d5d2 --- /dev/null +++ b/tolstoy/src/datoms.rs @@ -0,0 +1,67 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use edn::{ + Keyword, +}; + +use core_traits::{ + Entid, + TypedValue, +}; + +use types::TxPart; + +/// A primitive query interface geared toward processing bootstrap-like sets of datoms. +pub struct DatomsHelper<'a> { + parts: &'a Vec, +} + +impl<'a> DatomsHelper<'a> { + pub fn new(parts: &'a Vec) -> DatomsHelper { + DatomsHelper { + parts: parts, + } + } + + // TODO these are obviously quite inefficient + pub fn e_lookup(&self, e: Keyword) -> Option { + // This wraps Keyword (e) in ValueRc (aliased Arc), which is rather expensive. + let kw_e = TypedValue::Keyword(e.into()); + + for part in self.parts { + if kw_e == part.v && part.added { + return Some(part.e); + } + } + + None + } + + pub fn ea_lookup(&self, e: Keyword, a: Keyword) -> Option<&TypedValue> { + let e_e = self.e_lookup(e); + let a_e = self.e_lookup(a); + + if e_e.is_none() || a_e.is_none() { + return None; + } + + let e_e = e_e.unwrap(); + let a_e = a_e.unwrap(); + + for part in self.parts { + if part.e == e_e && part.a == a_e && part.added { + return Some(&part.v); + } + } + + None + } +} diff --git a/tolstoy/src/debug.rs b/tolstoy/src/debug.rs new file mode 100644 index 00000000..4306c9be --- /dev/null +++ b/tolstoy/src/debug.rs @@ -0,0 +1,105 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// TODO could hide this behind #[cfg(test)], since this is only for test use. 
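+// These helpers bridge mentat_db's debug machinery and Tolstoy's own types:
+// `txs_after` repackages `transactions_after` output as Tolstoy `Tx` values
+// (each given a freshly generated UUID), while `part_to_datom` and
+// `parts_to_datoms` render `TxPart`s as debug `Datom`s, preferring idents over
+// entids whenever the schema knows them, so tests can assert on transaction
+// contents in a readable form.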
+ +use rusqlite; + +use uuid::Uuid; + +use edn::entities::{ + EntidOrIdent, +}; + +use core_traits::{ + Entid, + TypedValue, +}; + +use mentat_core::{ + HasSchema, + Schema, +}; + +use mentat_db::{ + TypedSQLValue, +}; + +use mentat_db::debug::{ + Datom, + Datoms, + transactions_after, +}; + +use types::{ + Tx, + TxPart, +}; + +/// A rough equivalent of mentat_db::debug::transactions_after +/// for Tolstoy's Tx type. +pub fn txs_after(sqlite: &rusqlite::Connection, schema: &Schema, after: Entid) -> Vec { + let transactions = transactions_after( + sqlite, schema, after + ).expect("remote transactions"); + + let mut txs = vec![]; + + for transaction in transactions.0 { + let mut tx = Tx { + tx: Uuid::new_v4(), + parts: vec![], + }; + + for datom in &transaction.0 { + let e = match datom.e { + EntidOrIdent::Entid(ref e) => *e, + _ => panic!(), + }; + let a = match datom.a { + EntidOrIdent::Entid(ref a) => *a, + EntidOrIdent::Ident(ref a) => schema.get_entid(a).unwrap().0, + }; + + tx.parts.push(TxPart { + partitions: None, + e: e, + a: a, + v: TypedValue::from_edn_value(&datom.v).unwrap(), + tx: datom.tx, + added: datom.added.unwrap() + }); + } + + txs.push(tx); + } + + txs +} + +pub fn part_to_datom(schema: &Schema, part: &TxPart) -> Datom { + Datom { + e: match schema.get_ident(part.e) { + Some(ident) => EntidOrIdent::Ident(ident.clone()), + None => EntidOrIdent::Entid(part.e), + }, + a: match schema.get_ident(part.a) { + Some(ident) => EntidOrIdent::Ident(ident.clone()), + None => EntidOrIdent::Entid(part.a), + }, + v: TypedValue::to_edn_value_pair(&part.v).0, + tx: part.tx, + added: Some(part.added), + } +} + +pub fn parts_to_datoms(schema: &Schema, parts: &Vec) -> Datoms { + Datoms(parts.iter().map(|p| part_to_datom(schema, p)).collect()) +} diff --git a/tolstoy/src/lib.rs b/tolstoy/src/lib.rs index bed77332..0b78a7b4 100644 --- a/tolstoy/src/lib.rs +++ b/tolstoy/src/lib.rs @@ -27,22 +27,50 @@ extern crate serde; extern crate serde_cbor; extern crate serde_json; -// See https://github.com/rust-lang/rust/issues/44342#issuecomment-376010077. -#[cfg_attr(test, macro_use)] extern crate log; -#[cfg_attr(test, macro_use)] extern crate mentat_db; +extern crate log; +extern crate mentat_db; extern crate mentat_core; extern crate db_traits; #[macro_use] extern crate core_traits; +extern crate public_traits; extern crate rusqlite; extern crate uuid; extern crate tolstoy_traits; +extern crate mentat_transaction; -pub mod schema; +pub mod bootstrap; pub mod metadata; -pub mod tx_processor; +pub use metadata::{ + PartitionsTable, + SyncMetadata, +}; +mod datoms; +pub mod debug; +pub mod remote_client; +pub use remote_client::{ + RemoteClient, +}; +pub mod schema; pub mod syncer; +pub use syncer::{ + Syncer, + SyncReport, + SyncResult, + SyncFollowup, +}; +mod tx_uploader; +pub mod logger; pub mod tx_mapper; -pub use syncer::Syncer; +pub use tx_mapper::{ + TxMapper, +}; +pub mod tx_processor; +pub mod types; +pub use types::{ + Tx, + TxPart, + GlobalTransactionLog, +}; diff --git a/tolstoy/src/logger.rs b/tolstoy/src/logger.rs new file mode 100644 index 00000000..41e072f0 --- /dev/null +++ b/tolstoy/src/logger.rs @@ -0,0 +1,32 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. 
You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// TODO: use `log` crate. + +// TODO it would be nice to be able to pass +// in a logger into Syncer::flow; would allow for a "debug mode" +// and getting useful logs out of clients. +// See https://github.com/mozilla/mentat/issues/571 +// Below is some debug Android-friendly logging: + +// use std::os::raw::c_char; +// use std::os::raw::c_int; +// use std::ffi::CString; +// pub const ANDROID_LOG_DEBUG: i32 = 3; +// extern { pub fn __android_log_write(prio: c_int, tag: *const c_char, text: *const c_char) -> c_int; } + +pub fn d(message: &str) { + println!("d: {}", message); + // let message = CString::new(message).unwrap(); + // let message = message.as_ptr(); + // let tag = CString::new("RustyToodle").unwrap(); + // let tag = tag.as_ptr(); + // unsafe { __android_log_write(ANDROID_LOG_DEBUG, tag, message) }; +} diff --git a/tolstoy/src/metadata.rs b/tolstoy/src/metadata.rs index 8eeb58ef..f0bc0a33 100644 --- a/tolstoy/src/metadata.rs +++ b/tolstoy/src/metadata.rs @@ -13,21 +13,57 @@ use rusqlite; use uuid::Uuid; +use core_traits::{ + Entid, +}; + use schema; -use tolstoy_traits::errors::{ - TolstoyError, + +use public_traits::errors::{ Result, }; -pub trait HeadTrackable { - fn remote_head(tx: &rusqlite::Transaction) -> Result; - fn set_remote_head(tx: &rusqlite::Transaction, uuid: &Uuid) -> Result<()>; +use tolstoy_traits::errors::{ + TolstoyError, +}; + +use mentat_db::{ + Partition, + PartitionMap, + db, +}; + +use types::{ + LocalGlobalTxMapping, +}; + +use TxMapper; + +// Could be Copy, but that might change +pub struct SyncMetadata { + // Local head: latest transaction that we have in the store, + // but with one caveat: its tx might will not be mapped if it's + // never been synced successfully. + // In other words: if latest tx isn't mapped, then HEAD moved + // since last sync and server needs to be updated. + pub root: Entid, + pub head: Entid, } -pub struct SyncMetadataClient {} +pub enum PartitionsTable { + Core, + Tolstoy, +} -impl HeadTrackable for SyncMetadataClient { - fn remote_head(tx: &rusqlite::Transaction) -> Result { +impl SyncMetadata { + pub fn new(root: Entid, head: Entid) -> SyncMetadata { + SyncMetadata { + root: root, + head: head, + } + } + + pub fn remote_head(tx: &rusqlite::Transaction) -> Result { tx.query_row( "SELECT value FROM tolstoy_metadata WHERE key = ?", &[&schema::REMOTE_HEAD_KEY], |r| { @@ -37,7 +73,7 @@ impl HeadTrackable for SyncMetadataClient { )?.map_err(|e| e.into()) } - fn set_remote_head(tx: &rusqlite::Transaction, uuid: &Uuid) -> Result<()> { + pub fn set_remote_head(tx: &rusqlite::Transaction, uuid: &Uuid) -> Result<()> { let uuid_bytes = uuid.as_bytes().to_vec(); let updated = tx.execute("UPDATE tolstoy_metadata SET value = ? 
WHERE key = ?", &[&uuid_bytes, &schema::REMOTE_HEAD_KEY])?; @@ -46,25 +82,135 @@ impl HeadTrackable for SyncMetadataClient { } Ok(()) } + + pub fn set_remote_head_and_map(tx: &mut rusqlite::Transaction, mapping: LocalGlobalTxMapping) -> Result<()> { + SyncMetadata::set_remote_head(tx, mapping.remote)?; + TxMapper::set_lg_mapping(tx, mapping)?; + Ok(()) + } + + // TODO Functions below start to blur the line between mentat-proper and tolstoy... + pub fn get_partitions(tx: &rusqlite::Transaction, parts_table: PartitionsTable) -> Result { + match parts_table { + PartitionsTable::Core => { + db::read_partition_map(tx).map_err(|e| e.into()) + }, + PartitionsTable::Tolstoy => { + let mut stmt: ::rusqlite::Statement = tx.prepare("SELECT part, start, end, idx, allow_excision FROM tolstoy_parts")?; + let m: Result = stmt.query_and_then(&[], |row| -> Result<(String, Partition)> { + Ok((row.get_checked(0)?, Partition::new(row.get_checked(1)?, row.get_checked(2)?, row.get_checked(3)?, row.get_checked(4)?))) + })?.collect(); + m + } + } + } + + pub fn root_and_head_tx(tx: &rusqlite::Transaction) -> Result<(Entid, Entid)> { + let mut stmt: ::rusqlite::Statement = tx.prepare("SELECT tx FROM timelined_transactions WHERE timeline = 0 GROUP BY tx ORDER BY tx")?; + let txs: Vec<_> = stmt.query_and_then(&[], |row| -> Result { + Ok(row.get_checked(0)?) + })?.collect(); + + let mut txs = txs.into_iter(); + + let root_tx = match txs.nth(0) { + None => bail!(TolstoyError::UnexpectedState(format!("Could not get root tx"))), + Some(t) => t? + }; + + match txs.last() { + None => Ok((root_tx, root_tx)), + Some(t) => Ok((root_tx, t?)) + } + } + + pub fn local_txs(db_tx: &rusqlite::Transaction, after: Option) -> Result> { + let after_clause = match after { + Some(t) => format!("WHERE timeline = 0 AND tx > {}", t), + None => format!("WHERE timeline = 0") + }; + let mut stmt: ::rusqlite::Statement = db_tx.prepare(&format!("SELECT tx FROM timelined_transactions {} GROUP BY tx ORDER BY tx", after_clause))?; + let txs: Vec<_> = stmt.query_and_then(&[], |row| -> Result { + Ok(row.get_checked(0)?) + })?.collect(); + + let mut all = Vec::with_capacity(txs.len()); + for tx in txs { + all.push(tx?); + } + + Ok(all) + } + + pub fn is_tx_empty(db_tx: &rusqlite::Transaction, tx_id: Entid) -> Result { + let count = db_tx.query_row("SELECT count(rowid) FROM timelined_transactions WHERE timeline = 0 AND tx = ? AND e != ?", &[&tx_id, &tx_id], |row| -> Result { + Ok(row.get_checked(0)?) + })?; + Ok(count? == 0) + } + + pub fn has_entity_assertions_in_tx(db_tx: &rusqlite::Transaction, e: Entid, tx_id: Entid) -> Result { + let count = db_tx.query_row("SELECT count(rowid) FROM timelined_transactions WHERE timeline = 0 AND tx = ? AND e = ?", &[&tx_id, &e], |row| -> Result { + Ok(row.get_checked(0)?) + })?; + Ok(count? 
> 0) + } } #[cfg(test)] mod tests { use super::*; + use mentat_db::db; + #[test] fn test_get_remote_head_default() { - let mut conn = schema::tests::setup_conn(); - let tx = conn.transaction().expect("db tx"); - assert_eq!(Uuid::nil(), SyncMetadataClient::remote_head(&tx).expect("fetch succeeded")); + let mut conn = schema::tests::setup_conn_bare(); + let tx = schema::tests::setup_tx(&mut conn); + assert_eq!(Uuid::nil(), SyncMetadata::remote_head(&tx).expect("fetch succeeded")); } #[test] fn test_set_and_get_remote_head() { - let mut conn = schema::tests::setup_conn(); + let mut conn = schema::tests::setup_conn_bare(); + let tx = schema::tests::setup_tx(&mut conn); let uuid = Uuid::new_v4(); - let tx = conn.transaction().expect("db tx"); - SyncMetadataClient::set_remote_head(&tx, &uuid).expect("update succeeded"); - assert_eq!(uuid, SyncMetadataClient::remote_head(&tx).expect("fetch succeeded")); + SyncMetadata::set_remote_head(&tx, &uuid).expect("update succeeded"); + assert_eq!(uuid, SyncMetadata::remote_head(&tx).expect("fetch succeeded")); + } + + #[test] + fn test_root_and_head_tx() { + let mut conn = schema::tests::setup_conn_bare(); + db::ensure_current_version(&mut conn).expect("mentat db init"); + let db_tx = conn.transaction().expect("transaction"); + + let (root_tx, last_tx) = SyncMetadata::root_and_head_tx(&db_tx).expect("last tx"); + assert_eq!(268435456, root_tx); + assert_eq!(268435456, last_tx); + + // These are determenistic, but brittle. + // Inserting a tx 268435457 at time 1529971773701734 + // 268435457|3|1529971773701734|268435457|1|4 + // ... which defines entity ':person/name'... + // 65536|1|:person/name|268435457|1|13 + // ... which has valueType of string + // 65536|7|27|268435457|1|0 + // ... which is unique... + // 65536|9|36|268435457|1|0 + // ... ident + // 65536|11|1|268435457|1|1 + + // last attribute is the timeline (0). + + db_tx.execute("INSERT INTO timelined_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", &[&268435457, &3, &1529971773701734_i64, &268435457, &1, &4, &0]).expect("inserted"); + db_tx.execute("INSERT INTO timelined_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", &[&65536, &1, &":person/name", &268435457, &1, &13, &0]).expect("inserted"); + db_tx.execute("INSERT INTO timelined_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", &[&65536, &7, &27, &268435457, &1, &0, &0]).expect("inserted"); + db_tx.execute("INSERT INTO timelined_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", &[&65536, &9, &36, &268435457, &1, &0, &0]).expect("inserted"); + db_tx.execute("INSERT INTO timelined_transactions VALUES (?, ?, ?, ?, ?, ?, ?)", &[&65536, &11, &1, &268435457, &1, &1, &0]).expect("inserted"); + + let (root_tx, last_tx) = SyncMetadata::root_and_head_tx(&db_tx).expect("last tx"); + assert_eq!(268435456, root_tx); + assert_eq!(268435457, last_tx); } } diff --git a/tolstoy/src/remote_client.rs b/tolstoy/src/remote_client.rs new file mode 100644 index 00000000..6cf0bd06 --- /dev/null +++ b/tolstoy/src/remote_client.rs @@ -0,0 +1,341 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations under the License. + +#![allow(dead_code)] + +use std; + +use futures::{future, Future, Stream}; +use hyper; +// TODO: enable TLS support; hurdle is cross-compiling openssl for Android. +// See https://github.com/mozilla/mentat/issues/569 +// use hyper_tls; +use hyper::{ + Method, + Request, + StatusCode, + Error as HyperError +}; +use hyper::header::{ + ContentType, +}; +// TODO: https://github.com/mozilla/mentat/issues/570 +// use serde_cbor; +use serde_json; +use tokio_core::reactor::Core; +use uuid::Uuid; + +use public_traits::errors::{ + Result, +}; + +use logger::d; + +use types::{ + Tx, + TxPart, + GlobalTransactionLog, +}; + +#[derive(Serialize,Deserialize)] +struct SerializedHead { + head: Uuid +} + +#[derive(Serialize)] +struct SerializedTransaction<'a> { + parent: &'a Uuid, + chunks: &'a Vec +} + +#[derive(Deserialize)] +struct DeserializableTransaction { + parent: Uuid, + chunks: Vec, + id: Uuid, + seq: i64, +} + +#[derive(Deserialize)] +struct SerializedTransactions { + limit: i64, + from: Uuid, + transactions: Vec, +} + +pub struct RemoteClient { + base_uri: String, + user_uuid: Uuid, +} + +impl RemoteClient { + pub fn new(base_uri: String, user_uuid: Uuid) -> Self { + RemoteClient { + base_uri: base_uri, + user_uuid: user_uuid, + } + } + + fn bound_base_uri(&self) -> String { + // TODO escaping + format!("{}/{}", self.base_uri, self.user_uuid) + } + + // TODO what we want is a method that returns a deserialized json structure. + // It'll need a type T so that consumers can specify what downloaded json will + // map to. I ran into borrow issues doing that - probably need to restructure + // this and use PhantomData markers or somesuch. + // But for now, we get code duplication. 
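+    // A possible shape for that generic helper (an untested sketch, not part of this
+    // change; assumes serde's `DeserializeOwned` bound is sufficient), to which the
+    // `get_*` methods below could delegate:
+    //
+    // fn get_json<T: serde::de::DeserializeOwned>(&self, uri: String) -> Result<T> {
+    //     let mut core = Core::new()?;
+    //     let client = hyper::Client::new(&core.handle());
+    //     let uri = uri.parse()?;
+    //     let work = client.get(uri).and_then(|res| {
+    //         res.body().concat2().and_then(move |body| {
+    //             let parsed: T = serde_json::from_slice(&body).map_err(|e| {
+    //                 std::io::Error::new(std::io::ErrorKind::Other, e)
+    //             })?;
+    //             Ok(parsed)
+    //         })
+    //     });
+    //     Ok(core.run(work)?)
+    // }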
+ fn get_uuid(&self, uri: String) -> Result { + let mut core = Core::new()?; + // TODO https://github.com/mozilla/mentat/issues/569 + // let client = hyper::Client::configure() + // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) + // .build(&core.handle()); + let client = hyper::Client::new(&core.handle()); + + d(&format!("client")); + + let uri = uri.parse()?; + + d(&format!("parsed uri {:?}", uri)); + let work = client.get(uri).and_then(|res| { + println!("Response: {}", res.status()); + + res.body().concat2().and_then(move |body| { + let json: SerializedHead = serde_json::from_slice(&body).map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, e) + })?; + Ok(json) + }) + }); + + d(&format!("running...")); + + let head_json = core.run(work)?; + d(&format!("got head: {:?}", &head_json.head)); + Ok(head_json.head) + } + + fn put(&self, uri: String, payload: T, expected: StatusCode) -> Result<()> + where hyper::Body: std::convert::From, { + let mut core = Core::new()?; + // TODO https://github.com/mozilla/mentat/issues/569 + // let client = hyper::Client::configure() + // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) + // .build(&core.handle()); + let client = hyper::Client::new(&core.handle()); + + let uri = uri.parse()?; + + d(&format!("PUT {:?}", uri)); + + let mut req = Request::new(Method::Put, uri); + req.headers_mut().set(ContentType::json()); + req.set_body(payload); + + let put = client.request(req).and_then(|res| { + let status_code = res.status(); + + if status_code != expected { + d(&format!("bad put response: {:?}", status_code)); + future::err(HyperError::Status) + } else { + future::ok(()) + } + }); + + core.run(put)?; + Ok(()) + } + + fn get_transactions(&self, parent_uuid: &Uuid) -> Result> { + let mut core = Core::new()?; + // TODO https://github.com/mozilla/mentat/issues/569 + // let client = hyper::Client::configure() + // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) + // .build(&core.handle()); + let client = hyper::Client::new(&core.handle()); + + d(&format!("client")); + + let uri = format!("{}/transactions?from={}", self.bound_base_uri(), parent_uuid); + let uri = uri.parse()?; + + d(&format!("parsed uri {:?}", uri)); + + let work = client.get(uri).and_then(|res| { + println!("Response: {}", res.status()); + + res.body().concat2().and_then(move |body| { + let json: SerializedTransactions = serde_json::from_slice(&body).map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, e) + })?; + Ok(json) + }) + }); + + d(&format!("running...")); + + let transactions_json = core.run(work)?; + d(&format!("got transactions: {:?}", &transactions_json.transactions)); + Ok(transactions_json.transactions) + } + + fn get_chunks(&self, transaction_uuid: &Uuid) -> Result> { + let mut core = Core::new()?; + // TODO https://github.com/mozilla/mentat/issues/569 + // let client = hyper::Client::configure() + // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) + // .build(&core.handle()); + let client = hyper::Client::new(&core.handle()); + + d(&format!("client")); + + let uri = format!("{}/transactions/{}", self.bound_base_uri(), transaction_uuid); + let uri = uri.parse()?; + + d(&format!("parsed uri {:?}", uri)); + + let work = client.get(uri).and_then(|res| { + println!("Response: {}", res.status()); + + res.body().concat2().and_then(move |body| { + let json: DeserializableTransaction = serde_json::from_slice(&body).map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, 
e) + })?; + Ok(json) + }) + }); + + d(&format!("running...")); + + let transaction_json = core.run(work)?; + d(&format!("got transaction chunks: {:?}", &transaction_json.chunks)); + Ok(transaction_json.chunks) + } + + fn get_chunk(&self, chunk_uuid: &Uuid) -> Result { + let mut core = Core::new()?; + // TODO https://github.com/mozilla/mentat/issues/569 + // let client = hyper::Client::configure() + // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) + // .build(&core.handle()); + let client = hyper::Client::new(&core.handle()); + + d(&format!("client")); + + let uri = format!("{}/chunks/{}", self.bound_base_uri(), chunk_uuid); + let uri = uri.parse()?; + + d(&format!("parsed uri {:?}", uri)); + + let work = client.get(uri).and_then(|res| { + println!("Response: {}", res.status()); + + res.body().concat2().and_then(move |body| { + let json: TxPart = serde_json::from_slice(&body).map_err(|e| { + std::io::Error::new(std::io::ErrorKind::Other, e) + })?; + Ok(json) + }) + }); + + d(&format!("running...")); + + let chunk = core.run(work)?; + d(&format!("got transaction chunk: {:?}", &chunk)); + Ok(chunk) + } +} + +impl GlobalTransactionLog for RemoteClient { + fn head(&self) -> Result { + let uri = format!("{}/head", self.bound_base_uri()); + self.get_uuid(uri) + } + + fn set_head(&mut self, uuid: &Uuid) -> Result<()> { + // {"head": uuid} + let head = SerializedHead { + head: uuid.clone() + }; + + let uri = format!("{}/head", self.bound_base_uri()); + let json = serde_json::to_string(&head)?; + d(&format!("serialized head: {:?}", json)); + self.put(uri, json, StatusCode::NoContent) + } + + /// Slurp transactions and datoms after `tx`, returning them as owned data. + /// + /// This is inefficient but convenient for development. + fn transactions_after(&self, tx: &Uuid) -> Result> { + let new_txs = self.get_transactions(tx)?; + let mut tx_list = Vec::new(); + + for tx in new_txs { + let mut tx_parts = Vec::new(); + let chunks = self.get_chunks(&tx)?; + + // We pass along all of the downloaded parts, including transaction's + // metadata datom. Transactor is expected to do the right thing, and + // use txInstant from one of our datoms. + for chunk in chunks { + let part = self.get_chunk(&chunk)?; + tx_parts.push(part); + } + + tx_list.push(Tx { + tx: tx.into(), + parts: tx_parts + }); + } + + d(&format!("got tx list: {:?}", &tx_list)); + + Ok(tx_list) + } + + fn put_transaction(&mut self, transaction_uuid: &Uuid, parent_uuid: &Uuid, chunks: &Vec) -> Result<()> { + // {"parent": uuid, "chunks": [chunk1, chunk2...]} + let transaction = SerializedTransaction { + parent: parent_uuid, + chunks: chunks + }; + + let uri = format!("{}/transactions/{}", self.bound_base_uri(), transaction_uuid); + let json = serde_json::to_string(&transaction)?; + d(&format!("serialized transaction: {:?}", json)); + self.put(uri, json, StatusCode::Created) + } + + fn put_chunk(&mut self, chunk_uuid: &Uuid, payload: &TxPart) -> Result<()> { + let payload: String = serde_json::to_string(payload)?; + let uri = format!("{}/chunks/{}", self.bound_base_uri(), chunk_uuid); + d(&format!("serialized chunk: {:?}", payload)); + // TODO don't want to clone every datom! 
+ self.put(uri, payload, StatusCode::Created) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::str::FromStr; + + #[test] + fn test_remote_client_bound_uri() { + let user_uuid = Uuid::from_str(&"316ea470-ce35-4adf-9c61-e0de6e289c59").expect("uuid"); + let server_uri = String::from("https://example.com/api/0.1"); + let remote_client = RemoteClient::new(server_uri, user_uuid); + assert_eq!("https://example.com/api/0.1/316ea470-ce35-4adf-9c61-e0de6e289c59", remote_client.bound_base_uri()); + } +} diff --git a/tolstoy/src/schema.rs b/tolstoy/src/schema.rs index cc7593d3..066b4181 100644 --- a/tolstoy/src/schema.rs +++ b/tolstoy/src/schema.rs @@ -9,30 +9,44 @@ // specific language governing permissions and limitations under the License. use rusqlite; -use tolstoy_traits::errors::Result; -pub static REMOTE_HEAD_KEY: &str = r#"remote_head"#; +use mentat_db::V1_PARTS as BOOTSTRAP_PARTITIONS; + +use public_traits::errors::{ + Result, +}; + +pub static REMOTE_HEAD_KEY: &str = r"remote_head"; +pub static PARTITION_DB: &str = r":db.part/db"; +pub static PARTITION_USER: &str = r":db.part/user"; +pub static PARTITION_TX: &str = r":db.part/tx"; lazy_static! { /// SQL statements to be executed, in order, to create the Tolstoy SQL schema (version 1). + /// "tolstoy_parts" records what the partitions were at the end of last sync, and is used + /// as a "root partition" during renumbering (a three-way merge of partitions). #[cfg_attr(rustfmt, rustfmt_skip)] static ref SCHEMA_STATEMENTS: Vec<&'static str> = { vec![ - r#"CREATE TABLE IF NOT EXISTS tolstoy_tu (tx INTEGER PRIMARY KEY, uuid BLOB NOT NULL UNIQUE) WITHOUT ROWID"#, - r#"CREATE TABLE IF NOT EXISTS tolstoy_metadata (key BLOB NOT NULL UNIQUE, value BLOB NOT NULL)"#, - r#"CREATE INDEX IF NOT EXISTS idx_tolstoy_tu_ut ON tolstoy_tu (uuid, tx)"#, + "CREATE TABLE IF NOT EXISTS tolstoy_tu (tx INTEGER PRIMARY KEY, uuid BLOB NOT NULL UNIQUE) WITHOUT ROWID", + "CREATE TABLE IF NOT EXISTS tolstoy_metadata (key BLOB NOT NULL UNIQUE, value BLOB NOT NULL)", + "CREATE TABLE IF NOT EXISTS tolstoy_parts (part TEXT NOT NULL PRIMARY KEY, start INTEGER NOT NULL, end INTEGER NOT NULL, idx INTEGER NOT NULL, allow_excision SMALLINT NOT NULL)", + "CREATE INDEX IF NOT EXISTS idx_tolstoy_tu_ut ON tolstoy_tu (uuid, tx)", ] }; } -pub fn ensure_current_version(conn: &mut rusqlite::Connection) -> Result<()> { - let tx = conn.transaction()?; - +pub fn ensure_current_version(tx: &mut rusqlite::Transaction) -> Result<()> { for statement in (&SCHEMA_STATEMENTS).iter() { tx.execute(statement, &[])?; } + // Initial partition information is what we'd see at bootstrap, and is used during first sync. 
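+    // Each row is (part, start, end, idx, allow_excision), mirroring a mentat `Partition`.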
+ for (name, start, end, index, allow_excision) in BOOTSTRAP_PARTITIONS.iter() { + tx.execute("INSERT OR IGNORE INTO tolstoy_parts VALUES (?, ?, ?, ?, ?)", &[&name.to_string(), start, end, index, allow_excision])?; + } + tx.execute("INSERT OR IGNORE INTO tolstoy_metadata (key, value) VALUES (?, zeroblob(16))", &[&REMOTE_HEAD_KEY])?; - tx.commit().map_err(|e| e.into()) + Ok(()) } #[cfg(test)] @@ -40,7 +54,14 @@ pub mod tests { use super::*; use uuid::Uuid; - fn setup_conn_bare() -> rusqlite::Connection { + use metadata::{ + PartitionsTable, + SyncMetadata, + }; + + use mentat_db::USER0; + + pub fn setup_conn_bare() -> rusqlite::Connection { let conn = rusqlite::Connection::open_in_memory().unwrap(); conn.execute_batch(" @@ -54,19 +75,24 @@ pub mod tests { conn } - pub fn setup_conn() -> rusqlite::Connection { - let mut conn = setup_conn_bare(); - ensure_current_version(&mut conn).expect("connection setup"); - conn + pub fn setup_tx_bare<'a>(conn: &'a mut rusqlite::Connection) -> rusqlite::Transaction<'a> { + conn.transaction().expect("tx") + } + + pub fn setup_tx<'a>(conn: &'a mut rusqlite::Connection) -> rusqlite::Transaction<'a> { + let mut tx = conn.transaction().expect("tx"); + ensure_current_version(&mut tx).expect("connection setup"); + tx } #[test] fn test_empty() { let mut conn = setup_conn_bare(); + let mut tx = setup_tx_bare(&mut conn); - assert!(ensure_current_version(&mut conn).is_ok()); + assert!(ensure_current_version(&mut tx).is_ok()); - let mut stmt = conn.prepare("SELECT key FROM tolstoy_metadata WHERE value = zeroblob(16)").unwrap(); + let mut stmt = tx.prepare("SELECT key FROM tolstoy_metadata WHERE value = zeroblob(16)").unwrap(); let mut keys_iter = stmt.query_map(&[], |r| r.get(0)).expect("query works"); let first: Result = keys_iter.next().unwrap().map_err(|e| e.into()); @@ -77,32 +103,46 @@ pub mod tests { }, (_, _) => { panic!("Wrong number of results."); }, } + + let partitions = SyncMetadata::get_partitions(&tx, PartitionsTable::Tolstoy).unwrap(); + + assert_eq!(partitions.len(), BOOTSTRAP_PARTITIONS.len()); + + for (name, start, end, index, allow_excision) in BOOTSTRAP_PARTITIONS.iter() { + let p = partitions.get(&name.to_string()).unwrap(); + assert_eq!(p.start, *start); + assert_eq!(p.end, *end); + assert_eq!(p.next_entid(), *index); + assert_eq!(p.allow_excision, *allow_excision); + } } #[test] fn test_non_empty() { let mut conn = setup_conn_bare(); + let mut tx = setup_tx_bare(&mut conn); - assert!(ensure_current_version(&mut conn).is_ok()); + assert!(ensure_current_version(&mut tx).is_ok()); let test_uuid = Uuid::new_v4(); { - let tx = conn.transaction().unwrap(); let uuid_bytes = test_uuid.as_bytes().to_vec(); match tx.execute("UPDATE tolstoy_metadata SET value = ? WHERE key = ?", &[&uuid_bytes, &REMOTE_HEAD_KEY]) { Err(e) => panic!("Error running an update: {}", e), _ => () } - match tx.commit() { - Err(e) => panic!("Error committing an update: {}", e), - _ => () - } } - assert!(ensure_current_version(&mut conn).is_ok()); + let new_idx = USER0 + 1; + match tx.execute("UPDATE tolstoy_parts SET idx = ? WHERE part = ?", &[&new_idx, &PARTITION_USER]) { + Err(e) => panic!("Error running an update: {}", e), + _ => () + } + + assert!(ensure_current_version(&mut tx).is_ok()); // Check that running ensure_current_version on an initialized conn doesn't change anything. 
- let mut stmt = conn.prepare("SELECT value FROM tolstoy_metadata").unwrap(); + let mut stmt = tx.prepare("SELECT value FROM tolstoy_metadata").unwrap(); let mut values_iter = stmt.query_map(&[], |r| { let raw_uuid: Vec = r.get(0); Uuid::from_bytes(raw_uuid.as_slice()).unwrap() @@ -116,5 +156,13 @@ pub mod tests { }, (_, _) => { panic!("Wrong number of results."); }, } + + let partitions = SyncMetadata::get_partitions(&tx, PartitionsTable::Tolstoy).unwrap(); + + assert_eq!(partitions.len(), BOOTSTRAP_PARTITIONS.len()); + + let user_partition = partitions.get(PARTITION_USER).unwrap(); + assert_eq!(user_partition.start, USER0); + assert_eq!(user_partition.next_entid(), new_idx); } } diff --git a/tolstoy/src/syncer.rs b/tolstoy/src/syncer.rs index 6b535094..6d169503 100644 --- a/tolstoy/src/syncer.rs +++ b/tolstoy/src/syncer.rs @@ -8,494 +8,864 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std; -use std::collections::HashMap; +use std::fmt; + +use std::collections::HashSet; -use futures::{future, Future, Stream}; -use hyper; -// TODO: enable TLS support; hurdle is cross-compiling openssl for Android. -// See https://github.com/mozilla/mentat/issues/569 -// use hyper_tls; -use hyper::{Method, Request, StatusCode, Error as HyperError}; -use hyper::header::{ContentType}; use rusqlite; -// TODO: https://github.com/mozilla/mentat/issues/570 -// use serde_cbor; -use serde_json; -use tokio_core::reactor::Core; use uuid::Uuid; use core_traits::{ Entid, + KnownEntid, + TypedValue, }; -use metadata::SyncMetadataClient; -use metadata::HeadTrackable; -use schema::ensure_current_version; +use edn::{ + PlainSymbol, +}; +use edn::entities::{ + TxFunction, + EntityPlace, + LookupRef, +}; +use mentat_db::{ + CORE_SCHEMA_VERSION, + timelines, + debug, + entids, + PartitionMap, +}; +use mentat_transaction::{ + InProgress, + TermBuilder, + Queryable, +}; -use tolstoy_traits::errors::{ - TolstoyError, +use mentat_transaction::entity_builder::{ + BuildTerms, +}; + +use mentat_transaction::query::{ + QueryInputs, + Variable, +}; + +use bootstrap::{ + BootstrapHelper, +}; + +use public_traits::errors::{ Result, }; +use tolstoy_traits::errors::{ + TolstoyError, +}; +use metadata::{ + PartitionsTable, + SyncMetadata, +}; +use schema::{ + ensure_current_version, +}; +use tx_uploader::TxUploader; use tx_processor::{ Processor, TxReceiver, +}; +use tx_mapper::{ + TxMapper, +}; +use types::{ + LocalTx, + Tx, TxPart, + GlobalTransactionLog, }; -use tx_mapper::TxMapper; - -// TODO it would be nice to be able to pass -// in a logger into Syncer::flow; would allow for a "debug mode" -// and getting useful logs out of clients. -// See https://github.com/mozilla/mentat/issues/571 -// Below is some debug Android-friendly logging: - -// use std::os::raw::c_char; -// use std::os::raw::c_int; -// use std::ffi::CString; -// pub const ANDROID_LOG_DEBUG: i32 = 3; -// extern { pub fn __android_log_write(prio: c_int, tag: *const c_char, text: *const c_char) -> c_int; } - -pub fn d(message: &str) { - println!("d: {}", message); - // let message = CString::new(message).unwrap(); - // let message = message.as_ptr(); - // let tag = CString::new("RustyToodle").unwrap(); - // let tag = tag.as_ptr(); - // unsafe { __android_log_write(ANDROID_LOG_DEBUG, tag, message) }; -} +use logger::d; pub struct Syncer {} -// TODO this is sub-optimal, we don't need to walk the table -// to query the last thing in it w/ an index on tx!! 
-// but it's the hammer at hand! -// See https://github.com/mozilla/mentat/issues/572 -struct InquiringTxReceiver { - pub last_tx: Option, - pub is_done: bool, +#[derive(Debug,PartialEq,Clone)] +pub enum SyncFollowup { + None, + FullSync, } -impl InquiringTxReceiver { - fn new() -> InquiringTxReceiver { - InquiringTxReceiver { - last_tx: None, - is_done: false, +impl fmt::Display for SyncFollowup { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SyncFollowup::None => write!(f, "None"), + SyncFollowup::FullSync => write!(f, "Full sync"), } } } -impl TxReceiver for InquiringTxReceiver { - fn tx(&mut self, tx_id: Entid, _datoms: &mut T) -> Result<()> - where T: Iterator { - self.last_tx = Some(tx_id); - Ok(()) - } - - fn done(&mut self) -> Result<()> { - self.is_done = true; - Ok(()) - } +#[derive(Debug,PartialEq,Clone)] +pub enum SyncReport { + IncompatibleRemoteBootstrap(i64, i64), + BadRemoteState(String), + NoChanges, + RemoteFastForward, + LocalFastForward, + Merge(SyncFollowup), } -struct UploadingTxReceiver<'c> { - pub tx_temp_uuids: HashMap, - pub is_done: bool, - remote_client: &'c RemoteClient, - remote_head: &'c Uuid, - rolling_temp_head: Option, +pub enum SyncResult { + Atomic(SyncReport), + NonAtomic(Vec), } -impl<'c> UploadingTxReceiver<'c> { - fn new(client: &'c RemoteClient, remote_head: &'c Uuid) -> UploadingTxReceiver<'c> { - UploadingTxReceiver { - tx_temp_uuids: HashMap::new(), - remote_client: client, - remote_head: remote_head, - rolling_temp_head: None, - is_done: false - } - } -} - -impl<'c> TxReceiver for UploadingTxReceiver<'c> { - fn tx(&mut self, tx_id: Entid, datoms: &mut T) -> Result<()> - where T: Iterator { - // Yes, we generate a new UUID for a given Tx, even if we might - // already have one mapped locally. Pre-existing local mapping will - // be replaced if this sync succeeds entirely. - // If we're seeing this tx again, it implies that previous attempt - // to sync didn't update our local head. Something went wrong last time, - // and it's unwise to try to re-use these remote tx mappings. - // We just leave garbage txs to be GC'd on the server. - let tx_uuid = Uuid::new_v4(); - self.tx_temp_uuids.insert(tx_id, tx_uuid); - let mut tx_chunks = vec![]; - - // TODO separate bits of network work should be combined into single 'future' - - // Upload all chunks. - for datom in datoms { - let datom_uuid = Uuid::new_v4(); - tx_chunks.push(datom_uuid); - d(&format!("putting chunk: {:?}, {:?}", &datom_uuid, &datom)); - // TODO switch over to CBOR once we're past debugging stuff. - // See https://github.com/mozilla/mentat/issues/570 - // let cbor_val = serde_cbor::to_value(&datom)?; - // self.remote_client.put_chunk(&datom_uuid, &serde_cbor::ser::to_vec_sd(&cbor_val)?)?; - self.remote_client.put_chunk(&datom_uuid, &serde_json::to_string(&datom)?)?; - } - - // Upload tx. - // NB: At this point, we may choose to update remote & local heads. - // Depending on how much we're uploading, and how unreliable our connection - // is, this might be a good thing to do to ensure we make at least some progress. - // Comes at a cost of possibly increasing racing against other clients. 
- match self.rolling_temp_head { - Some(parent) => { - d(&format!("putting transaction: {:?}, {:?}, {:?}", &tx_uuid, &parent, &tx_chunks)); - self.remote_client.put_transaction(&tx_uuid, &parent, &tx_chunks)?; - +impl fmt::Display for SyncReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SyncReport::IncompatibleRemoteBootstrap(local, remote) => { + write!(f, "Incompatible remote bootstrap transaction version. Local: {}, remote: {}.", local, remote) }, - None => { - d(&format!("putting transaction: {:?}, {:?}, {:?}", &tx_uuid, &self.remote_head, &tx_chunks)); - self.remote_client.put_transaction(&tx_uuid, self.remote_head, &tx_chunks)?; + SyncReport::BadRemoteState(err) => { + write!(f, "Bad remote state: {}", err) + }, + SyncReport::NoChanges => { + write!(f, "Neither local nor remote have any new changes") + }, + SyncReport::RemoteFastForward => { + write!(f, "Fast-forwarded remote") + }, + SyncReport::LocalFastForward => { + write!(f, "Fast-forwarded local") + }, + SyncReport::Merge(follow_up) => { + write!(f, "Merged local and remote, requesting a follow-up: {}", follow_up) } } + } +} - d(&format!("updating rolling head: {:?}", tx_uuid)); - self.rolling_temp_head = Some(tx_uuid.clone()); +impl fmt::Display for SyncResult { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SyncResult::Atomic(report) => write!(f, "Single atomic sync: {}", report), + SyncResult::NonAtomic(reports) => { + writeln!(f, "Series of atomic syncs ({})", reports.len())?; + for report in reports { + writeln!(f, "{}", report)?; + } + writeln!(f, "\\o/") + } + } + } +} +#[derive(Debug,PartialEq)] +enum SyncAction { + NoOp, + // TODO this is the same as remote fast-forward from local root. + // It's currently distinguished from remote fast-forward for a more + // path through the "first-sync against non-empty remote" flow. + PopulateRemote, + RemoteFastForward, + LocalFastForward, + // Generic name since we might merge, or rebase, or do something else. + CombineChanges, +} + +/// Represents remote state relative to previous sync. +/// On first sync, it's always "Changed" unless remote is "Empty". +pub enum RemoteDataState { + Empty, + Changed, + Unchanged, +} + +/// Remote state is expressed in terms of what "remote head" actually is, +/// and what we think it is. +impl<'a> From<(&'a Uuid, &'a Uuid)> for RemoteDataState { + fn from((known_remote_head, actual_remote_head): (&Uuid, &Uuid)) -> RemoteDataState { + if *actual_remote_head == Uuid::nil() { + RemoteDataState::Empty + } else if actual_remote_head != known_remote_head { + RemoteDataState::Changed + } else { + RemoteDataState::Unchanged + } + } +} + +/// Represents local state relative to previous sync. +/// On first sync it's always "Changed". +/// Local client can't be empty: there's always at least a bootstrap transaction. +pub enum LocalDataState { + Changed, + Unchanged, +} + +/// Local state is expressed in terms of presence of a "mapping" for the local head. +/// Presence of a mapping means that we've uploaded our local head, +/// indicating that there's no local changes. +/// Absence of a mapping indicates that local head hasn't been uploaded +/// and that we have local changes. +impl From> for LocalDataState { + fn from(mapped_local_head: Option) -> LocalDataState { + match mapped_local_head { + Some(_) => LocalDataState::Unchanged, + None => LocalDataState::Changed + } + } +} + +// TODO rename this thing. 
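+// A `TxReceiver` that accumulates every local transaction it is fed - the tx entid
+// together with its datoms as `TxPart`s - into a `Vec<LocalTx>`, which `done()`
+// hands back. This gives the syncer an in-memory view of the relevant slice of the
+// local transaction log.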
+pub struct LocalTxSet { + txs: Vec, +} + +impl LocalTxSet { + pub fn new() -> LocalTxSet { + LocalTxSet { + txs: vec![] + } + } +} + +impl TxReceiver> for LocalTxSet { + fn tx(&mut self, tx_id: Entid, datoms: &mut T) -> Result<()> + where T: Iterator { + self.txs.push(LocalTx { + tx: tx_id, + parts: datoms.collect() + }); Ok(()) } - fn done(&mut self) -> Result<()> { - self.is_done = true; - Ok(()) + fn done(self) -> Vec { + self.txs } } impl Syncer { - fn upload_ours(db_tx: &mut rusqlite::Transaction, from_tx: Option, remote_client: &RemoteClient, remote_head: &Uuid) -> Result<()> { - let mut uploader = UploadingTxReceiver::new(remote_client, remote_head); - Processor::process(db_tx, from_tx, &mut uploader)?; - if !uploader.is_done { - bail!(TolstoyError::TxProcessorUnfinished); - } - // Last tx uuid uploaded by the tx receiver. - // It's going to be our new head. - if let Some(last_tx_uploaded) = uploader.rolling_temp_head { - // Upload remote head. - remote_client.put_head(&last_tx_uploaded)?; + /// Produces a SyncAction based on local and remote states. + fn what_do(remote_state: RemoteDataState, local_state: LocalDataState) -> SyncAction { + match remote_state { + RemoteDataState::Empty => { + SyncAction::PopulateRemote + }, - // On succes: - // - persist local mappings from the receiver - // - update our local "remote head". - TxMapper::set_bulk(db_tx, &uploader.tx_temp_uuids)?; - SyncMetadataClient::set_remote_head(db_tx, &last_tx_uploaded)?; - } + RemoteDataState::Changed => { + match local_state { + LocalDataState::Changed => { + SyncAction::CombineChanges + }, - Ok(()) - } - - pub fn flow(sqlite: &mut rusqlite::Connection, server_uri: &String, user_uuid: &Uuid) -> Result<()> { - d(&format!("sync flowing")); - - ensure_current_version(sqlite)?; - - // TODO configure this sync with some auth data - let remote_client = RemoteClient::new(server_uri.clone(), user_uuid.clone()); - let mut db_tx = sqlite.transaction()?; - - let remote_head = remote_client.get_head()?; - d(&format!("remote head {:?}", remote_head)); - - let locally_known_remote_head = SyncMetadataClient::remote_head(&db_tx)?; - d(&format!("local head {:?}", locally_known_remote_head)); - - // Local head: latest transaction that we have in the store, - // but with one caveat: its tx might will not be mapped if it's - // never been synced successfully. - // In other words: if latest tx isn't mapped, then HEAD moved - // since last sync and server needs to be updated. - let mut inquiring_tx_receiver = InquiringTxReceiver::new(); - // TODO don't just start from the beginning... but then again, we should do this - // without walking the table at all, and use the tx index. - Processor::process(&db_tx, None, &mut inquiring_tx_receiver)?; - if !inquiring_tx_receiver.is_done { - bail!(TolstoyError::TxProcessorUnfinished); - } - let have_local_changes = match inquiring_tx_receiver.last_tx { - Some(tx) => { - match TxMapper::get(&db_tx, tx)? { - Some(_) => false, - None => true + LocalDataState::Unchanged => { + SyncAction::LocalFastForward + }, } }, - None => false - }; - // Check if the server is empty - populate it. - if remote_head == Uuid::nil() { - d(&format!("empty server!")); - Syncer::upload_ours(&mut db_tx, None, &remote_client, &remote_head)?; - - // Check if the server is the same as us, and if our HEAD moved. - } else if locally_known_remote_head == remote_head { - d(&format!("server unchanged since last sync.")); - - if !have_local_changes { - d(&format!("local HEAD did not move. 
Nothing to do!")); - return Ok(()); - } + RemoteDataState::Unchanged => { + match local_state { + LocalDataState::Changed => { + SyncAction::RemoteFastForward + }, - d(&format!("local HEAD moved.")); - // TODO it's possible that we've successfully advanced remote head previously, - // but failed to advance our own local head. If that's the case, and we can recognize it, - // our sync becomes just bumping our local head. AFAICT below would currently fail. - if let Some(upload_from_tx) = TxMapper::get_tx_for_uuid(&db_tx, &locally_known_remote_head)? { - d(&format!("Fast-forwarding the server.")); - Syncer::upload_ours(&mut db_tx, Some(upload_from_tx), &remote_client, &remote_head)?; + LocalDataState::Unchanged => { + SyncAction::NoOp + }, + } + }, + } + } + + /// Upload local txs: (from_tx, HEAD]. Remote head is necessary here because we need to specify + /// "parent" for each transaction we'll upload; remote head will be first transaction's parent. + fn fast_forward_remote(db_tx: &mut rusqlite::Transaction, from_tx: Option, remote_client: &mut R, remote_head: &Uuid) -> Result<()> + where R: GlobalTransactionLog { + + // TODO consider moving head manipulations into uploader? + + let report; + + // Scope to avoid double-borrowing mutable remote_client. + { + // Prepare an uploader. + let uploader = TxUploader::new( + remote_client, + remote_head, + SyncMetadata::get_partitions(db_tx, PartitionsTable::Tolstoy)? + ); + // Walk the local transactions in the database and upload them. + report = Processor::process(db_tx, from_tx, uploader)?; + } + + if let Some(last_tx_uploaded) = report.head { + // Upload remote head. + remote_client.set_head(&last_tx_uploaded)?; + + // On success: + // - persist local mappings from the receiver + // - update our local "remote head". + TxMapper::set_lg_mappings( + db_tx, + report.temp_uuids.iter().map(|v| (*v.0, v.1).into()).collect() + )?; + + SyncMetadata::set_remote_head(db_tx, &last_tx_uploaded)?; + } + + Ok(()) + } + + fn local_tx_for_uuid(db_tx: &rusqlite::Transaction, uuid: &Uuid) -> Result { + match TxMapper::get_tx_for_uuid(db_tx, uuid)? { + Some(t) => Ok(t), + None => bail!(TolstoyError::TxIncorrectlyMapped(0)) + } + } + + fn remote_parts_to_builder(builder: &mut TermBuilder, parts: Vec) -> Result<()> { + for part in parts { + let e: EntityPlace; + let a = KnownEntid(part.a); + let v = part.v; + + // Instead of providing a 'txInstant' datom directly, we map it + // into a (transaction-tx) style assertion. + // Transactor knows how to pick out a txInstant value out of these + // assertions and use that value for the generated transaction's txInstant. + if part.a == entids::DB_TX_INSTANT { + e = EntityPlace::TxFunction(TxFunction { op: PlainSymbol("transaction-tx".to_string()) } ).into(); } else { - d(&format!("Unable to fast-forward the server; missing local tx mapping")); - bail!(TolstoyError::TxIncorrectlyMapped(0)); + e = KnownEntid(part.e).into(); } - - // We diverged from the server. - // We'll need to rebase/merge ourselves on top of it. + + if part.added { + builder.add(e, a, v)?; + } else { + builder.retract(e, a, v)?; + } + } + + Ok(()) + } + + /// In context of a "transaction to be applied", a PartitionMap supplied here + /// represents what a PartitionMap will be once this transaction is applied. + /// This works well for regular assertions: entids are supplied, and we need + /// them to be allocated in the user partition space. 
+ /// However, we need to decrement 'tx' partition's index, so that the transactor's + /// allocated tx will match what came off the wire. + /// N.B.: this depends on absence of holes in the 'tx' partition! + fn rewind_tx_partition_by_one(partition_map: &mut PartitionMap) -> Result<()> { + if let Some(tx_part) = partition_map.get_mut(":db.part/tx") { + assert_eq!(false, tx_part.allow_excision); // Sanity check. + + let next_entid = tx_part.next_entid() - 1; + tx_part.set_next_entid(next_entid); + Ok(()) } else { - d(&format!("server changed since last sync.")); - - bail!(TolstoyError::NotYetImplemented( - format!("Can't yet sync against changed server. Local head {:?}, remote head {:?}", locally_known_remote_head, remote_head) - )); - } - - // Commit everything, if there's anything to commit! - // Any new tx->uuid mappings and the new HEAD. We're synced! - db_tx.commit()?; - - Ok(()) - } -} - -#[derive(Serialize,Deserialize)] -struct SerializedHead { - head: Uuid -} - -#[derive(Serialize)] -struct SerializedTransaction<'a> { - parent: &'a Uuid, - chunks: &'a Vec -} - -struct RemoteClient { - base_uri: String, - user_uuid: Uuid -} - - -impl RemoteClient { - fn new(base_uri: String, user_uuid: Uuid) -> Self { - RemoteClient { - base_uri: base_uri, - user_uuid: user_uuid + bail!(TolstoyError::BadRemoteState("Missing tx partition in an incoming transaction".to_string())); } } - fn bound_base_uri(&self) -> String { - // TODO escaping - format!("{}/{}", self.base_uri, self.user_uuid) + fn fast_forward_local<'a, 'c>(in_progress: &mut InProgress<'a, 'c>, txs: Vec) -> Result { + let mut last_tx = None; + + for tx in txs { + let mut builder = TermBuilder::new(); + + // TODO both here and in the merge scenario we're doing the same thing with the partition maps + // and with the txInstant datom rewriting. + // Figure out how to combine these operations into a resuable primitive(s). + // See notes in 'merge' for why we're doing this stuff. + let mut partition_map = match tx.parts[0].partitions.clone() { + Some(parts) => parts, + None => return Ok(SyncReport::BadRemoteState("Missing partition map in incoming transaction".to_string())) + }; + + // Make space in the provided tx partition for the transaction we're about to create. + // See function's notes for details. + Syncer::rewind_tx_partition_by_one(&mut partition_map)?; + Syncer::remote_parts_to_builder(&mut builder, tx.parts)?; + + // Allocate space for the incoming entids. + in_progress.partition_map = partition_map; + let report = in_progress.transact_builder(builder)?; + last_tx = Some((report.tx_id, tx.tx.clone())); + } + + // We've just transacted a new tx, and generated a new tx entid. Map it to the corresponding + // incoming tx uuid, advance our "locally known remote head". + if let Some((entid, uuid)) = last_tx { + SyncMetadata::set_remote_head_and_map(&mut in_progress.transaction, (entid, &uuid).into())?; + } + + Ok(SyncReport::LocalFastForward) } - fn get_uuid(&self, uri: String) -> Result { - let mut core = Core::new()?; - // TODO enable TLS, see https://github.com/mozilla/mentat/issues/569 - // let client = hyper::Client::configure() - // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) - // .build(&core.handle()); - let client = hyper::Client::new(&core.handle()); + fn merge(ip: &mut InProgress, incoming_txs: Vec, mut local_txs_to_merge: Vec) -> Result { + d(&format!("Rewinding local transactions.")); - d(&format!("client")); + // 1) Rewind local to shared root. 
+ local_txs_to_merge.sort(); // TODO sort at the interface level? - let uri = uri.parse()?; + let (new_schema, new_partition_map) = timelines::move_from_main_timeline( + &ip.transaction, + &ip.schema, + ip.partition_map.clone(), + local_txs_to_merge[0].tx.., + // A poor man's parent reference. This might be brittle, although + // excisions are prohibited in the 'tx' partition, so this should hold... + local_txs_to_merge[0].tx - 1 + )?; + match new_schema { + Some(schema) => ip.schema = schema, + None => () + }; + ip.partition_map = new_partition_map; - d(&format!("parsed uri {:?}", uri)); - - let work = client.get(uri).and_then(|res| { - println!("Response: {}", res.status()); + // 2) Transact incoming. + // 2.1) Prepare remote tx tuples (TermBuilder, PartitionMap, Uuid), which represent + // a remote transaction, its global identifier and partitions after it's applied. + d(&format!("Transacting incoming...")); + let mut builders = vec![]; + for remote_tx in incoming_txs { + let mut builder = TermBuilder::new(); - res.body().concat2().and_then(move |body| { - let head_json: SerializedHead = serde_json::from_slice(&body).map_err(|e| { - std::io::Error::new(std::io::ErrorKind::Other, e) - })?; - Ok(head_json) - }) - }); + let partition_map = match remote_tx.parts[0].partitions.clone() { + Some(parts) => parts, + None => return Ok(SyncReport::BadRemoteState("Missing partition map in incoming transaction".to_string())) + }; - d(&format!("running...")); + Syncer::remote_parts_to_builder(&mut builder, remote_tx.parts)?; - let head_json = core.run(work)?; - d(&format!("got head: {:?}", &head_json.head)); - Ok(head_json.head) - } + builders.push((builder, partition_map, remote_tx.tx)); + } - fn put(&self, uri: String, payload: T, expected: StatusCode) -> Result<()> - where hyper::Body: std::convert::From, { - let mut core = Core::new()?; - // TODO enable TLS, see https://github.com/mozilla/mentat/issues/569 - // let client = hyper::Client::configure() - // .connector(hyper_tls::HttpsConnector::new(4, &core.handle()).unwrap()) - // .build(&core.handle()); - let client = hyper::Client::new(&core.handle()); + let mut remote_report = None; + for (builder, mut partition_map, remote_tx) in builders { + // Make space in the provided tx partition for the transaction we're about to create. + // See function's notes for details. + Syncer::rewind_tx_partition_by_one(&mut partition_map)?; - let uri = uri.parse()?; + // This allocates our incoming entids in each builder, + // letting us just use KnownEntid in the builders. + ip.partition_map = partition_map; + remote_report = Some((ip.transact_builder(builder)?.tx_id, remote_tx)); + } - d(&format!("PUT {:?}", uri)); + d(&format!("Transacting local on top of incoming...")); + // 3) Rebase local transactions on top of remote. + let mut clean_rebase = true; + for local_tx in local_txs_to_merge { + let mut builder = TermBuilder::new(); - let mut req = Request::new(Method::Put, uri); - req.headers_mut().set(ContentType::json()); - req.set_body(payload); + // This is the beginnings of entity merging. - let put = client.request(req).and_then(|res| { - let status_code = res.status(); + // An entid might be already known to the Schema, or it + // might be allocated in this transaction. + // In the former case, refer to it verbatim. + // In the latter case, rewrite it as a tempid, and let the transactor allocate it. 
+ let mut entids_that_will_allocate = HashSet::new(); - if status_code != expected { - d(&format!("bad put response: {:?}", status_code)); - future::err(HyperError::Status) - } else { - future::ok(()) + // We currently support "strict schema merging": we'll smush attribute definitions, + // but only if they're the same. + // e.g. prohibited would be defining different cardinality for the same attribute. + // Defining new attributes is allowed if: + // - attribute is defined either on local or remote, + // - attribute is defined on both local and remote in the same way. + // Modifying an attribute is currently not supported (requires higher order schema migrations). + // Note that "same" local and remote attributes might have different entids in the + // two sets of transactions. + + // Set of entities that may alter "installed" attribute. + // Since this is a rebase of local on top of remote, an "installed" + // attribute might be one that was present in the root, or one that was + // defined by remote. + let mut might_alter_installed_attributes = HashSet::new(); + + // Set of entities that describe a new attribute, not present in the root + // or on the remote. + let mut will_not_alter_installed_attribute = HashSet::new(); + + // Note that at this point, remote and local have flipped - we're transacting + // local on top of incoming (which are already in the schema). + + // Go through local datoms, and classify any schema-altering entids into + // one of the two sets above. + for part in &local_tx.parts { + // If we have an ident definition locally, check if remote + // already defined this ident. If it did, we'll need to ensure + // both local and remote are defining it in the same way. + if part.a == entids::DB_IDENT { + match part.v { + TypedValue::Keyword(ref local_kw) => { + // Remote did not define this ident. Make a note of it, + // so that we'll know to ignore its attribute datoms. + if !ip.schema.ident_map.contains_key(local_kw) { + will_not_alter_installed_attribute.insert(part.e); + + // Otherwise, we'll need to ensure we have the same attribute definition + // for it. + } else { + might_alter_installed_attributes.insert(part.e); + } + }, + _ => panic!("programming error: wrong value type for a local ident") + } + } else if entids::is_a_schema_attribute(part.a) && !will_not_alter_installed_attribute.contains(&part.e) { + might_alter_installed_attributes.insert(part.e); + } } - }); - core.run(put)?; - Ok(()) + for part in &local_tx.parts { + match part.a { + // We'll be ignoring this datom later on (to be generated by the transactor). + // During a merge we're concerned with entities in the "user" partition, + // while this falls into the "tx" partition. + // We have preserved the original txInstant value on the alternate timeline. + entids::DB_TX_INSTANT => continue, + + // 'e's will be replaced with tempids, letting transactor handle everything. + // Non-unique entities are "duplicated". Unique entities are upserted. + _ => { + // Retractions never allocated tempids in the transactor. + if part.added { + entids_that_will_allocate.insert(part.e); + } + }, + } + } + + // :db/ident is a db.unique/identity attribute, which means transactor will upsert + // attribute assertions. E.g. if a new attribute was defined on local and not on remote, + // it will be inserted. If both local and remote defined the same attribute + // with different entids, we'll converge and use remote's entid. + + // Same follows for other types of db.unique/identity attributes. 
+ // If user-defined attribute is db.unique/identity, we'll "smush" local and remote + // assertions against it. + // For example, {:p/name "Grisha"} assertion on local and + // {:p/name "Grisha"} assertion on remote will result in a single entity. + + // If user-defined attribute is not unique, however, no smushing will be performed. + // The above example will result in two entities. + + for part in local_tx.parts { + // Skip the "tx instant" datom: it will be generated by our transactor. + // We don't care about preserving exact state of these datoms: they're already + // stashed away on the timeline we've created above. + if part.a == entids::DB_TX_INSTANT { + continue; + } + + let e: EntityPlace; + let a = KnownEntid(part.a); + let v = part.v; + + // Rewrite entids if they will allocate (see entity merging notes above). + if entids_that_will_allocate.contains(&part.e) { + e = builder.named_tempid(format!("{}", part.e)).into(); + // Otherwise, refer to existing entities. + } else { + e = KnownEntid(part.e).into(); + } + + // TODO we need to do the same rewriting for part.v if it's a Ref. + + // N.b.: attribute can't refer to an unallocated entity, so it's always a KnownEntid. + // To illustrate, this is not a valid transaction, and will fail ("no entid found for ident: :person/name"): + // [ + // {:db/ident :person/name :db/valueType :db.type/string :db/cardinality :db.cardinality/one} + // {:person/name "Grisha"} + // ] + // One would need to split that transaction into two, + // at which point :person/name will refer to an allocated entity. + + match part.added { + true => builder.add(e, a, v)?, + false => { + if entids_that_will_allocate.contains(&part.e) { + builder.retract(e, a, v)?; + continue; + } + + // TODO handle tempids in ValuePlace, as well. + + // Retractions with non-upserting tempids are not currently supported. + // We work around this by using a lookup-ref instead of the entity tempid. + // However: + // - lookup-ref can only be used for attributes which are :db/unique, + // - a lookup-ref must resolve. If it doesn't, our transaction will fail. + // And so: + // - we skip retractions of non-unique attributes, + // - we "pre-run" a lookup-ref to ensure it will resolve, + // and skip the retraction otherwise. + match ip.schema.attribute_map.get(&part.a) { + Some(attributes) => { + // A lookup-ref using a non-unique attribute will fail. + // Skip this retraction, since we can't make sense of it. + if attributes.unique.is_none() { + continue; + } + }, + None => panic!("programming error: missing attribute map for a known attribute") + } + + // TODO prepare a query and re-use it for all retractions of this type + let pre_lookup = ip.q_once( + "[:find ?e . :in ?a ?v :where [?e ?a ?v]]", + QueryInputs::with_value_sequence( + vec![ + (Variable::from_valid_name("?a"), a.into()), + (Variable::from_valid_name("?v"), v.clone()), + ] + ) + )?; + + if pre_lookup.is_empty() { + continue; + } + + // TODO just use the value from the query instead of doing _another_ lookup-ref! + + builder.retract( + EntityPlace::LookupRef(LookupRef {a: a.into(), v: v.clone()}), a, v + )?; + } + } + } + + // After all these checks, our builder might be empty: short-circuit. + if builder.is_empty() { + continue; + } + + d(&format!("Savepoint before transacting a local tx...")); + ip.savepoint("speculative_local")?; + + d(&format!("Transacting builder filled with local txs... {:?}", builder)); + + let report = ip.transact_builder(builder)?; + + // Let's check that we didn't modify any schema attributes. 
+ // Our current attribute map in the schema isn't rich enough to allow
+ // for this check: it's missing a notion of "attribute absence" - we can't
+ // distinguish between a missing attribute and a default value.
+ // Instead, we simply query the database, checking if transaction produced
+ // any schema-altering datoms.
+ for e in might_alter_installed_attributes.iter() {
+ match report.tempids.get(&format!("{}", e)) {
+ Some(resolved_e) => {
+ if SyncMetadata::has_entity_assertions_in_tx(&ip.transaction, *resolved_e, report.tx_id)? {
+ bail!(TolstoyError::NotYetImplemented("Can't sync with schema alterations yet.".to_string()));
+ }
+ },
+ None => ()
+ }
+ }
+
+ if !SyncMetadata::is_tx_empty(&ip.transaction, report.tx_id)? {
+ d(&format!("tx {} is not a no-op", report.tx_id));
+ clean_rebase = false;
+ ip.release_savepoint("speculative_local")?;
+ } else {
+ d(&format!("Applied tx {} as a no-op. Rolling back the savepoint (empty tx clean-up).", report.tx_id));
+ ip.rollback_savepoint("speculative_local")?;
+ }
+ }
+
+ // TODO
+ // At this point, we've rebased local transactions on top of remote.
+ // This would be a good point to create a "merge commit" and upload our losing timeline.
+
+ // Since we don't upload during a merge (instead, we request a follow-up sync),
+ // set the locally known remote HEAD to what we received from the 'remote'.
+ if let Some((entid, uuid)) = remote_report {
+ SyncMetadata::set_remote_head_and_map(&mut ip.transaction, (entid, &uuid).into())?;
+ }
+
+ // If necessary, request a full sync as a follow-up to fast-forward remote.
+ if clean_rebase {
+ Ok(SyncReport::Merge(SyncFollowup::None))
+ } else {
+ Ok(SyncReport::Merge(SyncFollowup::FullSync))
+ }
 }

- fn put_transaction(&self, transaction_uuid: &Uuid, parent_uuid: &Uuid, chunks: &Vec<Uuid>) -> Result<()> {
- // {"parent": uuid, "chunks": [chunk1, chunk2...]}
- let transaction = SerializedTransaction {
- parent: parent_uuid,
- chunks: chunks
+ fn first_sync_against_non_empty<R>(ip: &mut InProgress, remote_client: &R, local_metadata: &SyncMetadata) -> Result<SyncReport>
+ where R: GlobalTransactionLog {
+
+ d(&format!("remote non-empty on first sync, adopting remote state."));
+
+ // 1) Download remote transactions.
+ let incoming_txs = remote_client.transactions_after(&Uuid::nil())?;
+ if incoming_txs.len() == 0 {
+ return Ok(SyncReport::BadRemoteState("Remote specified non-root HEAD but gave no transactions".to_string()));
+ }
+
+ // 2) Process remote bootstrap.
+ let remote_bootstrap = &incoming_txs[0];
+ let local_bootstrap = local_metadata.root;
+ let bootstrap_helper = BootstrapHelper::new(remote_bootstrap);
+
+ if !bootstrap_helper.is_compatible()? {
+ return Ok(SyncReport::IncompatibleRemoteBootstrap(CORE_SCHEMA_VERSION as i64, bootstrap_helper.core_schema_version()?));
+ }
+
+ d(&format!("mapping incoming bootstrap tx uuid to local bootstrap entid: {} -> {}", remote_bootstrap.tx, local_bootstrap));
+
+ // Map incoming bootstrap tx uuid to local bootstrap entid.
+ // If there's more work to do, we'll move the head again.
+ SyncMetadata::set_remote_head_and_map(&mut ip.transaction, (local_bootstrap, &remote_bootstrap.tx).into())?;
+
+ // 3) Determine new local and remote data states, now that bootstrap has been dealt with.
+ let remote_state = if incoming_txs.len() > 1 { + RemoteDataState::Changed + } else { + RemoteDataState::Unchanged }; - let uri = format!("{}/transactions/{}", self.bound_base_uri(), transaction_uuid); - let json = serde_json::to_string(&transaction)?; - d(&format!("serialized transaction: {:?}", json)); - self.put(uri, json, StatusCode::Created) - } - - fn get_head(&self) -> Result { - let uri = format!("{}/head", self.bound_base_uri()); - self.get_uuid(uri) - } - - fn put_head(&self, uuid: &Uuid) -> Result<()> { - // {"head": uuid} - let head = SerializedHead { - head: uuid.clone() + let local_state = if local_metadata.root != local_metadata.head { + LocalDataState::Changed + } else { + LocalDataState::Unchanged }; - let uri = format!("{}/head", self.bound_base_uri()); - let json = serde_json::to_string(&head)?; - d(&format!("serialized head: {:?}", json)); - self.put(uri, json, StatusCode::NoContent) + // 4) The rest of this flow isn't that special anymore. + // Since we've "merged" with the remote bootstrap, the "no-op" and + // "local fast-forward" cases are reported as merges. + match Syncer::what_do(remote_state, local_state) { + SyncAction::NoOp => { + Ok(SyncReport::Merge(SyncFollowup::None)) + }, + + SyncAction::PopulateRemote => { + // This is a programming error. + bail!(TolstoyError::UnexpectedState(format!("Remote state can't be empty on first sync against non-empty remote"))) + }, + + SyncAction::RemoteFastForward => { + bail!(TolstoyError::NotYetImplemented(format!("TODO fast-forward remote on first sync when remote is just bootstrap and local has more"))) + }, + + SyncAction::LocalFastForward => { + Syncer::fast_forward_local(ip, incoming_txs[1 ..].to_vec())?; + Ok(SyncReport::Merge(SyncFollowup::None)) + }, + + SyncAction::CombineChanges => { + let local_txs = Processor::process( + &mut ip.transaction, Some(local_metadata.root), LocalTxSet::new())?; + Syncer::merge( + ip, + incoming_txs[1 ..].to_vec(), + local_txs + ) + } + } } - fn put_chunk(&self, chunk_uuid: &Uuid, payload: &String) -> Result<()> { - let uri = format!("{}/chunks/{}", self.bound_base_uri(), chunk_uuid); - d(&format!("serialized chunk: {:?}", payload)); - // TODO don't want to clone every datom! - self.put(uri, payload.clone(), StatusCode::Created) + pub fn sync(ip: &mut InProgress, remote_client: &mut R) -> Result + where R: GlobalTransactionLog { + + d(&format!("sync flowing")); + + ensure_current_version(&mut ip.transaction)?; + + let remote_head = remote_client.head()?; + d(&format!("remote head {:?}", remote_head)); + + let locally_known_remote_head = SyncMetadata::remote_head(&mut ip.transaction)?; + d(&format!("local head {:?}", locally_known_remote_head)); + + let (root, head) = SyncMetadata::root_and_head_tx(&mut ip.transaction)?; + let local_metadata = SyncMetadata::new(root, head); + + // impl From ... vs ::new() calls to constuct "state" objects? + let local_state = TxMapper::get(&mut ip.transaction, local_metadata.head)?.into(); + let remote_state = (&locally_known_remote_head, &remote_head).into(); + + // Currently, first sync against a non-empty remote is special. + if locally_known_remote_head == Uuid::nil() && remote_head != Uuid::nil() { + return Syncer::first_sync_against_non_empty(ip, remote_client, &local_metadata); + } + + match Syncer::what_do(remote_state, local_state) { + SyncAction::NoOp => { + d(&format!("local HEAD did not move. 
Nothing to do!")); + Ok(SyncReport::NoChanges) + }, + + SyncAction::PopulateRemote => { + d(&format!("empty remote!")); + Syncer::fast_forward_remote(&mut ip.transaction, None, remote_client, &remote_head)?; + Ok(SyncReport::RemoteFastForward) + }, + + SyncAction::RemoteFastForward => { + d(&format!("local HEAD moved.")); + let upload_from_tx = Syncer::local_tx_for_uuid( + &mut ip.transaction, &locally_known_remote_head + )?; + + d(&format!("Fast-forwarding the remote.")); + + // TODO it's possible that we've successfully advanced remote head previously, + // but failed to advance our own local head. If that's the case, and we can recognize it, + // our sync becomes just bumping our local head. AFAICT below would currently fail. + Syncer::fast_forward_remote( + &mut ip.transaction, Some(upload_from_tx), remote_client, &remote_head + )?; + Ok(SyncReport::RemoteFastForward) + }, + + SyncAction::LocalFastForward => { + d(&format!("fast-forwarding local store.")); + Syncer::fast_forward_local( + ip, + remote_client.transactions_after(&locally_known_remote_head)? + )?; + Ok(SyncReport::LocalFastForward) + }, + + SyncAction::CombineChanges => { + d(&format!("combining changes from local and remote stores.")); + // Get the starting point for out local set of txs to merge. + let combine_local_from_tx = Syncer::local_tx_for_uuid( + &mut ip.transaction, &locally_known_remote_head + )?; + let local_txs = Processor::process( + &mut ip.transaction, + Some(combine_local_from_tx), + LocalTxSet::new() + )?; + // Merge! + Syncer::merge( + ip, + // Remote txs to merge... + remote_client.transactions_after(&locally_known_remote_head)?, + // ... with the local txs. + local_txs + ) + }, + } } } #[cfg(test)] mod tests { use super::*; - use std::borrow::Borrow; - use std::str::FromStr; - - use edn; - - use mentat_db::debug::{TestConn}; #[test] - fn test_remote_client_bound_uri() { - let user_uuid = Uuid::from_str(&"316ea470-ce35-4adf-9c61-e0de6e289c59").expect("uuid"); - let server_uri = String::from("https://example.com/api/0.1"); - let remote_client = RemoteClient::new(server_uri, user_uuid); - assert_eq!("https://example.com/api/0.1/316ea470-ce35-4adf-9c61-e0de6e289c59", remote_client.bound_base_uri()); - } + fn test_what_do() { + assert_eq!(SyncAction::PopulateRemote, Syncer::what_do(RemoteDataState::Empty, LocalDataState::Unchanged)); + assert_eq!(SyncAction::PopulateRemote, Syncer::what_do(RemoteDataState::Empty, LocalDataState::Changed)); - #[test] - fn test_add() { - let mut conn = TestConn::default(); + assert_eq!(SyncAction::NoOp, Syncer::what_do(RemoteDataState::Unchanged, LocalDataState::Unchanged)); + assert_eq!(SyncAction::RemoteFastForward, Syncer::what_do(RemoteDataState::Unchanged, LocalDataState::Changed)); - // Test inserting :db.cardinality/one elements. - assert_transact!(conn, "[[:db/add 100 :db.schema/version 1] - [:db/add 101 :db.schema/version 2]]"); - assert_matches!(conn.last_transaction(), - "[[100 :db.schema/version 1 ?tx true] - [101 :db.schema/version 2 ?tx true] - [?tx :db/txInstant ?ms ?tx true]]"); - assert_matches!(conn.datoms(), - "[[100 :db.schema/version 1] - [101 :db.schema/version 2]]"); - - // Test inserting :db.cardinality/many elements. 
- assert_transact!(conn, "[[:db/add 200 :db.schema/attribute 100] - [:db/add 200 :db.schema/attribute 101]]"); - assert_matches!(conn.last_transaction(), - "[[200 :db.schema/attribute 100 ?tx true] - [200 :db.schema/attribute 101 ?tx true] - [?tx :db/txInstant ?ms ?tx true]]"); - assert_matches!(conn.datoms(), - "[[100 :db.schema/version 1] - [101 :db.schema/version 2] - [200 :db.schema/attribute 100] - [200 :db.schema/attribute 101]]"); - - // Test replacing existing :db.cardinality/one elements. - assert_transact!(conn, "[[:db/add 100 :db.schema/version 11] - [:db/add 101 :db.schema/version 22]]"); - assert_matches!(conn.last_transaction(), - "[[100 :db.schema/version 1 ?tx false] - [100 :db.schema/version 11 ?tx true] - [101 :db.schema/version 2 ?tx false] - [101 :db.schema/version 22 ?tx true] - [?tx :db/txInstant ?ms ?tx true]]"); - assert_matches!(conn.datoms(), - "[[100 :db.schema/version 11] - [101 :db.schema/version 22] - [200 :db.schema/attribute 100] - [200 :db.schema/attribute 101]]"); - - - // Test that asserting existing :db.cardinality/one elements doesn't change the store. - assert_transact!(conn, "[[:db/add 100 :db.schema/version 11] - [:db/add 101 :db.schema/version 22]]"); - assert_matches!(conn.last_transaction(), - "[[?tx :db/txInstant ?ms ?tx true]]"); - assert_matches!(conn.datoms(), - "[[100 :db.schema/version 11] - [101 :db.schema/version 22] - [200 :db.schema/attribute 100] - [200 :db.schema/attribute 101]]"); - - - // Test that asserting existing :db.cardinality/many elements doesn't change the store. - assert_transact!(conn, "[[:db/add 200 :db.schema/attribute 100] - [:db/add 200 :db.schema/attribute 101]]"); - assert_matches!(conn.last_transaction(), - "[[?tx :db/txInstant ?ms ?tx true]]"); - assert_matches!(conn.datoms(), - "[[100 :db.schema/version 11] - [101 :db.schema/version 22] - [200 :db.schema/attribute 100] - [200 :db.schema/attribute 101]]"); + assert_eq!(SyncAction::LocalFastForward, Syncer::what_do(RemoteDataState::Changed, LocalDataState::Unchanged)); + assert_eq!(SyncAction::CombineChanges, Syncer::what_do(RemoteDataState::Changed, LocalDataState::Changed)); } } diff --git a/tolstoy/src/tx_mapper.rs b/tolstoy/src/tx_mapper.rs index a66fbe88..f80bc13d 100644 --- a/tolstoy/src/tx_mapper.rs +++ b/tolstoy/src/tx_mapper.rs @@ -8,7 +8,6 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -use std::collections::HashMap; use rusqlite; use uuid::Uuid; @@ -16,26 +15,37 @@ use core_traits::{ Entid, }; +use public_traits::errors::{ + Result, +}; + use tolstoy_traits::errors::{ TolstoyError, - Result, +}; + +use types::{ + LocalGlobalTxMapping, }; // Exposes a tx<->uuid mapping interface. 
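In practice the uploader writes these mappings after each successful push, and the syncer reads them back to work out what still needs uploading. A minimal round-trip sketch; the entid and uuid values are made up, and db_tx is assumed to be a rusqlite transaction with the Tolstoy tables already ensured:

fn record_upload_mapping(db_tx: &mut rusqlite::Transaction) -> Result<()> {
    let local_tx: Entid = 268435457;   // hypothetical entid from the tx partition
    let remote_uuid = Uuid::new_v4();  // uuid the server will know this tx by

    // Persist the local<->remote association...
    TxMapper::set_lg_mapping(db_tx, (local_tx, &remote_uuid).into())?;

    // ...and read it back by local entid.
    assert_eq!(Some(remote_uuid), TxMapper::get(db_tx, local_tx)?);
    Ok(())
}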
pub struct TxMapper {} impl TxMapper { - pub fn set_bulk(db_tx: &mut rusqlite::Transaction, tx_uuid_map: &HashMap) -> Result<()> { + pub fn set_lg_mappings(db_tx: &mut rusqlite::Transaction, mappings: Vec) -> Result<()> { let mut stmt = db_tx.prepare_cached( "INSERT OR REPLACE INTO tolstoy_tu (tx, uuid) VALUES (?, ?)" )?; - for (tx, uuid) in tx_uuid_map.iter() { - let uuid_bytes = uuid.as_bytes().to_vec(); - stmt.execute(&[tx, &uuid_bytes])?; + for mapping in mappings.iter() { + let uuid_bytes = mapping.remote.as_bytes().to_vec(); + stmt.execute(&[&mapping.local, &uuid_bytes])?; } Ok(()) } + pub fn set_lg_mapping(db_tx: &mut rusqlite::Transaction, mapping: LocalGlobalTxMapping) -> Result<()> { + TxMapper::set_lg_mappings(db_tx, vec![mapping]) + } + // TODO for when we're downloading, right? pub fn get_or_set_uuid_for_tx(db_tx: &mut rusqlite::Transaction, tx: Entid) -> Result { match TxMapper::get(db_tx, tx)? { @@ -95,8 +105,8 @@ pub mod tests { #[test] fn test_getters() { - let mut conn = schema::tests::setup_conn(); - let mut tx = conn.transaction().expect("db tx"); + let mut conn = schema::tests::setup_conn_bare(); + let mut tx = schema::tests::setup_tx(&mut conn); assert_eq!(None, TxMapper::get(&mut tx, 1).expect("success")); let set_uuid = TxMapper::get_or_set_uuid_for_tx(&mut tx, 1).expect("success"); assert_eq!(Some(set_uuid), TxMapper::get(&mut tx, 1).expect("success")); @@ -104,27 +114,29 @@ pub mod tests { #[test] fn test_bulk_setter() { - let mut conn = schema::tests::setup_conn(); - let mut tx = conn.transaction().expect("db tx"); - let mut map = HashMap::new(); + let mut conn = schema::tests::setup_conn_bare(); + let mut tx = schema::tests::setup_tx(&mut conn); + - TxMapper::set_bulk(&mut tx, &map).expect("empty map success"); + TxMapper::set_lg_mappings(&mut tx, vec![]).expect("empty map success"); let uuid1 = Uuid::new_v4(); let uuid2 = Uuid::new_v4(); - map.insert(1, uuid1); - map.insert(2, uuid2); - TxMapper::set_bulk(&mut tx, &map).expect("map success"); + TxMapper::set_lg_mappings( + &mut tx, + vec![(1, &uuid1).into(), (2, &uuid2).into()] + ).expect("map success"); assert_eq!(Some(uuid1), TxMapper::get(&mut tx, 1).expect("success")); assert_eq!(Some(uuid2), TxMapper::get(&mut tx, 2).expect("success")); - // Now let's replace one of mappings. - map.remove(&1); + // Now let's replace one of the mappings. 
let new_uuid2 = Uuid::new_v4(); - map.insert(2, new_uuid2); - TxMapper::set_bulk(&mut tx, &map).expect("map success"); + TxMapper::set_lg_mappings( + &mut tx, + vec![(1, &uuid1).into(), (2, &new_uuid2).into()] + ).expect("map success"); assert_eq!(Some(uuid1), TxMapper::get(&mut tx, 1).expect("success")); assert_eq!(Some(new_uuid2), TxMapper::get(&mut tx, 2).expect("success")); } diff --git a/tolstoy/src/tx_processor.rs b/tolstoy/src/tx_processor.rs index fcf482a4..430d512e 100644 --- a/tolstoy/src/tx_processor.rs +++ b/tolstoy/src/tx_processor.rs @@ -11,10 +11,6 @@ use std::iter::Peekable; use rusqlite; -use tolstoy_traits::errors::{ - Result, -}; - use mentat_db::{ TypedSQLValue, }; @@ -24,19 +20,21 @@ use core_traits::{ TypedValue, }; -#[derive(Debug,Clone,Serialize,Deserialize)] -pub struct TxPart { - pub e: Entid, - pub a: Entid, - pub v: TypedValue, - pub tx: Entid, - pub added: bool, -} +use public_traits::errors::{ + Result, +}; -pub trait TxReceiver { - fn tx(&mut self, tx_id: Entid, d: &mut T) -> Result<()> - where T: Iterator; - fn done(&mut self) -> Result<()>; +use types::{ + TxPart, +}; + +/// Implementors must specify type of the "receiver report" which +/// they will produce once processor is finished. +pub trait TxReceiver { + /// Called for each transaction, with an iterator over its datoms. + fn tx>(&mut self, tx_id: Entid, d: &mut T) -> Result<()>; + /// Called once processor is finished, consuming this receiver and producing a report. + fn done(self) -> RR; } pub struct Processor {} @@ -101,6 +99,7 @@ where T: Sized + Iterator> + 't { Err(_) => None, Ok(datom) => { Some(TxPart { + partitions: None, e: datom.e, a: datom.a, v: datom.v.clone(), @@ -118,25 +117,31 @@ where T: Sized + Iterator> + 't { fn to_tx_part(row: &rusqlite::Row) -> Result { Ok(TxPart { - e: row.get(0), - a: row.get(1), - v: TypedValue::from_sql_value_pair(row.get(2), row.get(3))?, - tx: row.get(4), - added: row.get(5), + partitions: None, + e: row.get_checked(0)?, + a: row.get_checked(1)?, + v: TypedValue::from_sql_value_pair(row.get_checked(2)?, row.get_checked(3)?)?, + tx: row.get_checked(4)?, + added: row.get_checked(5)?, }) } impl Processor { - pub fn process(sqlite: &rusqlite::Transaction, from_tx: Option, receiver: &mut R) -> Result<()> - where R: TxReceiver { + pub fn process> + (sqlite: &rusqlite::Transaction, from_tx: Option, mut receiver: R) -> Result { + let tx_filter = match from_tx { - Some(tx) => format!(" WHERE tx > {} ", tx), - None => format!("") + Some(tx) => format!(" WHERE timeline = 0 AND tx > {} ", tx), + None => format!("WHERE timeline = 0") }; - let select_query = format!("SELECT e, a, v, value_type_tag, tx, added FROM transactions {} ORDER BY tx", tx_filter); + let select_query = format!("SELECT e, a, v, value_type_tag, tx, added FROM timelined_transactions {} ORDER BY tx", tx_filter); let mut stmt = sqlite.prepare(&select_query)?; let mut rows = stmt.query_and_then(&[], to_tx_part)?.peekable(); + + // Walk the transaction table, keeping track of the current "tx". + // Whenever "tx" changes, construct a datoms iterator and pass it to the receiver. + // NB: this logic depends on data coming out of the rows iterator to be sorted by "tx". let mut current_tx = None; while let Some(row) = rows.next() { let datom = row?; @@ -160,7 +165,8 @@ impl Processor { } } } - receiver.done()?; - Ok(()) + // Consume the receiver, letting it produce a "receiver report" + // as defined by generic type RR. 
+ Ok(receiver.done()) } } diff --git a/tolstoy/src/tx_uploader.rs b/tolstoy/src/tx_uploader.rs new file mode 100644 index 00000000..0965c75c --- /dev/null +++ b/tolstoy/src/tx_uploader.rs @@ -0,0 +1,220 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::collections::HashMap; + +use uuid::Uuid; + +use core_traits::{ + Entid, +}; + +use mentat_db::{ + PartitionMap, + V1_PARTS, +}; + +use public_traits::errors::{ + Result, +}; + +use tx_processor::{ + TxReceiver, +}; + +use types::{ + TxPart, + GlobalTransactionLog, +}; + +use logger::d; + +pub struct UploaderReport { + pub temp_uuids: HashMap, + pub head: Option, +} + +pub(crate) struct TxUploader<'c> { + tx_temp_uuids: HashMap, + remote_client: &'c mut GlobalTransactionLog, + remote_head: &'c Uuid, + rolling_temp_head: Option, + local_partitions: PartitionMap, +} + +impl<'c> TxUploader<'c> { + pub fn new(client: &'c mut GlobalTransactionLog, remote_head: &'c Uuid, local_partitions: PartitionMap) -> TxUploader<'c> { + TxUploader { + tx_temp_uuids: HashMap::new(), + remote_client: client, + remote_head: remote_head, + rolling_temp_head: None, + local_partitions: local_partitions, + } + } +} + +/// Given a set of entids and a partition map, returns a new PartitionMap that would result from +/// expanding the partitions to fit the entids. +fn allocate_partition_map_for_entids(entids: T, local_partitions: &PartitionMap) -> PartitionMap +where T: Iterator { + let mut parts = HashMap::new(); + for name in V1_PARTS.iter().map(|&(ref part, ..)| part.to_string()) { + // This shouldn't fail: locally-sourced partitions must be present within with V1_PARTS. + let p = local_partitions.get(&name).unwrap(); + parts.insert(name, (p, p.clone())); + } + + // For a given partition, set its index to one greater than the largest encountered entid within its partition space. + for entid in entids { + for (p, new_p) in parts.values_mut() { + if p.allows_entid(entid) && entid >= new_p.next_entid() { + new_p.set_next_entid(entid + 1); + } + } + } + + let mut m = PartitionMap::default(); + for (name, (_, new_p)) in parts { + m.insert(name, new_p); + } + m +} + +impl<'c> TxReceiver for TxUploader<'c> { + fn tx(&mut self, tx_id: Entid, datoms: &mut T) -> Result<()> + where T: Iterator { + // Yes, we generate a new UUID for a given Tx, even if we might + // already have one mapped locally. Pre-existing local mapping will + // be replaced if this sync succeeds entirely. + // If we're seeing this tx again, it implies that previous attempt + // to sync didn't update our local head. Something went wrong last time, + // and it's unwise to try to re-use these remote tx mappings. + // We just leave garbage txs to be GC'd on the server. + let tx_uuid = Uuid::new_v4(); + self.tx_temp_uuids.insert(tx_id, tx_uuid); + let mut tx_chunks = vec![]; + + // TODO separate bits of network work should be combined into single 'future' + + let mut datoms: Vec = datoms.collect(); + + // TODO this should live within a transaction, once server support is in place. 
+ // For now, we're uploading the PartitionMap in transaction's first chunk. + datoms[0].partitions = Some(allocate_partition_map_for_entids(datoms.iter().map(|d| d.e), &self.local_partitions)); + + // Upload all chunks. + for datom in &datoms { + let datom_uuid = Uuid::new_v4(); + tx_chunks.push(datom_uuid); + d(&format!("putting chunk: {:?}, {:?}", &datom_uuid, &datom)); + // TODO switch over to CBOR once we're past debugging stuff. + // See https://github.com/mozilla/mentat/issues/570 + // let cbor_val = serde_cbor::to_value(&datom)?; + // self.remote_client.put_chunk(&datom_uuid, &serde_cbor::ser::to_vec_sd(&cbor_val)?)?; + self.remote_client.put_chunk(&datom_uuid, &datom)?; + } + + // Upload tx. + // NB: At this point, we may choose to update remote & local heads. + // Depending on how much we're uploading, and how unreliable our connection + // is, this might be a good thing to do to ensure we make at least some progress. + // Comes at a cost of possibly increasing racing against other clients. + let tx_parent = match self.rolling_temp_head { + Some(p) => p, + None => *self.remote_head, + }; + d(&format!("putting transaction: {:?}, {:?}, {:?}", &tx_uuid, &tx_parent, &tx_chunks)); + self.remote_client.put_transaction(&tx_uuid, &tx_parent, &tx_chunks)?; + + d(&format!("updating rolling head: {:?}", tx_uuid)); + self.rolling_temp_head = Some(tx_uuid.clone()); + + Ok(()) + } + + fn done(self) -> UploaderReport { + UploaderReport { + temp_uuids: self.tx_temp_uuids, + head: self.rolling_temp_head, + } + } +} + +#[cfg(test)] +pub mod tests { + use super::*; + + use mentat_db::{ + Partition, + V1_PARTS, + }; + + use schema::{ + PARTITION_USER, + PARTITION_TX, + PARTITION_DB, + }; + + fn bootstrap_partition_map() -> PartitionMap { + V1_PARTS.iter() + .map(|&(ref part, start, end, index, allow_excision)| (part.to_string(), Partition::new(start, end, index, allow_excision))) + .collect() + } + + #[test] + fn test_allocate_partition_map_for_entids() { + let bootstrap_map = bootstrap_partition_map(); + + // Empty list of entids should not allocate any space in partitions. + let entids: Vec = vec![]; + let no_op_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(bootstrap_map, no_op_map); + + // Only user partition. + let entids = vec![65536]; + let new_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(65537, new_map.get(PARTITION_USER).unwrap().next_entid()); + // Other partitions are untouched. + assert_eq!(41, new_map.get(PARTITION_DB).unwrap().next_entid()); + assert_eq!(268435456, new_map.get(PARTITION_TX).unwrap().next_entid()); + + // Only tx partition. + let entids = vec![268435666]; + let new_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(268435667, new_map.get(PARTITION_TX).unwrap().next_entid()); + // Other partitions are untouched. + assert_eq!(65536, new_map.get(PARTITION_USER).unwrap().next_entid()); + assert_eq!(41, new_map.get(PARTITION_DB).unwrap().next_entid()); + + // Only DB partition. + let entids = vec![41]; + let new_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(42, new_map.get(PARTITION_DB).unwrap().next_entid()); + // Other partitions are untouched. + assert_eq!(65536, new_map.get(PARTITION_USER).unwrap().next_entid()); + assert_eq!(268435456, new_map.get(PARTITION_TX).unwrap().next_entid()); + + // User and tx partitions. 
+ let entids = vec![65537, 268435456]; + let new_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(65538, new_map.get(PARTITION_USER).unwrap().next_entid()); + assert_eq!(268435457, new_map.get(PARTITION_TX).unwrap().next_entid()); + // DB partition is untouched. + assert_eq!(41, new_map.get(PARTITION_DB).unwrap().next_entid()); + + // DB, user and tx partitions. + let entids = vec![41, 65666, 268435457]; + let new_map = allocate_partition_map_for_entids(entids.into_iter(), &bootstrap_map); + assert_eq!(65667, new_map.get(PARTITION_USER).unwrap().next_entid()); + assert_eq!(268435458, new_map.get(PARTITION_TX).unwrap().next_entid()); + assert_eq!(42, new_map.get(PARTITION_DB).unwrap().next_entid()); + } +} diff --git a/tolstoy/src/types.rs b/tolstoy/src/types.rs new file mode 100644 index 00000000..40cc66cc --- /dev/null +++ b/tolstoy/src/types.rs @@ -0,0 +1,101 @@ +// Copyright 2018 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +use std::cmp::Ordering; +use uuid::Uuid; + +use core_traits::{ + Entid, + TypedValue, +}; + +use mentat_db::PartitionMap; + +use public_traits::errors::{ + Result, +}; + +pub struct LocalGlobalTxMapping<'a> { + pub local: Entid, + pub remote: &'a Uuid, +} + +impl<'a> From<(Entid, &'a Uuid)> for LocalGlobalTxMapping<'a> { + fn from((local, remote): (Entid, &'a Uuid)) -> LocalGlobalTxMapping { + LocalGlobalTxMapping { + local: local, + remote: remote, + } + } +} + +impl<'a> LocalGlobalTxMapping<'a> { + pub fn new(local: Entid, remote: &'a Uuid) -> LocalGlobalTxMapping<'a> { + LocalGlobalTxMapping { + local: local, + remote: remote + } + } +} + +// TODO unite these around something like `enum TxIdentifier {Global(Uuid), Local(Entid)}`? +#[derive(Debug, Clone)] +pub struct LocalTx { + pub tx: Entid, + pub parts: Vec, +} + + +impl PartialOrd for LocalTx { + fn partial_cmp(&self, other: &LocalTx) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for LocalTx { + fn cmp(&self, other: &LocalTx) -> Ordering { + self.tx.cmp(&other.tx) + } +} + +impl PartialEq for LocalTx { + fn eq(&self, other: &LocalTx) -> bool { + self.tx == other.tx + } +} + +impl Eq for LocalTx {} + +// For returning out of the downloader as an ordered list. +#[derive(Debug, Clone, PartialEq)] +pub struct Tx { + pub tx: Uuid, + pub parts: Vec, +} + +#[derive(Debug,Clone,Serialize,Deserialize,PartialEq)] +pub struct TxPart { + // TODO this is a temporary for development. Only first TxPart in a chunk series should have a non-None 'parts'. + // 'parts' should actually live in a transaction, but we do this now to avoid changing the server until dust settles. 
+ pub partitions: Option, + pub e: Entid, + pub a: Entid, + pub v: TypedValue, + pub tx: Entid, + pub added: bool, +} + +pub trait GlobalTransactionLog { + fn head(&self) -> Result; + fn transactions_after(&self, tx: &Uuid) -> Result>; + fn set_head(&mut self, tx: &Uuid) -> Result<()>; + fn put_transaction(&mut self, tx: &Uuid, parent_tx: &Uuid, chunk_txs: &Vec) -> Result<()>; + fn put_chunk(&mut self, tx: &Uuid, payload: &TxPart) -> Result<()>; +} diff --git a/tools/cli/src/mentat_cli/repl.rs b/tools/cli/src/mentat_cli/repl.rs index 46aee828..9965ac07 100644 --- a/tools/cli/src/mentat_cli/repl.rs +++ b/tools/cli/src/mentat_cli/repl.rs @@ -47,11 +47,6 @@ use mentat::{ TypedValue, }; -#[cfg(feature = "syncable")] -use mentat::{ - Syncable, -}; - use command_parser::{ Command, }; @@ -356,7 +351,7 @@ impl Repl { #[cfg(feature = "syncable")] Command::Sync(args) => { match self.store.sync(&args[0], &args[1]) { - Ok(_) => println!("Synced!"), + Ok(report) => println!("Sync report: {}", report), Err(e) => eprintln!("{:?}", e) }; }, @@ -403,7 +398,7 @@ impl Repl { if self.path.is_empty() || path != self.path { let next = match encryption_key { #[cfg(not(feature = "sqlcipher"))] - Some(_) => return Err(::mentat::MentatError::RusqliteError(".open_encrypted requires the sqlcipher Mentat feature".into())), + Some(_) => return Err(::mentat::MentatError::RusqliteError(".open_encrypted requires the sqlcipher Mentat feature".into(), "".into())), #[cfg(feature = "sqlcipher")] Some(k) => { Store::open_with_key(path.as_str(), k)? diff --git a/transaction/src/entity_builder.rs b/transaction/src/entity_builder.rs index af4ac1ac..05ce8bb9 100644 --- a/transaction/src/entity_builder.rs +++ b/transaction/src/entity_builder.rs @@ -84,6 +84,7 @@ use public_traits::errors::{ pub type Terms = (Vec>, InternSet); +#[derive(Debug)] pub struct TermBuilder { tempids: InternSet, terms: Vec>, diff --git a/transaction/src/lib.rs b/transaction/src/lib.rs index 6260d38e..73b90d2d 100644 --- a/transaction/src/lib.rs +++ b/transaction/src/lib.rs @@ -320,6 +320,21 @@ impl<'a, 'c> InProgress<'a, 'c> { pub fn last_tx_id(&self) -> Entid { self.partition_map[":db.part/tx"].next_entid() - 1 } + + pub fn savepoint(&self, name: &str) -> Result<()> { + self.transaction.execute(&format!("SAVEPOINT {}", name), &[])?; + Ok(()) + } + + pub fn rollback_savepoint(&self, name: &str) -> Result<()> { + self.transaction.execute(&format!("ROLLBACK TO {}", name), &[])?; + Ok(()) + } + + pub fn release_savepoint(&self, name: &str) -> Result<()> { + self.transaction.execute(&format!("RELEASE {}", name), &[])?; + Ok(()) + } } impl<'a, 'c> InProgressRead<'a, 'c> {
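The savepoint helpers added to InProgress above are what let the sync merge transact speculatively and discard rebased transactions that turn out to be no-ops. A minimal sketch of that pattern, assuming an open InProgress and a caller-supplied emptiness check; the function and closure here are hypothetical, not part of this patch:

fn transact_speculatively<'a, 'c, F>(ip: &mut InProgress<'a, 'c>,
                                     builder: TermBuilder,
                                     is_empty: F) -> Result<bool>
where F: Fn(&rusqlite::Transaction, Entid) -> Result<bool> {
    // Mark a rollback point before applying the candidate transaction.
    ip.savepoint("speculative")?;

    let report = ip.transact_builder(builder)?;

    if is_empty(&ip.transaction, report.tx_id)? {
        // The transaction added nothing new; undo it entirely.
        ip.rollback_savepoint("speculative")?;
        Ok(false)
    } else {
        // Keep the transacted datoms; releasing just drops the savepoint marker.
        ip.release_savepoint("speculative")?;
        Ok(true)
    }
}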