mentat/db/src/bootstrap.rs
Nick Alexander 5369f03464 Improve parsing of nested edn::ValueAndSpan streams. r=rnewman (#393)
* Pre: Expose more in edn.

* Pre: Make it easier to work with ValueAndSpan.

with_spans() is a temporary hack, needed only because I don't care to
parse the bootstrap assertions from text right now.

* Part 1a: Add `value_and_span` for parsing nested `edn::ValueAndSpan` instances.

I wasn't able to abstract over `edn::Value` and `edn::ValueAndSpan`;
there are multiple obstacles.  I chose to roll with
`edn::ValueAndSpan` since it exposes the additional span information
that we will want to form good error messages in the future.

* Part 1b: Add keyword_map() parsing an `edn::Value::Vector` into an `edn::Value::map`.

* Part 1c: Add `Log`/`.log(...)` for logging parser progress.

This is a terrible hack, but it sure helps to debug complicated nested
parsers.  I don't even know what a principled approach would look
like; since our parser combinators are so frequently expressed in
code, it's hard to imagine a data-driven interpreter that can help
debug things.

* Part 2: Use `value_and_span` apparatus in tx-parser/.

I break an abstraction boundary by returning a value column
`edn::ValueAndSpan` rather than just an `edn::Value`.  That is, the
transaction processor shouldn't care where the `edn::Value` it is
processing arose -- even we care to track that information we should
bake it into the `Entity` type.  We do this because we need to
dynamically parse the value column to support nested maps, and parsing
requires a full `edn::ValueAndSpan`.  Alternately, we could cheat and
fake the spans when parsing nested maps, but that's potentially
expensive.

* Part 3: Use `value_and_span` apparatus in query-parser/.

* Part 4: Use `value_and_span` apparatus in root crate.

* Review comment: Make Span and SpanPosition Copy.

* Review comment: nits.

* Review comment: Make `or` be `or_exactly`.

I baked the eof checking directly into the parser, rather than using
the skip and eof parsers.  I also took the time to restore some tests
that were mistakenly commented out.

* Review comment: Extract and use def_matches_* macros.

* Review comment: .map() as late as possible.
2017-04-06 10:06:28 -07:00

278 lines
13 KiB
Rust

// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#![allow(dead_code)]
use edn;
use errors::{ErrorKind, Result};
use edn::types::Value;
use edn::symbols;
use entids;
use db::TypedSQLValue;
use mentat_tx::entities::Entity;
use mentat_tx_parser;
use mentat_core::{
IdentMap,
Schema,
TypedValue,
values,
};
use schema::SchemaBuilding;
use types::{Partition, PartitionMap};
/// The first transaction ID applied to the knowledge base.
///
/// This is the start of the :db.part/tx partition.
pub const TX0: i64 = 0x10000000;
lazy_static! {
static ref V1_IDENTS: Vec<(symbols::NamespacedKeyword, i64)> = {
vec![(ns_keyword!("db", "ident"), entids::DB_IDENT),
(ns_keyword!("db.part", "db"), entids::DB_PART_DB),
(ns_keyword!("db", "txInstant"), entids::DB_TX_INSTANT),
(ns_keyword!("db.install", "partition"), entids::DB_INSTALL_PARTITION),
(ns_keyword!("db.install", "valueType"), entids::DB_INSTALL_VALUE_TYPE),
(ns_keyword!("db.install", "attribute"), entids::DB_INSTALL_ATTRIBUTE),
(ns_keyword!("db", "valueType"), entids::DB_VALUE_TYPE),
(ns_keyword!("db", "cardinality"), entids::DB_CARDINALITY),
(ns_keyword!("db", "unique"), entids::DB_UNIQUE),
(ns_keyword!("db", "isComponent"), entids::DB_IS_COMPONENT),
(ns_keyword!("db", "index"), entids::DB_INDEX),
(ns_keyword!("db", "fulltext"), entids::DB_FULLTEXT),
(ns_keyword!("db", "noHistory"), entids::DB_NO_HISTORY),
(ns_keyword!("db", "add"), entids::DB_ADD),
(ns_keyword!("db", "retract"), entids::DB_RETRACT),
(ns_keyword!("db.part", "user"), entids::DB_PART_USER),
(ns_keyword!("db.part", "tx"), entids::DB_PART_TX),
(ns_keyword!("db", "excise"), entids::DB_EXCISE),
(ns_keyword!("db.excise", "attrs"), entids::DB_EXCISE_ATTRS),
(ns_keyword!("db.excise", "beforeT"), entids::DB_EXCISE_BEFORE_T),
(ns_keyword!("db.excise", "before"), entids::DB_EXCISE_BEFORE),
(ns_keyword!("db.alter", "attribute"), entids::DB_ALTER_ATTRIBUTE),
(ns_keyword!("db.type", "ref"), entids::DB_TYPE_REF),
(ns_keyword!("db.type", "keyword"), entids::DB_TYPE_KEYWORD),
(ns_keyword!("db.type", "long"), entids::DB_TYPE_LONG),
(ns_keyword!("db.type", "double"), entids::DB_TYPE_DOUBLE),
(ns_keyword!("db.type", "string"), entids::DB_TYPE_STRING),
(ns_keyword!("db.type", "boolean"), entids::DB_TYPE_BOOLEAN),
(ns_keyword!("db.type", "instant"), entids::DB_TYPE_INSTANT),
(ns_keyword!("db.type", "bytes"), entids::DB_TYPE_BYTES),
(ns_keyword!("db.cardinality", "one"), entids::DB_CARDINALITY_ONE),
(ns_keyword!("db.cardinality", "many"), entids::DB_CARDINALITY_MANY),
(ns_keyword!("db.unique", "value"), entids::DB_UNIQUE_VALUE),
(ns_keyword!("db.unique", "identity"), entids::DB_UNIQUE_IDENTITY),
(ns_keyword!("db", "doc"), entids::DB_DOC),
]
};
static ref V2_IDENTS: Vec<(symbols::NamespacedKeyword, i64)> = {
[(*V1_IDENTS).clone(),
vec![(ns_keyword!("db.schema", "version"), entids::DB_SCHEMA_VERSION),
(ns_keyword!("db.schema", "attribute"), entids::DB_SCHEMA_ATTRIBUTE),
]].concat()
};
static ref V1_PARTS: Vec<(symbols::NamespacedKeyword, i64, i64)> = {
vec![(ns_keyword!("db.part", "db"), 0, (1 + V1_IDENTS.len()) as i64),
(ns_keyword!("db.part", "user"), 0x10000, 0x10000),
(ns_keyword!("db.part", "tx"), TX0, TX0),
]
};
static ref V2_PARTS: Vec<(symbols::NamespacedKeyword, i64, i64)> = {
vec![(ns_keyword!("db.part", "db"), 0, (1 + V2_IDENTS.len()) as i64),
(ns_keyword!("db.part", "user"), 0x10000, 0x10000),
(ns_keyword!("db.part", "tx"), TX0, TX0),
]
};
static ref V1_SYMBOLIC_SCHEMA: Value = {
let s = r#"
{:db/ident {:db/valueType :db.type/keyword
:db/cardinality :db.cardinality/one
:db/index true
:db/unique :db.unique/identity}
:db.install/partition {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/many}
:db.install/valueType {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/many}
:db.install/attribute {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/many}
;; TODO: support user-specified functions in the future.
;; :db.install/function {:db/valueType :db.type/ref
;; :db/cardinality :db.cardinality/many}
:db/txInstant {:db/valueType :db.type/long
:db/cardinality :db.cardinality/one
:db/index true}
:db/valueType {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one}
:db/cardinality {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one}
:db/doc {:db/valueType :db.type/string
:db/cardinality :db.cardinality/one}
:db/unique {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one}
:db/isComponent {:db/valueType :db.type/boolean
:db/cardinality :db.cardinality/one}
:db/index {:db/valueType :db.type/boolean
:db/cardinality :db.cardinality/one}
:db/fulltext {:db/valueType :db.type/boolean
:db/cardinality :db.cardinality/one}
:db/noHistory {:db/valueType :db.type/boolean
:db/cardinality :db.cardinality/one}}"#;
edn::parse::value(s)
.map(|v| v.without_spans())
.map_err(|_| ErrorKind::BadBootstrapDefinition("Unable to parse V1_SYMBOLIC_SCHEMA".into()))
.unwrap()
};
static ref V2_SYMBOLIC_SCHEMA: Value = {
let s = r#"
{:db.alter/attribute {:db/valueType :db.type/ref
:db/cardinality :db.cardinality/many}
:db.schema/version {:db/valueType :db.type/long
:db/cardinality :db.cardinality/one}
;; unique-value because an attribute can only belong to a single
;; schema fragment.
:db.schema/attribute {:db/valueType :db.type/ref
:db/index true
:db/unique :db.unique/value
:db/cardinality :db.cardinality/many}}"#;
let right = edn::parse::value(s)
.map(|v| v.without_spans())
.map_err(|_| ErrorKind::BadBootstrapDefinition("Unable to parse V2_SYMBOLIC_SCHEMA".into()))
.unwrap();
edn::utils::merge(&V1_SYMBOLIC_SCHEMA, &right)
.ok_or(ErrorKind::BadBootstrapDefinition("Unable to parse V2_SYMBOLIC_SCHEMA".into()))
.unwrap()
};
}
/// Convert (ident, entid) pairs into [:db/add IDENT :db/ident IDENT] `Value` instances.
fn idents_to_assertions(idents: &[(symbols::NamespacedKeyword, i64)]) -> Vec<Value> {
idents
.into_iter()
.map(|&(ref ident, _)| {
let value = Value::NamespacedKeyword(ident.clone());
Value::Vector(vec![values::DB_ADD.clone(), value.clone(), values::DB_IDENT.clone(), value.clone()])
})
.collect()
}
/// Convert {:ident {:key :value ...} ...} to
/// vec![(symbols::NamespacedKeyword(:ident), symbols::NamespacedKeyword(:key), TypedValue(:value)), ...].
///
/// Such triples are closer to what the transactor will produce when processing attribute
/// assertions.
fn symbolic_schema_to_triples(ident_map: &IdentMap, symbolic_schema: &Value) -> Result<Vec<(symbols::NamespacedKeyword, symbols::NamespacedKeyword, TypedValue)>> {
// Failure here is a coding error, not a runtime error.
let mut triples: Vec<(symbols::NamespacedKeyword, symbols::NamespacedKeyword, TypedValue)> = vec![];
// TODO: Consider `flat_map` and `map` rather than loop.
match *symbolic_schema {
Value::Map(ref m) => {
for (ident, mp) in m {
let ident = match ident {
&Value::NamespacedKeyword(ref ident) => ident,
_ => bail!(ErrorKind::BadBootstrapDefinition(format!("Expected namespaced keyword for ident but got '{:?}'", ident)))
};
match *mp {
Value::Map(ref mpp) => {
for (attr, value) in mpp {
let attr = match attr {
&Value::NamespacedKeyword(ref attr) => attr,
_ => bail!(ErrorKind::BadBootstrapDefinition(format!("Expected namespaced keyword for attr but got '{:?}'", attr)))
};
// We have symbolic idents but the transactor handles entids. Ad-hoc
// convert right here. This is a fundamental limitation on the
// bootstrap symbolic schema format; we can't represent "real" keywords
// at this time.
//
// TODO: remove this limitation, perhaps by including a type tag in the
// bootstrap symbolic schema, or by representing the initial bootstrap
// schema directly as Rust data.
let typed_value = match TypedValue::from_edn_value(value) {
Some(TypedValue::Keyword(ref k)) => {
ident_map.get(k)
.map(|entid| TypedValue::Ref(*entid))
.ok_or(ErrorKind::UnrecognizedIdent(k.to_string()))?
},
Some(v) => v,
_ => bail!(ErrorKind::BadBootstrapDefinition(format!("Expected Mentat typed value for value but got '{:?}'", value)))
};
triples.push((ident.clone(), attr.clone(), typed_value));
}
},
_ => bail!(ErrorKind::BadBootstrapDefinition("Expected {:db/ident {:db/attr value ...} ...}".into()))
}
}
},
_ => bail!(ErrorKind::BadBootstrapDefinition("Expected {...}".into()))
}
Ok(triples)
}
/// Convert {IDENT {:key :value ...} ...} to [[:db/add IDENT :key :value] ...].
fn symbolic_schema_to_assertions(symbolic_schema: &Value) -> Result<Vec<Value>> {
// Failure here is a coding error, not a runtime error.
let mut assertions: Vec<Value> = vec![];
match *symbolic_schema {
Value::Map(ref m) => {
for (ident, mp) in m {
match *mp {
Value::Map(ref mpp) => {
for (attr, value) in mpp {
assertions.push(Value::Vector(vec![values::DB_ADD.clone(),
ident.clone(),
attr.clone(),
value.clone()]));
}
},
_ => bail!(ErrorKind::BadBootstrapDefinition("Expected {:db/ident {:db/attr value ...} ...}".into()))
}
}
},
_ => bail!(ErrorKind::BadBootstrapDefinition("Expected {...}".into()))
}
Ok(assertions)
}
pub fn bootstrap_partition_map() -> PartitionMap {
V2_PARTS[..].iter()
.map(|&(ref part, start, index)| (part.to_string(), Partition::new(start, index)))
.collect()
}
pub fn bootstrap_ident_map() -> IdentMap {
V2_IDENTS[..].iter()
.map(|&(ref ident, entid)| (ident.clone(), entid))
.collect()
}
pub fn bootstrap_schema() -> Schema {
let ident_map = bootstrap_ident_map();
let bootstrap_triples = symbolic_schema_to_triples(&ident_map, &V2_SYMBOLIC_SCHEMA).unwrap();
Schema::from_ident_map_and_triples(ident_map, bootstrap_triples).unwrap()
}
pub fn bootstrap_entities() -> Vec<Entity> {
let bootstrap_assertions: Value = Value::Vector([
symbolic_schema_to_assertions(&V2_SYMBOLIC_SCHEMA).unwrap(),
idents_to_assertions(&V2_IDENTS[..]),
].concat());
// Failure here is a coding error (since the inputs are fixed), not a runtime error.
// TODO: represent these bootstrap data errors rather than just panicing.
let bootstrap_entities: Vec<Entity> = mentat_tx_parser::Tx::parse(bootstrap_assertions.with_spans()).unwrap();
return bootstrap_entities;
}