mentat/edn/src/edn.rustpeg
Nick Alexander 76507623ac Part 4: Prepare EDN Entity type for interning tempids during parsing.
This is all part of moving the entity builder away from building term
instances and toward building entity instances.  One of the nice
things that the existing term interface does is allow consumers to use
lightweight reference counted tempid handles; I don't want to lose
that, so we'll build it into the entity data structures directly.
2018-07-05 11:17:20 -07:00

491 lines
18 KiB
Text

/* -*- comment-start: "//"; -*- */
/* vim: set filetype=rust.rustpeg */
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::iter::FromIterator;
use std::f64::{NAN, INFINITY, NEG_INFINITY};
use chrono::{
DateTime,
TimeZone,
Utc
};
use num::BigInt;
use ordered_float::OrderedFloat;
use uuid::Uuid;
use entities::*;
use query;
use query::FromValue;
use symbols::*;
use types::{SpannedValue, Span, ValueAndSpan};
// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html
// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
// to trace where the parser is failing
// TODO: Support tagged elements
// TODO: Support discard
pub nil -> SpannedValue = "nil" { SpannedValue::Nil }
pub nan -> SpannedValue = "#f" whitespace+ "NaN" { SpannedValue::Float(OrderedFloat(NAN)) }
pub infinity -> SpannedValue = "#f" whitespace+ s:$(sign) "Infinity"
{ SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })) }
pub boolean -> SpannedValue
= "true" { SpannedValue::Boolean(true) }
/ "false" { SpannedValue::Boolean(false) }
digit = [0-9]
alphanumeric = [0-9a-zA-Z]
octaldigit = [0-7]
validbase = [3][0-6] / [12][0-9] / [2-9]
hex = [0-9a-fA-F]
sign = [+-]
pub raw_bigint -> BigInt = b:$( sign? digit+ ) "N"
{ b.parse::<BigInt>().unwrap() }
pub raw_octalinteger -> i64 = "0" i:$( octaldigit+ )
{ i64::from_str_radix(i, 8).unwrap() }
pub raw_hexinteger -> i64 = "0x" i:$( hex+ )
{ i64::from_str_radix(i, 16).unwrap() }
pub raw_basedinteger -> i64 = b:$( validbase ) "r" i:$( alphanumeric+ )
{ i64::from_str_radix(i, b.parse::<u32>().unwrap()).unwrap() }
pub raw_integer -> i64 = i:$( sign? digit+ ) !("." / ([eE]))
{ i.parse::<i64>().unwrap() }
pub raw_float -> OrderedFloat<f64> = f:$(sign? digit+ ("." digit+)? ([eE] sign? digit+)?)
{ OrderedFloat(f.parse::<f64>().unwrap()) }
pub bigint -> SpannedValue = v:raw_bigint { SpannedValue::BigInteger(v) }
pub octalinteger -> SpannedValue = v:raw_octalinteger { SpannedValue::Integer(v) }
pub hexinteger -> SpannedValue = v:raw_hexinteger { SpannedValue::Integer(v) }
pub basedinteger -> SpannedValue = v:raw_basedinteger { SpannedValue::Integer(v) }
pub integer -> SpannedValue = v:raw_integer { SpannedValue::Integer(v) }
pub float -> SpannedValue = v:raw_float { SpannedValue::Float(v) }
number -> SpannedValue = ( bigint / basedinteger / hexinteger / octalinteger / integer / float )
// TODO: standalone characters: \<char>, \newline, \return, \space and \tab.
string_special_char -> &'input str = "\\" $([\\"ntr])
string_normal_chars -> &'input str = $([^"\\]+)
// This is what we need to do in order to unescape. We can't just match the entire string slice:
// we get a Vec<&str> from rust-peg, where some of the parts might be unescaped special characters,
// and we join it together to form an output string.
// E.g., input = r#"\"foo\\\\bar\""#
// output = [quote, "foo", backslash, "bar", quote]
// result = r#""foo\\bar""#
// For the typical case, string_normal_chars will match multiple, leading to a single-element vec.
pub raw_text -> String = "\"" t:((string_special_char / string_normal_chars)*) "\""
{ t.join(&"").to_string() }
pub text -> SpannedValue
= v:raw_text { SpannedValue::Text(v) }
// RFC 3339 timestamps. #inst "1985-04-12T23:20:50.52Z"
// We accept an arbitrary depth of decimals.
// Note that we discard the timezone information -- all times are translated to UTC.
inst_string -> DateTime<Utc> =
"#inst" whitespace+ "\"" d:$( [0-9]*<4> "-" [0-2][0-9] "-" [0-3][0-9]
"T"
[0-2][0-9] ":" [0-5][0-9] ":" [0-6][0-9]
("." [0-9]+)?
("Z" / (("+" / "-") [0-2][0-9] ":" [0-5][0-9]))
)
"\"" {?
DateTime::parse_from_rfc3339(d)
.map(|t| t.with_timezone(&Utc))
.map_err(|_| "invalid datetime") // Oh, rustpeg.
}
inst_micros -> DateTime<Utc> =
"#instmicros" whitespace+ d:$( digit+ ) {
let micros = d.parse::<i64>().unwrap();
let seconds: i64 = micros / 1000000;
let nanos: u32 = ((micros % 1000000).abs() as u32) * 1000;
Utc.timestamp(seconds, nanos)
}
inst_millis -> DateTime<Utc> =
"#instmillis" whitespace+ d:$( digit+ ) {
let millis = d.parse::<i64>().unwrap();
let seconds: i64 = millis / 1000;
let nanos: u32 = ((millis % 1000).abs() as u32) * 1000000;
Utc.timestamp(seconds, nanos)
}
inst -> SpannedValue = t:(inst_millis / inst_micros / inst_string)
{ SpannedValue::Instant(t) }
uuid_string -> Uuid =
"\"" u:$( [a-f0-9]*<8> "-" [a-f0-9]*<4> "-" [a-f0-9]*<4> "-" [a-f0-9]*<4> "-" [a-f0-9]*<12> ) "\"" {
Uuid::parse_str(u).expect("this is a valid UUID string")
}
pub uuid -> SpannedValue = "#uuid" whitespace+ u:uuid_string
{ SpannedValue::Uuid(u) }
namespace_divider = "."
namespace_separator = "/"
// TODO: Be more picky here
// Keywords follow the rules of symbols, except they can (and must) begin with :
// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>]
symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-]
symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
symbol_name = ( symbol_char_initial+ symbol_char_subsequent* )
plain_symbol_name = symbol_name / "..." / "."
keyword_prefix = ":"
pub symbol -> SpannedValue =
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(plain_symbol_name)
{ SpannedValue::from_symbol(ns, n) }
/ #expected("symbol")
pub keyword -> SpannedValue =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name)
{ SpannedValue::from_keyword(ns, n) }
/ #expected("keyword")
pub list -> SpannedValue = "(" __ v:(value)* __ ")"
{ SpannedValue::List(LinkedList::from_iter(v)) }
pub vector -> SpannedValue = "[" __ v:(value)* __ "]"
{ SpannedValue::Vector(v) }
pub set -> SpannedValue = "#{" __ v:(value)* __ "}"
{ SpannedValue::Set(BTreeSet::from_iter(v)) }
pair -> (ValueAndSpan, ValueAndSpan) =
k:(value) v:(value) {
(k, v)
}
pub map -> SpannedValue = "{" __ v:(pair)* __ "}"
{ SpannedValue::Map(BTreeMap::from_iter(v)) }
// It's important that float comes before integer or the parser assumes that
// floats are integers and fails to parse
pub value -> ValueAndSpan =
__ start:#position v:(nil / nan / infinity / boolean / number / inst / uuid / text / keyword / symbol / list / vector / map / set) end:#position __ {
ValueAndSpan {
inner: v,
span: Span::new(start, end)
}
}
/ #expected("value")
atom -> ValueAndSpan
= v:value {? if v.is_atom() { Ok(v) } else { Err("expected atom") } }
// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
whitespace = #quiet<[ \r\n\t,]>
comment = #quiet<";" [^\r\n]* [\r\n]?>
__ = (whitespace / comment)*
// Transaction entity parser starts here.
pub op -> OpType
= ":db/add" { OpType::Add }
/ ":db/retract" { OpType::Retract }
raw_keyword -> Keyword =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name) {
match ns {
Some(ns) => Keyword::namespaced(ns, n),
None => Keyword::plain(n),
}
}
/ #expected("keyword")
raw_forward_keyword -> Keyword
= v:raw_keyword {? if v.is_forward() { Ok(v) } else { Err("expected :forward or :forward/keyword") } }
raw_backward_keyword -> Keyword
= v:raw_keyword {? if v.is_backward() { Ok(v) } else { Err("expected :_backword or :backward/_keyword") } }
raw_namespaced_keyword -> Keyword
= keyword_prefix ns:$(symbol_namespace) namespace_separator n:$(symbol_name) { Keyword::namespaced(ns, n) }
/ #expected("namespaced keyword")
raw_forward_namespaced_keyword -> Keyword
= v:raw_namespaced_keyword {? if v.is_forward() { Ok(v) } else { Err("expected namespaced :forward/keyword") } }
raw_backward_namespaced_keyword -> Keyword
= v:raw_namespaced_keyword {? if v.is_backward() { Ok(v) } else { Err("expected namespaced :backward/_keyword") } }
entid -> EntidOrIdent
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { EntidOrIdent::Entid(v) }
/ v:raw_namespaced_keyword { EntidOrIdent::Ident(v) }
/ #expected("entid")
forward_entid -> EntidOrIdent
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { EntidOrIdent::Entid(v) }
/ v:raw_forward_namespaced_keyword { EntidOrIdent::Ident(v) }
/ #expected("forward entid")
backward_entid -> EntidOrIdent
= v:raw_backward_namespaced_keyword { EntidOrIdent::Ident(v.to_reversed()) }
/ #expected("backward entid")
lookup_ref -> LookupRef<ValueAndSpan>
= "(" __ "lookup-ref" __ a:(entid) __ v:(value) __ ")" { LookupRef { a: AttributePlace::Entid(a), v } }
/ #expected("lookup-ref")
tx_function -> TxFunction
= "(" __ n:$(symbol_name) __ ")" { TxFunction { op: PlainSymbol::plain(n) } }
entity_place -> EntityPlace<ValueAndSpan>
= v:raw_text { EntityPlace::TempId(TempId::External(v).into()) }
/ v:entid { EntityPlace::Entid(v) }
/ v:lookup_ref { EntityPlace::LookupRef(v) }
/ v:tx_function { EntityPlace::TxFunction(v) }
value_place_pair -> (EntidOrIdent, ValuePlace<ValueAndSpan>)
= k:(entid) __ v:(value_place) { (k, v) }
map_notation -> MapNotation<ValueAndSpan>
= "{" __ kvs:(value_place_pair*) __ "}" { kvs.into_iter().collect() }
value_place -> ValuePlace<ValueAndSpan>
= __ v:lookup_ref __ { ValuePlace::LookupRef(v) }
/ __ v:tx_function __ { ValuePlace::TxFunction(v) }
/ __ "[" __ vs:(value_place*) __ "]" __ { ValuePlace::Vector(vs) }
/ __ v:map_notation __ { ValuePlace::MapNotation(v) }
/ __ v:atom __ { ValuePlace::Atom(v) }
pub entity -> Entity<ValueAndSpan>
= __ "[" __ op:(op) __ e:(entity_place) __ a:(forward_entid) __ v:(value_place) __ "]" __ { Entity::AddOrRetract { op, e: e, a: AttributePlace::Entid(a), v: v } }
/ __ "[" __ op:(op) __ e:(value_place) __ a:(backward_entid) __ v:(entity_place) __ "]" __ { Entity::AddOrRetract { op, e: v, a: AttributePlace::Entid(a), v: e } }
/ __ map:map_notation __ { Entity::MapNotation(map) }
/ #expected("entity")
pub entities -> Vec<Entity<ValueAndSpan>>
= __ "[" __ es:(entity*) __ "]" __ { es }
// Query parser starts here.
//
// We expect every rule except the `raw_*` rules to eat whitespace
// (with `__`) at its start and finish. That means that every string
// pattern (say "[") should be bracketed on either side with either a
// whitespace-eating rule or an explicit whitespace eating `__`.
query_function -> query::QueryFunction
= __ n:$(symbol_name) __ {? query::QueryFunction::from_symbol(&PlainSymbol::plain(n)).ok_or("expected query function") }
fn_arg -> query::FnArg
= v:value {? query::FnArg::from_value(&v).ok_or("expected query function argument") }
/ __ "[" args:fn_arg+ "]" __ { query::FnArg::Vector(args) }
find_elem -> query::Element
= __ v:variable __ { query::Element::Variable(v) }
/ __ "(" __ "the" v:variable ")" __ { query::Element::Corresponding(v) }
/ __ "(" __ "pull" var:variable "[" patterns:pull_attribute+ "]" __ ")" __ { query::Element::Pull(query::Pull { var, patterns }) }
/ __ "(" func:query_function args:fn_arg* ")" __ { query::Element::Aggregate(query::Aggregate { func, args }) }
find_spec -> query::FindSpec
= f:find_elem "." __ { query::FindSpec::FindScalar(f) }
/ fs:find_elem+ { query::FindSpec::FindRel(fs) }
/ __ "[" f:find_elem __ "..." __ "]" __ { query::FindSpec::FindColl(f) }
/ __ "[" fs:find_elem+ "]" __ { query::FindSpec::FindTuple(fs) }
pull_attribute -> query::PullAttributeSpec
= __ "*" __ { query::PullAttributeSpec::Wildcard }
/ __ k:raw_forward_namespaced_keyword __ alias:(":as" __ alias:raw_forward_keyword __ { alias })? {
let attribute = query::PullConcreteAttribute::Ident(::std::rc::Rc::new(k));
let alias = alias.map(|alias| ::std::rc::Rc::new(alias));
query::PullAttributeSpec::Attribute(
query::NamedPullAttribute {
attribute,
alias: alias,
})
}
limit -> query::Limit
= __ v:variable __ { query::Limit::Variable(v) }
/ __ n:(raw_octalinteger / raw_hexinteger / raw_basedinteger / raw_integer) __ {?
if n > 0 {
Ok(query::Limit::Fixed(n as u64))
} else {
Err("expected positive integer")
}
}
order -> query::Order
= __ "(" __ "asc" v:variable ")" __ { query::Order(query::Direction::Ascending, v) }
/ __ "(" __ "desc" v:variable ")" __ { query::Order(query::Direction::Descending, v) }
/ v:variable { query::Order(query::Direction::Ascending, v) }
pattern_value_place -> query::PatternValuePlace
= v:value {? query::PatternValuePlace::from_value(&v).ok_or("expected pattern_value_place") }
pattern_non_value_place -> query::PatternNonValuePlace
= v:value {? query::PatternNonValuePlace::from_value(&v).ok_or("expected pattern_non_value_place") }
pattern -> query::WhereClause
= __ "["
src:src_var?
e:pattern_non_value_place
a:pattern_non_value_place
v:pattern_value_place?
tx:pattern_non_value_place?
"]" __
{?
let v = v.unwrap_or(query::PatternValuePlace::Placeholder);
let tx = tx.unwrap_or(query::PatternNonValuePlace::Placeholder);
// Pattern::new takes care of reversal of reversed
// attributes: [?x :foo/_bar ?y] turns into
// [?y :foo/bar ?x].
//
// This is a bit messy: the inner conversion to a Pattern can
// fail if the input is something like
//
// ```edn
// [?x :foo/_reversed 23.4]
// ```
//
// because
//
// ```edn
// [23.4 :foo/reversed ?x]
// ```
//
// is nonsense. That leaves us with a nested optional, which we unwrap here.
query::Pattern::new(src, e, a, v, tx)
.map(query::WhereClause::Pattern)
.ok_or("expected pattern")
}
// TODO: this shouldn't be checked at parse time.
rule_vars -> BTreeSet<query::Variable>
= vs:variable+ {?
let given = vs.len();
let set: BTreeSet<query::Variable> = vs.into_iter().collect();
if given != set.len() {
Err("expected unique variables")
} else {
Ok(set)
}
}
or_pattern_clause -> query::OrWhereClause
= clause:where_clause { query::OrWhereClause::Clause(clause) }
or_and_clause -> query::OrWhereClause
= __ "(" __ "and" clauses:where_clause+ ")" __ { query::OrWhereClause::And(clauses) }
or_where_clause -> query::OrWhereClause
= or_pattern_clause
/ or_and_clause
or_clause -> query::WhereClause
= __ "(" __ "or" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Implicit, clauses))
}
or_join_clause -> query::WhereClause
= __ "(" __ "or-join" __ "[" vars:rule_vars "]" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
not_clause -> query::WhereClause
= __ "(" __ "not" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Implicit, clauses))
}
not_join_clause -> query::WhereClause
= __ "(" __ "not-join" __ "[" vars:rule_vars "]" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
type_annotation -> query::WhereClause
= __ "[" __ "(" __ "type" var:variable __ ty:raw_keyword __ ")" __ "]" __ {
query::WhereClause::TypeAnnotation(
query::TypeAnnotation {
value_type: ty,
variable: var,
})
}
pred -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ "]" __ {
query::WhereClause::Pred(
query::Predicate {
operator: func.0,
args: args,
})
}
pub where_fn -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ binding:binding "]" __ {
query::WhereClause::WhereFn(
query::WhereFn {
operator: func.0,
args: args,
binding,
})
}
where_clause -> query::WhereClause
// Right now we only support patterns and predicates. See #239 for more.
= pattern
/ or_join_clause
/ or_clause
/ not_join_clause
/ not_clause
/ type_annotation
/ pred
/ where_fn
query_part -> query::QueryPart
= __ ":find" fs:find_spec { query::QueryPart::FindSpec(fs) }
/ __ ":in" in_vars:variable+ { query::QueryPart::InVars(in_vars) }
/ __ ":limit" l:limit { query::QueryPart::Limit(l) }
/ __ ":order" os:order+ { query::QueryPart::Order(os) }
/ __ ":where" ws:where_clause+ { query::QueryPart::WhereClauses(ws) }
/ __ ":with" with_vars:variable+ { query::QueryPart::WithVars(with_vars) }
pub parse_query -> query::ParsedQuery
= __ "[" qps:query_part+ "]" __ {? query::ParsedQuery::from_parts(qps) }
variable -> query::Variable
= v:value {? query::Variable::from_value(&v).ok_or("expected variable") }
src_var -> query::SrcVar
= v:value {? query::SrcVar::from_value(&v).ok_or("expected src_var") }
variable_or_placeholder -> query::VariableOrPlaceholder
= v:variable { query::VariableOrPlaceholder::Variable(v) }
/ __ "_" __ { query::VariableOrPlaceholder::Placeholder }
binding -> query::Binding
= __ "[" __ "[" vs:variable_or_placeholder+ "]" __ "]" __ { query::Binding::BindRel(vs) }
/ __ "[" v:variable "..." __ "]" __ { query::Binding::BindColl(v) }
/ __ "[" vs:variable_or_placeholder+ "]" __ { query::Binding::BindTuple(vs) }
/ v:variable { query::Binding::BindScalar(v) }