mentat/edn/src/edn.rustpeg

492 lines
18 KiB
Text
Raw Normal View History

/* -*- comment-start: "//"; -*- */
/* vim: set filetype=rust.rustpeg */
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::iter::FromIterator;
use std::f64::{NAN, INFINITY, NEG_INFINITY};
use chrono::{
DateTime,
TimeZone,
Utc
};
use num::BigInt;
use ordered_float::OrderedFloat;
use uuid::Uuid;
use entities::*;
use query;
use query::FromValue;
use symbols::*;
use types::{SpannedValue, Span, ValueAndSpan};
// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html
// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
// to trace where the parser is failing
// TODO: Support tagged elements
// TODO: Support discard
pub nil -> SpannedValue = "nil" { SpannedValue::Nil }
pub nan -> SpannedValue = "#f" whitespace+ "NaN" { SpannedValue::Float(OrderedFloat(NAN)) }
pub infinity -> SpannedValue = "#f" whitespace+ s:$(sign) "Infinity"
{ SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })) }
pub boolean -> SpannedValue
= "true" { SpannedValue::Boolean(true) }
/ "false" { SpannedValue::Boolean(false) }
digit = [0-9]
alphanumeric = [0-9a-zA-Z]
octaldigit = [0-7]
validbase = [3][0-6] / [12][0-9] / [2-9]
hex = [0-9a-fA-F]
sign = [+-]
pub raw_bigint -> BigInt = b:$( sign? digit+ ) "N"
{ b.parse::<BigInt>().unwrap() }
pub raw_octalinteger -> i64 = "0" i:$( octaldigit+ )
{ i64::from_str_radix(i, 8).unwrap() }
pub raw_hexinteger -> i64 = "0x" i:$( hex+ )
{ i64::from_str_radix(i, 16).unwrap() }
pub raw_basedinteger -> i64 = b:$( validbase ) "r" i:$( alphanumeric+ )
{ i64::from_str_radix(i, b.parse::<u32>().unwrap()).unwrap() }
pub raw_integer -> i64 = i:$( sign? digit+ ) !("." / ([eE]))
{ i.parse::<i64>().unwrap() }
pub raw_float -> OrderedFloat<f64> = f:$(sign? digit+ ("." digit+)? ([eE] sign? digit+)?)
{ OrderedFloat(f.parse::<f64>().unwrap()) }
pub bigint -> SpannedValue = v:raw_bigint { SpannedValue::BigInteger(v) }
pub octalinteger -> SpannedValue = v:raw_octalinteger { SpannedValue::Integer(v) }
pub hexinteger -> SpannedValue = v:raw_hexinteger { SpannedValue::Integer(v) }
pub basedinteger -> SpannedValue = v:raw_basedinteger { SpannedValue::Integer(v) }
pub integer -> SpannedValue = v:raw_integer { SpannedValue::Integer(v) }
pub float -> SpannedValue = v:raw_float { SpannedValue::Float(v) }
number -> SpannedValue = ( bigint / basedinteger / hexinteger / octalinteger / integer / float )
// TODO: standalone characters: \<char>, \newline, \return, \space and \tab.
string_special_char -> &'input str = "\\" $([\\"ntr])
string_normal_chars -> &'input str = $([^"\\]+)
// This is what we need to do in order to unescape. We can't just match the entire string slice:
// we get a Vec<&str> from rust-peg, where some of the parts might be unescaped special characters,
// and we join it together to form an output string.
// E.g., input = r#"\"foo\\\\bar\""#
// output = [quote, "foo", backslash, "bar", quote]
// result = r#""foo\\bar""#
// For the typical case, string_normal_chars will match multiple, leading to a single-element vec.
pub raw_text -> String = "\"" t:((string_special_char / string_normal_chars)*) "\""
{ t.join(&"").to_string() }
pub text -> SpannedValue
= v:raw_text { SpannedValue::Text(v) }
// RFC 3339 timestamps. #inst "1985-04-12T23:20:50.52Z"
// We accept an arbitrary depth of decimals.
// Note that we discard the timezone information -- all times are translated to UTC.
inst_string -> DateTime<Utc> =
"#inst" whitespace+ "\"" d:$( [0-9]*<4> "-" [0-2][0-9] "-" [0-3][0-9]
"T"
[0-2][0-9] ":" [0-5][0-9] ":" [0-6][0-9]
("." [0-9]+)?
("Z" / (("+" / "-") [0-2][0-9] ":" [0-5][0-9]))
)
"\"" {?
DateTime::parse_from_rfc3339(d)
.map(|t| t.with_timezone(&Utc))
.map_err(|_| "invalid datetime") // Oh, rustpeg.
}
inst_micros -> DateTime<Utc> =
"#instmicros" whitespace+ d:$( digit+ ) {
let micros = d.parse::<i64>().unwrap();
let seconds: i64 = micros / 1000000;
let nanos: u32 = ((micros % 1000000).abs() as u32) * 1000;
Utc.timestamp(seconds, nanos)
}
inst_millis -> DateTime<Utc> =
"#instmillis" whitespace+ d:$( digit+ ) {
let millis = d.parse::<i64>().unwrap();
let seconds: i64 = millis / 1000;
let nanos: u32 = ((millis % 1000).abs() as u32) * 1000000;
Utc.timestamp(seconds, nanos)
}
inst -> SpannedValue = t:(inst_millis / inst_micros / inst_string)
{ SpannedValue::Instant(t) }
uuid_string -> Uuid =
"\"" u:$( [a-f0-9]*<8> "-" [a-f0-9]*<4> "-" [a-f0-9]*<4> "-" [a-f0-9]*<4> "-" [a-f0-9]*<12> ) "\"" {
Uuid::parse_str(u).expect("this is a valid UUID string")
}
pub uuid -> SpannedValue = "#uuid" whitespace+ u:uuid_string
{ SpannedValue::Uuid(u) }
namespace_divider = "."
namespace_separator = "/"
// TODO: Be more picky here
// Keywords follow the rules of symbols, except they can (and must) begin with :
// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>]
symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-]
symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
symbol_name = ( symbol_char_initial+ symbol_char_subsequent* )
plain_symbol_name = symbol_name / "..." / "."
keyword_prefix = ":"
pub symbol -> SpannedValue =
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(plain_symbol_name)
{ SpannedValue::from_symbol(ns, n) }
/ #expected("symbol")
pub keyword -> SpannedValue =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name)
{ SpannedValue::from_keyword(ns, n) }
/ #expected("keyword")
pub list -> SpannedValue = "(" __ v:(value)* __ ")"
{ SpannedValue::List(LinkedList::from_iter(v)) }
pub vector -> SpannedValue = "[" __ v:(value)* __ "]"
{ SpannedValue::Vector(v) }
pub set -> SpannedValue = "#{" __ v:(value)* __ "}"
{ SpannedValue::Set(BTreeSet::from_iter(v)) }
pair -> (ValueAndSpan, ValueAndSpan) =
k:(value) v:(value) {
(k, v)
}
pub map -> SpannedValue = "{" __ v:(pair)* __ "}"
{ SpannedValue::Map(BTreeMap::from_iter(v)) }
// It's important that float comes before integer or the parser assumes that
// floats are integers and fails to parse
pub value -> ValueAndSpan =
__ start:#position v:(nil / nan / infinity / boolean / number / inst / uuid / text / keyword / symbol / list / vector / map / set) end:#position __ {
ValueAndSpan {
inner: v,
span: Span::new(start, end)
}
}
/ #expected("value")
atom -> ValueAndSpan
= v:value {? if v.is_atom() { Ok(v) } else { Err("expected atom") } }
// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
whitespace = #quiet<[ \r\n\t,]>
comment = #quiet<";" [^\r\n]* [\r\n]?>
__ = (whitespace / comment)*
// Transaction entity parser starts here.
pub op -> OpType
= ":db/add" { OpType::Add }
/ ":db/retract" { OpType::Retract }
raw_keyword -> Keyword =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name) {
match ns {
Some(ns) => Keyword::namespaced(ns, n),
None => Keyword::plain(n),
}
}
/ #expected("keyword")
raw_forward_keyword -> Keyword
= v:raw_keyword {? if v.is_forward() { Ok(v) } else { Err("expected :forward or :forward/keyword") } }
raw_backward_keyword -> Keyword
= v:raw_keyword {? if v.is_backward() { Ok(v) } else { Err("expected :_backword or :backward/_keyword") } }
raw_namespaced_keyword -> Keyword
= keyword_prefix ns:$(symbol_namespace) namespace_separator n:$(symbol_name) { Keyword::namespaced(ns, n) }
/ #expected("namespaced keyword")
raw_forward_namespaced_keyword -> Keyword
= v:raw_namespaced_keyword {? if v.is_forward() { Ok(v) } else { Err("expected namespaced :forward/keyword") } }
raw_backward_namespaced_keyword -> Keyword
= v:raw_namespaced_keyword {? if v.is_backward() { Ok(v) } else { Err("expected namespaced :backward/_keyword") } }
entid -> Entid
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { Entid::Entid(v) }
/ v:raw_namespaced_keyword { Entid::Ident(v) }
/ #expected("entid")
forward_entid -> Entid
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { Entid::Entid(v) }
/ v:raw_forward_namespaced_keyword { Entid::Ident(v) }
/ #expected("forward entid")
backward_entid -> Entid
= v:raw_backward_namespaced_keyword { Entid::Ident(v.to_reversed()) }
/ #expected("backward entid")
lookup_ref -> LookupRef<ValueAndSpan>
= "(" __ "lookup-ref" __ a:(entid) __ v:(value) __ ")" { LookupRef { a: AttributePlace::Entid(a), v } }
/ #expected("lookup-ref")
tx_function -> TxFunction
= "(" __ n:$(symbol_name) __ ")" { TxFunction { op: PlainSymbol::plain(n) } }
entity_place -> EntityPlace<ValueAndSpan>
= v:raw_text { EntityPlace::TempId(TempId::External(v)) }
/ v:entid { EntityPlace::Entid(v) }
/ v:lookup_ref { EntityPlace::LookupRef(v) }
/ v:tx_function { EntityPlace::TxFunction(v) }
value_place_pair -> (Entid, ValuePlace<ValueAndSpan>)
= k:(entid) __ v:(value_place) { (k, v) }
map_notation -> MapNotation<ValueAndSpan>
= "{" __ kvs:(value_place_pair*) __ "}" { kvs.into_iter().collect() }
value_place -> ValuePlace<ValueAndSpan>
= __ v:lookup_ref __ { ValuePlace::LookupRef(v) }
/ __ v:tx_function __ { ValuePlace::TxFunction(v) }
/ __ "[" __ vs:(value_place*) __ "]" __ { ValuePlace::Vector(vs) }
/ __ v:map_notation __ { ValuePlace::MapNotation(v) }
/ __ v:atom __ { ValuePlace::Atom(v) }
pub entity -> Entity<ValueAndSpan>
= __ "[" __ op:(op) __ e:(entity_place) __ a:(forward_entid) __ v:(value_place) __ "]" __ { Entity::AddOrRetract { op, e: e, a: AttributePlace::Entid(a), v: v } }
/ __ "[" __ op:(op) __ e:(value_place) __ a:(backward_entid) __ v:(entity_place) __ "]" __ { Entity::AddOrRetract { op, e: v, a: AttributePlace::Entid(a), v: e } }
/ __ map:map_notation __ { Entity::MapNotation(map) }
/ #expected("entity")
pub entities -> Vec<Entity<ValueAndSpan>>
= __ "[" __ es:(entity*) __ "]" __ { es }
// Query parser starts here.
//
// We expect every rule except the `raw_*` rules to eat whitespace
// (with `__`) at its start and finish. That means that every string
// pattern (say "[") should be bracketed on either side with either a
// whitespace-eating rule or an explicit whitespace eating `__`.
query_function -> query::QueryFunction
= __ n:$(symbol_name) __ {? query::QueryFunction::from_symbol(&PlainSymbol::plain(n)).ok_or("expected query function") }
fn_arg -> query::FnArg
= v:value {? query::FnArg::from_value(&v).ok_or("expected query function argument") }
/ __ "[" args:fn_arg+ "]" __ { query::FnArg::Vector(args) }
find_elem -> query::Element
= __ v:variable __ { query::Element::Variable(v) }
/ __ "(" __ "the" v:variable ")" __ { query::Element::Corresponding(v) }
/ __ "(" __ "pull" var:variable "[" patterns:pull_attribute+ "]" __ ")" __ { query::Element::Pull(query::Pull { var, patterns }) }
/ __ "(" func:query_function args:fn_arg* ")" __ { query::Element::Aggregate(query::Aggregate { func, args }) }
find_spec -> query::FindSpec
= f:find_elem "." __ { query::FindSpec::FindScalar(f) }
/ fs:find_elem+ { query::FindSpec::FindRel(fs) }
/ __ "[" f:find_elem __ "..." __ "]" __ { query::FindSpec::FindColl(f) }
/ __ "[" fs:find_elem+ "]" __ { query::FindSpec::FindTuple(fs) }
pull_attribute -> query::PullAttributeSpec
= __ "*" __ { query::PullAttributeSpec::Wildcard }
/ __ k:raw_forward_namespaced_keyword __ alias:(":as" __ alias:raw_forward_keyword __ { alias })? {
let attribute = query::PullConcreteAttribute::Ident(::std::rc::Rc::new(k));
let alias = alias.map(|alias| ::std::rc::Rc::new(alias));
query::PullAttributeSpec::Attribute(
query::NamedPullAttribute {
attribute,
alias: alias,
})
}
limit -> query::Limit
= __ v:variable __ { query::Limit::Variable(v) }
/ __ n:(raw_octalinteger / raw_hexinteger / raw_basedinteger / raw_integer) __ {?
if n > 0 {
Ok(query::Limit::Fixed(n as u64))
} else {
Err("expected positive integer")
}
}
order -> query::Order
= __ "(" __ "asc" v:variable ")" __ { query::Order(query::Direction::Ascending, v) }
/ __ "(" __ "desc" v:variable ")" __ { query::Order(query::Direction::Descending, v) }
/ v:variable { query::Order(query::Direction::Ascending, v) }
pattern_value_place -> query::PatternValuePlace
= v:value {? query::PatternValuePlace::from_value(&v).ok_or("expected pattern_value_place") }
pattern_non_value_place -> query::PatternNonValuePlace
= v:value {? query::PatternNonValuePlace::from_value(&v).ok_or("expected pattern_non_value_place") }
pattern -> query::WhereClause
= __ "["
src:src_var?
e:pattern_non_value_place
a:pattern_non_value_place
v:pattern_value_place?
tx:pattern_non_value_place?
"]" __
{?
let v = v.unwrap_or(query::PatternValuePlace::Placeholder);
let tx = tx.unwrap_or(query::PatternNonValuePlace::Placeholder);
// Pattern::new takes care of reversal of reversed
// attributes: [?x :foo/_bar ?y] turns into
// [?y :foo/bar ?x].
//
// This is a bit messy: the inner conversion to a Pattern can
// fail if the input is something like
//
// ```edn
// [?x :foo/_reversed 23.4]
// ```
//
// because
//
// ```edn
// [23.4 :foo/reversed ?x]
// ```
//
// is nonsense. That leaves us with a nested optional, which we unwrap here.
query::Pattern::new(src, e, a, v, tx)
.map(query::WhereClause::Pattern)
.ok_or("expected pattern")
}
// TODO: this shouldn't be checked at parse time.
rule_vars -> BTreeSet<query::Variable>
= vs:variable+ {?
let given = vs.len();
let set: BTreeSet<query::Variable> = vs.into_iter().collect();
if given != set.len() {
Err("expected unique variables")
} else {
Ok(set)
}
}
or_pattern_clause -> query::OrWhereClause
= clause:where_clause { query::OrWhereClause::Clause(clause) }
or_and_clause -> query::OrWhereClause
= __ "(" __ "and" clauses:where_clause+ ")" __ { query::OrWhereClause::And(clauses) }
or_where_clause -> query::OrWhereClause
= or_pattern_clause
/ or_and_clause
or_clause -> query::WhereClause
= __ "(" __ "or" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Implicit, clauses))
}
or_join_clause -> query::WhereClause
= __ "(" __ "or-join" __ "[" vars:rule_vars "]" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
not_clause -> query::WhereClause
= __ "(" __ "not" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Implicit, clauses))
}
not_join_clause -> query::WhereClause
= __ "(" __ "not-join" __ "[" vars:rule_vars "]" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
type_annotation -> query::WhereClause
= __ "[" __ "(" __ "type" var:variable __ ty:raw_keyword __ ")" __ "]" __ {
query::WhereClause::TypeAnnotation(
query::TypeAnnotation {
value_type: ty,
variable: var,
})
}
pred -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ "]" __ {
query::WhereClause::Pred(
query::Predicate {
operator: func.0,
args: args,
})
}
pub where_fn -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ binding:binding "]" __ {
query::WhereClause::WhereFn(
query::WhereFn {
operator: func.0,
args: args,
binding,
})
}
where_clause -> query::WhereClause
// Right now we only support patterns and predicates. See #239 for more.
= pattern
/ or_join_clause
/ or_clause
/ not_join_clause
/ not_clause
/ type_annotation
/ pred
/ where_fn
query_part -> query::QueryPart
= __ ":find" fs:find_spec { query::QueryPart::FindSpec(fs) }
/ __ ":in" in_vars:variable+ { query::QueryPart::InVars(in_vars) }
/ __ ":limit" l:limit { query::QueryPart::Limit(l) }
/ __ ":order" os:order+ { query::QueryPart::Order(os) }
/ __ ":where" ws:where_clause+ { query::QueryPart::WhereClauses(ws) }
/ __ ":with" with_vars:variable+ { query::QueryPart::WithVars(with_vars) }
pub query -> query::ParsedFindQuery
= __ "[" qps:query_part+ "]" __ {? query::ParsedFindQuery::from_parts(qps) }
variable -> query::Variable
= v:value {? query::Variable::from_value(&v).ok_or("expected variable") }
src_var -> query::SrcVar
= v:value {? query::SrcVar::from_value(&v).ok_or("expected src_var") }
variable_or_placeholder -> query::VariableOrPlaceholder
= v:variable { query::VariableOrPlaceholder::Variable(v) }
/ __ "_" __ { query::VariableOrPlaceholder::Placeholder }
binding -> query::Binding
= __ "[" __ "[" vs:variable_or_placeholder+ "]" __ "]" __ { query::Binding::BindRel(vs) }
/ __ "[" v:variable "..." __ "]" __ { query::Binding::BindColl(v) }
/ __ "[" vs:variable_or_placeholder+ "]" __ { query::Binding::BindTuple(vs) }
/ v:variable { query::Binding::BindScalar(v) }