Part 4: Parse queries with rust-peg.

There's an unfortunate conflation here between implementing the query
parser in `rust-peg` and moving some validation that now happens at
parse time to happen later.  The result is that we introduce
`ParsedFindQuery` as a less-processed `FindQuery`, and that we only
use string errors (which is all `rust-peg` supports) instead of the
structured errors in query-parser's errors module.  The next commit
will address this, on the road to removing the `query-parser` module
entirely.
This commit is contained in:
Nick Alexander 2018-05-28 15:27:19 -07:00
parent a8073056f2
commit 09f1d633b5
4 changed files with 372 additions and 1239 deletions

View file

@ -1,3 +1,4 @@
/* -*- comment-start: "//"; -*- */
/* vim: set filetype=rust.rustpeg */
// Copyright 2016 Mozilla
@ -24,6 +25,8 @@ use ordered_float::OrderedFloat;
use uuid::Uuid;
use entities::*;
use query;
use query::FromValue;
use symbols::*;
use types::{SpannedValue, Span, ValueAndSpan};
@ -155,12 +158,14 @@ pub symbol -> SpannedValue =
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(plain_symbol_name)
{ SpannedValue::from_symbol(ns, n) }
/ #expected("symbol")
// A keyword: `keyword_prefix`, then an optional namespace (captured as text and
// followed by `namespace_separator`), then a name captured as text from
// `symbol_name`. The captured pieces are handed to `SpannedValue::from_keyword`.
// If the first alternative does not match, "keyword" is reported as the
// expected input.
pub keyword -> SpannedValue =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name)
{ SpannedValue::from_keyword(ns, n) }
/ #expected("keyword")
// A parenthesised sequence of zero or more `value`s, collected into a
// `LinkedList` and wrapped in `SpannedValue::List`.
pub list -> SpannedValue = "(" __ v:(value)* __ ")"
{ SpannedValue::List(LinkedList::from_iter(v)) }
@ -188,6 +193,7 @@ pub value -> ValueAndSpan =
span: Span::new(start, end)
}
}
/ #expected("value")
// A `value` that is accepted only when `is_atom()` returns true; any other
// value fails the rule with "expected atom".
atom -> ValueAndSpan
= v:value {? if v.is_atom() { Ok(v) } else { Err("expected atom") } }
@ -199,10 +205,29 @@ comment = #quiet<";" [^\r\n]* [\r\n]?>
// Filler between tokens: matches any run (possibly empty) of `whitespace` and
// `comment`.
__ = (whitespace / comment)*
// Transaction entity parser starts here.
// The operation of a transaction entity: the literal `:db/add` yields
// `OpType::Add` and the literal `:db/retract` yields `OpType::Retract`.
pub op -> OpType
= ":db/add" { OpType::Add }
/ ":db/retract" { OpType::Retract }
// Matches the same keyword shape as `keyword`, but yields a bare `Keyword`
// instead of a `SpannedValue`: `Keyword::namespaced` when a namespace was
// present, `Keyword::plain` otherwise. The rule contains no `__`, so handling
// of surrounding whitespace is left to the rules that use it.
raw_keyword -> Keyword =
keyword_prefix
ns:( sns:$(symbol_namespace) namespace_separator { sns })?
n:$(symbol_name) {
match ns {
Some(ns) => Keyword::namespaced(ns, n),
None => Keyword::plain(n),
}
}
/ #expected("keyword")
// A `raw_keyword` that is accepted only when `is_forward()` returns true;
// otherwise the rule fails with the message given in the action.
raw_forward_keyword -> Keyword
= v:raw_keyword {? if v.is_forward() { Ok(v) } else { Err("expected :forward or :forward/keyword") } }
// A `raw_keyword` that is accepted only when `is_backward()` returns true;
// otherwise the rule fails with a message showing the two accepted backward
// keyword shapes (plain and namespaced).
raw_backward_keyword -> Keyword
= v:raw_keyword {? if v.is_backward() { Ok(v) } else { Err("expected :_backward or :backward/_keyword") } }
// A keyword whose namespace is mandatory: prefix, namespace, separator, name.
// Produces `Keyword::namespaced(ns, n)`. If the first alternative does not
// match, "namespaced keyword" is reported as the expected input.
raw_namespaced_keyword -> Keyword
= keyword_prefix ns:$(symbol_namespace) namespace_separator n:$(symbol_name) { Keyword::namespaced(ns, n) }
/ #expected("namespaced keyword")
@ -216,16 +241,20 @@ raw_backward_namespaced_keyword -> Keyword
// An entity identifier: either an integer in one of the `raw_*integer`
// notations (yielding `Entid::Entid`) or a namespaced keyword (yielding
// `Entid::Ident`). The integer alternatives are tried first.
entid -> Entid
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { Entid::Entid(v) }
/ v:raw_namespaced_keyword { Entid::Ident(v) }
/ #expected("entid")
// An entity identifier whose keyword form must match
// `raw_forward_namespaced_keyword`. Integers in any of the `raw_*integer`
// notations are accepted as well and yield `Entid::Entid`.
forward_entid -> Entid
= v:( raw_basedinteger / raw_hexinteger / raw_octalinteger / raw_integer ) { Entid::Entid(v) }
/ v:raw_forward_namespaced_keyword { Entid::Ident(v) }
/ #expected("forward entid")
// A backward namespaced keyword, stored as `Entid::Ident` after calling
// `to_reversed()` on it. This rule has no integer alternative.
backward_entid -> Entid
= v:raw_backward_namespaced_keyword { Entid::Ident(v.to_reversed()) }
/ #expected("backward entid")
// `(lookup-ref <entid> <value>)`: builds a `LookupRef` whose attribute is
// `AttributePlace::Entid(a)` and whose value is the parsed `value`.
lookup_ref -> LookupRef<ValueAndSpan>
= "(" __ "lookup-ref" __ a:(entid) __ v:(value) __ ")" { LookupRef { a: AttributePlace::Entid(a), v } }
/ #expected("lookup-ref")
tx_function -> TxFunction
= "(" __ n:$(symbol_name) __ ")" { TxFunction { op: PlainSymbol::plain(n) } }
@ -253,6 +282,210 @@ pub entity -> Entity<ValueAndSpan>
= __ "[" __ op:(op) __ e:(entity_place) __ a:(forward_entid) __ v:(value_place) __ "]" __ { Entity::AddOrRetract { op, e: e, a: AttributePlace::Entid(a), v: v } }
/ __ "[" __ op:(op) __ e:(value_place) __ a:(backward_entid) __ v:(entity_place) __ "]" __ { Entity::AddOrRetract { op, e: v, a: AttributePlace::Entid(a), v: e } }
/ __ map:map_notation __ { Entity::MapNotation(map) }
/ #expected("entity")
// A bracketed sequence of zero or more `entity` items, returned as a `Vec`.
// `__` is consumed before and after each bracket.
pub entities -> Vec<Entity<ValueAndSpan>>
= __ "[" __ es:(entity*) __ "]" __ { es }
// Query parser starts here.
//
// We expect every rule except the `raw_*` rules to eat whitespace
// (with `__`) at its start and finish. That means that every string
// pattern (say "[") should be bracketed on either side with either a
// whitespace-eating rule or an explicit whitespace-eating `__`.
// A function name: the text matched by `symbol_name` is wrapped as a plain
// symbol and converted with `QueryFunction::from_symbol`. When that conversion
// yields `None` the rule fails with "expected query function".
query_function -> query::QueryFunction
= __ n:$(symbol_name) __ {? query::QueryFunction::from_symbol(&PlainSymbol::plain(n)).ok_or("expected query function") }
// A function argument: either a single `value` that `FnArg::from_value`
// accepts, or a bracketed list of one or more nested arguments, which becomes
// `FnArg::Vector`.
fn_arg -> query::FnArg
= v:value {? query::FnArg::from_value(&v).ok_or("expected query function argument") }
/ __ "[" args:fn_arg+ "]" __ { query::FnArg::Vector(args) }
// One element of a `:find` specification. Alternatives, in the order tried:
//   ?var                     -> Element::Variable
//   (the ?var)               -> Element::Corresponding
//   (pull ?var [attr+])      -> Element::Pull
//   (func arg*)              -> Element::Aggregate
find_elem -> query::Element
= __ v:variable __ { query::Element::Variable(v) }
/ __ "(" __ "the" v:variable ")" __ { query::Element::Corresponding(v) }
/ __ "(" __ "pull" var:variable "[" patterns:pull_attribute+ "]" __ ")" __ { query::Element::Pull(query::Pull { var, patterns }) }
/ __ "(" func:query_function args:fn_arg* ")" __ { query::Element::Aggregate(query::Aggregate { func, args }) }
// The shape of the `:find` results. Alternatives, in the order tried:
//   elem .        -> FindSpec::FindScalar
//   elem+         -> FindSpec::FindRel
//   [elem ...]    -> FindSpec::FindColl   (the "..." is literal)
//   [elem+]       -> FindSpec::FindTuple
find_spec -> query::FindSpec
= f:find_elem "." __ { query::FindSpec::FindScalar(f) }
/ fs:find_elem+ { query::FindSpec::FindRel(fs) }
/ __ "[" f:find_elem __ "..." __ "]" __ { query::FindSpec::FindColl(f) }
/ __ "[" fs:find_elem+ "]" __ { query::FindSpec::FindTuple(fs) }
// One entry of a pull pattern: either `*` (`PullAttributeSpec::Wildcard`) or a
// forward namespaced keyword, optionally followed by `:as <forward keyword>` to
// give the attribute an alias. The attribute keyword and the alias (when
// present) are each wrapped in an `Rc`.
pull_attribute -> query::PullAttributeSpec
= __ "*" __ { query::PullAttributeSpec::Wildcard }
/ __ k:raw_forward_namespaced_keyword __ alias:(":as" __ alias:raw_forward_keyword __ { alias })? {
let attribute = query::PullConcreteAttribute::Ident(::std::rc::Rc::new(k));
let alias = alias.map(|alias| ::std::rc::Rc::new(alias));
query::PullAttributeSpec::Attribute(
query::NamedPullAttribute {
attribute,
alias: alias,
})
}
// The argument of `:limit`: either a variable (`Limit::Variable`) or an
// integer literal in one of the `raw_*integer` notations (`Limit::Fixed`).
// An integer that is not strictly greater than zero fails the rule with
// "expected positive integer".
limit -> query::Limit
= __ v:variable __ { query::Limit::Variable(v) }
/ __ n:(raw_octalinteger / raw_hexinteger / raw_basedinteger / raw_integer) __ {?
if n > 0 {
Ok(query::Limit::Fixed(n as u64))
} else {
Err("expected positive integer")
}
}
// One `:order` term: `(asc ?var)`, `(desc ?var)`, or a bare variable. A bare
// variable is given `Direction::Ascending`.
order -> query::Order
= __ "(" __ "asc" v:variable ")" __ { query::Order(query::Direction::Ascending, v) }
/ __ "(" __ "desc" v:variable ")" __ { query::Order(query::Direction::Descending, v) }
/ v:variable { query::Order(query::Direction::Ascending, v) }
// A `value` converted with `PatternValuePlace::from_value`; when the
// conversion yields `None` the rule fails with "expected pattern_value_place".
pattern_value_place -> query::PatternValuePlace
= v:value {? query::PatternValuePlace::from_value(&v).ok_or("expected pattern_value_place") }
// A `value` converted with `PatternNonValuePlace::from_value`; when the
// conversion yields `None` the rule fails with
// "expected pattern_non_value_place".
pattern_non_value_place -> query::PatternNonValuePlace
= v:value {? query::PatternNonValuePlace::from_value(&v).ok_or("expected pattern_non_value_place") }
// A data pattern clause of the form `[src? e a v? tx?]`. The `src`, `v` and
// `tx` positions are optional; omitted `v` and `tx` default to the matching
// `Placeholder` variant. The result is wrapped in `WhereClause::Pattern`.
// `Pattern::new` can return `None`, in which case the rule fails with
// "expected pattern".
pattern -> query::WhereClause
= __ "["
src:src_var?
e:pattern_non_value_place
a:pattern_non_value_place
v:pattern_value_place?
tx:pattern_non_value_place?
"]" __
{?
let v = v.unwrap_or(query::PatternValuePlace::Placeholder);
let tx = tx.unwrap_or(query::PatternNonValuePlace::Placeholder);
// Pattern::new takes care of reversal of reversed
// attributes: [?x :foo/_bar ?y] turns into
// [?y :foo/bar ?x].
//
// This is a bit messy: the inner conversion to a Pattern can
// fail if the input is something like
//
// ```edn
// [?x :foo/_reversed 23.4]
// ```
//
// because
//
// ```edn
// [23.4 :foo/reversed ?x]
// ```
//
// is nonsense. That leaves us with a nested optional, which we unwrap here.
query::Pattern::new(src, e, a, v, tx)
.map(query::WhereClause::Pattern)
.ok_or("expected pattern")
}
// One or more variables collected into a `BTreeSet`. If the set ends up
// smaller than the number of variables parsed (i.e. a variable was repeated),
// the rule fails with "expected unique variables".
//
// TODO: this shouldn't be checked at parse time.
rule_vars -> BTreeSet<query::Variable>
= vs:variable+ {?
let given = vs.len();
let set: BTreeSet<query::Variable> = vs.into_iter().collect();
if given != set.len() {
Err("expected unique variables")
} else {
Ok(set)
}
}
// A single `where_clause`, wrapped as `OrWhereClause::Clause`.
or_pattern_clause -> query::OrWhereClause
= clause:where_clause { query::OrWhereClause::Clause(clause) }
// `(and clause+)`: one or more `where_clause`s grouped as
// `OrWhereClause::And`.
or_and_clause -> query::OrWhereClause
= __ "(" __ "and" clauses:where_clause+ ")" __ { query::OrWhereClause::And(clauses) }
// One branch of an `or` / `or-join`: a plain clause is tried first, then an
// `(and ...)` group.
or_where_clause -> query::OrWhereClause
= or_pattern_clause
/ or_and_clause
// `(or branch+)`: builds an `OrJoin` with `UnifyVars::Implicit` and wraps it
// in `WhereClause::OrJoin`.
or_clause -> query::WhereClause
= __ "(" __ "or" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Implicit, clauses))
}
// `(or-join [var+] branch+)`: builds an `OrJoin` with
// `UnifyVars::Explicit(vars)`, where `vars` comes from `rule_vars`, and wraps
// it in `WhereClause::OrJoin`.
or_join_clause -> query::WhereClause
= __ "(" __ "or-join" __ "[" vars:rule_vars "]" clauses:or_where_clause+ ")" __ {
query::WhereClause::OrJoin(query::OrJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
// `(not clause+)`: builds a `NotJoin` with `UnifyVars::Implicit` and wraps it
// in `WhereClause::NotJoin`.
not_clause -> query::WhereClause
= __ "(" __ "not" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Implicit, clauses))
}
// `(not-join [var+] clause+)`: builds a `NotJoin` with
// `UnifyVars::Explicit(vars)`, where `vars` comes from `rule_vars`, and wraps
// it in `WhereClause::NotJoin`.
not_join_clause -> query::WhereClause
= __ "(" __ "not-join" __ "[" vars:rule_vars "]" clauses:where_clause+ ")" __ {
query::WhereClause::NotJoin(query::NotJoin::new(query::UnifyVars::Explicit(vars), clauses))
}
// `[(type ?var :keyword)]`: records the keyword as `value_type` and the
// variable as `variable` in a `TypeAnnotation`, wrapped in
// `WhereClause::TypeAnnotation`.
type_annotation -> query::WhereClause
= __ "[" __ "(" __ "type" var:variable __ ty:raw_keyword __ ")" __ "]" __ {
query::WhereClause::TypeAnnotation(
query::TypeAnnotation {
value_type: ty,
variable: var,
})
}
// `[(func arg*)]`: a function call with no binding after it, producing
// `WhereClause::Pred`. The operator is taken from `func.0`.
pred -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ "]" __ {
query::WhereClause::Pred(
query::Predicate {
operator: func.0,
args: args,
})
}
// `[(func arg*) binding]`: a function call followed by a `binding`, producing
// `WhereClause::WhereFn`. The operator is taken from `func.0`.
pub where_fn -> query::WhereClause
= __ "[" __ "(" func:query_function args:fn_arg* ")" __ binding:binding "]" __ {
query::WhereClause::WhereFn(
query::WhereFn {
operator: func.0,
args: args,
binding,
})
}
// Any clause that may appear under `:where`. Alternatives are tried in the
// order listed, so each `*-join` form is attempted before its plain `or` /
// `not` counterpart, and `pred` is attempted before `where_fn`.
where_clause -> query::WhereClause
// Supported forms: patterns, `or-join` / `or`, `not-join` / `not`, type
// annotations, predicates, and function clauses with a binding.
// See #239 for more.
= pattern
/ or_join_clause
/ or_clause
/ not_join_clause
/ not_clause
/ type_annotation
/ pred
/ where_fn
// One keyword-introduced section of a find query. Each of `:find`, `:in`,
// `:limit`, `:order`, `:where` and `:with` is followed by its payload and
// yields the matching `QueryPart` variant. This rule does not check whether a
// section is repeated.
query_part -> query::QueryPart
= __ ":find" fs:find_spec { query::QueryPart::FindSpec(fs) }
/ __ ":in" in_vars:variable+ { query::QueryPart::InVars(in_vars) }
/ __ ":limit" l:limit { query::QueryPart::Limit(l) }
/ __ ":order" os:order+ { query::QueryPart::Order(os) }
/ __ ":where" ws:where_clause+ { query::QueryPart::WhereClauses(ws) }
/ __ ":with" with_vars:variable+ { query::QueryPart::WithVars(with_vars) }
// A whole find query: a bracketed list of one or more `query_part`s, combined
// by `ParsedFindQuery::from_parts`. An `Err` from `from_parts` fails the rule
// with that error string.
pub query -> query::ParsedFindQuery
= __ "[" qps:query_part+ "]" __ {? query::ParsedFindQuery::from_parts(qps) }
// A `value` converted with `Variable::from_value`; when the conversion yields
// `None` the rule fails with "expected variable".
variable -> query::Variable
= v:value {? query::Variable::from_value(&v).ok_or("expected variable") }
// A `value` converted with `SrcVar::from_value`; when the conversion yields
// `None` the rule fails with "expected src_var".
src_var -> query::SrcVar
= v:value {? query::SrcVar::from_value(&v).ok_or("expected src_var") }
// Either a variable or the literal `_`, which yields
// `VariableOrPlaceholder::Placeholder`. The variable alternative is tried
// first.
variable_or_placeholder -> query::VariableOrPlaceholder
= v:variable { query::VariableOrPlaceholder::Variable(v) }
/ __ "_" __ { query::VariableOrPlaceholder::Placeholder }
// The binding part of a function clause. Alternatives, in the order tried:
//   [[x+]]      -> Binding::BindRel     (x is a variable or `_`)
//   [?var ...]  -> Binding::BindColl    (the "..." is literal)
//   [x+]        -> Binding::BindTuple   (x is a variable or `_`)
//   ?var        -> Binding::BindScalar
binding -> query::Binding
= __ "[" __ "[" vs:variable_or_placeholder+ "]" __ "]" __ { query::Binding::BindRel(vs) }
/ __ "[" v:variable "..." __ "]" __ { query::Binding::BindColl(v) }
/ __ "[" vs:variable_or_placeholder+ "]" __ { query::Binding::BindTuple(vs) }
/ v:variable { query::Binding::BindScalar(v) }

View file

@ -949,6 +949,15 @@ pub struct NotJoin {
pub clauses: Vec<WhereClause>,
}
impl NotJoin {
    /// Creates a `NotJoin` from the given unification variables and the
    /// clauses it wraps.
    pub fn new(unify_vars: UnifyVars, clauses: Vec<WhereClause>) -> NotJoin {
        NotJoin { unify_vars, clauses }
    }
}
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TypeAnnotation {
pub value_type: Keyword,
@ -981,6 +990,131 @@ pub struct FindQuery {
// TODO: in_rules;
}
/// A find query as assembled from its parsed sections by
/// `ParsedFindQuery::from_parts`, before `into_find_query` has validated it.
///
/// `with` and `in_vars` are plain `Vec`s here, so repeated variables are still
/// representable; `into_find_query` collects them into sets and rejects
/// duplicates.
// NOTE(review): the reason for `allow(dead_code)` is not visible here — confirm it is still needed.
#[allow(dead_code)]
#[derive(Debug, Eq, PartialEq)]
pub struct ParsedFindQuery {
/// The `:find` specification.
pub find_spec: FindSpec,
/// `from_parts` sets this to `SrcVar::DefaultSrc`.
pub default_source: SrcVar,
/// Variables listed under `:with`, in source order; empty when absent.
pub with: Vec<Variable>,
/// Variables listed under `:in`, in source order; empty when absent.
pub in_vars: Vec<Variable>,
/// `from_parts` leaves this set empty.
pub in_sources: BTreeSet<SrcVar>,
/// The `:limit` value; `from_parts` uses `Limit::None` when absent.
pub limit: Limit,
/// The clauses listed under `:where`.
pub where_clauses: Vec<WhereClause>,
/// The `:order` terms, or `None` when the query has no `:order`.
pub order: Option<Vec<Order>>,
// TODO: in_rules;
}
/// One section of a find query together with its parsed payload.
/// `ParsedFindQuery::from_parts` combines a list of these into a
/// `ParsedFindQuery` and rejects a list in which a variant occurs twice.
pub(crate) enum QueryPart {
/// Payload of a `:find` section.
FindSpec(FindSpec),
/// Payload of a `:with` section.
WithVars(Vec<Variable>),
/// Payload of an `:in` section.
InVars(Vec<Variable>),
/// Payload of a `:limit` section.
Limit(Limit),
/// Payload of a `:where` section.
WhereClauses(Vec<WhereClause>),
/// Payload of an `:order` section.
Order(Vec<Order>),
}
impl ParsedFindQuery {
    /// Stores `value` in `slot`, or fails with `message` when `slot` already
    /// holds a value (that is, the same query section appeared twice).
    fn set_once<T>(slot: &mut Option<T>, value: T, message: &'static str) -> Result<(), &'static str> {
        if slot.is_some() {
            return Err(message);
        }
        *slot = Some(value);
        Ok(())
    }

    /// Collects `vars` into a set, failing with `message` when any variable
    /// occurs more than once.
    fn unique_variables(vars: Vec<Variable>, message: &'static str) -> Result<BTreeSet<Variable>, &'static str> {
        let given = vars.len();
        let set: BTreeSet<Variable> = vars.into_iter().collect();
        if given == set.len() {
            Ok(set)
        } else {
            Err(message)
        }
    }

    /// Assembles a `ParsedFindQuery` from the sections of a parsed query.
    ///
    /// Sections may arrive in any order. `:with` and `:in` default to empty
    /// lists, `:limit` defaults to `Limit::None`, and `:order` defaults to
    /// `None`.
    ///
    /// # Errors
    ///
    /// Returns `"find query has repeated :<section>"` for the first section
    /// that occurs a second time, `"expected :find"` when there is no `:find`
    /// section, and `"expected :where"` when there is no `:where` section.
    pub(crate) fn from_parts(parts: Vec<QueryPart>) -> std::result::Result<ParsedFindQuery, &'static str> {
        let mut find_spec: Option<FindSpec> = None;
        let mut with: Option<Vec<Variable>> = None;
        let mut in_vars: Option<Vec<Variable>> = None;
        let mut limit: Option<Limit> = None;
        let mut where_clauses: Option<Vec<WhereClause>> = None;
        let mut order: Option<Vec<Order>> = None;

        for part in parts {
            match part {
                QueryPart::FindSpec(x) => Self::set_once(&mut find_spec, x, "find query has repeated :find")?,
                QueryPart::WithVars(x) => Self::set_once(&mut with, x, "find query has repeated :with")?,
                QueryPart::InVars(x) => Self::set_once(&mut in_vars, x, "find query has repeated :in")?,
                QueryPart::Limit(x) => Self::set_once(&mut limit, x, "find query has repeated :limit")?,
                QueryPart::WhereClauses(x) => Self::set_once(&mut where_clauses, x, "find query has repeated :where")?,
                QueryPart::Order(x) => Self::set_once(&mut order, x, "find query has repeated :order")?,
            }
        }

        Ok(ParsedFindQuery {
            find_spec: find_spec.ok_or("expected :find")?,
            default_source: SrcVar::DefaultSrc,
            with: with.unwrap_or_default(),
            in_vars: in_vars.unwrap_or_default(),
            in_sources: BTreeSet::default(),
            limit: limit.unwrap_or(Limit::None),
            where_clauses: where_clauses.ok_or("expected :where")?,
            order,
        })
    }

    /// Validates this parsed query and converts it into a `FindQuery`.
    ///
    /// The `:in` and `:with` variable lists become sets; every other field is
    /// moved across unchanged.
    ///
    /// # Errors
    ///
    /// Returns `"find query has repeated :in variable"` or
    /// `"find query has repeated :with variable"` when the respective list
    /// contains a duplicate, and `"limit var not present in :in"` when
    /// `:limit` names a variable that is not listed under `:in`. The checks
    /// run in that order.
    pub fn into_find_query(self: ParsedFindQuery) -> Result<FindQuery, &'static str> {
        let in_vars = Self::unique_variables(self.in_vars, "find query has repeated :in variable")?;
        let with = Self::unique_variables(self.with, "find query has repeated :with variable")?;

        // Make sure that if we have `:limit ?x`, `?x` appears in `:in`.
        if let Limit::Variable(ref v) = self.limit {
            if !in_vars.contains(v) {
                return Err("limit var not present in :in");
            }
        }

        Ok(FindQuery {
            find_spec: self.find_spec,
            default_source: self.default_source,
            with,
            in_vars,
            in_sources: self.in_sources,
            limit: self.limit,
            where_clauses: self.where_clauses,
            order: self.order,
        })
    }
}
impl FindQuery {
pub fn simple(spec: FindSpec, where_clauses: Vec<WhereClause>) -> FindQuery {
FindQuery {

View file

@ -22,7 +22,6 @@ extern crate edn;
extern crate mentat_parser_utils;
mod errors;
mod parse;
pub use errors::{
Error,
@ -31,6 +30,8 @@ pub use errors::{
ResultExt,
};
pub use parse::{
parse_find_string,
};
/// Parses `string` as a find query with `edn::parse::query` and then converts
/// the parsed form with `into_find_query`.
///
/// A failure from either step is converted into this crate's error type.
pub fn parse_find_string(string: &str) -> Result<edn::query::FindQuery> {
    match edn::parse::query(string) {
        Ok(parsed) => parsed.into_find_query().map_err(Into::into),
        Err(parse_error) => Err(parse_error.into()),
    }
}

File diff suppressed because it is too large Load diff