From 2592506288b4cfe2720ecb6e2f8ded99f8481871 Mon Sep 17 00:00:00 2001 From: Richard Newman Date: Wed, 25 Jan 2017 14:06:19 -0800 Subject: [PATCH] Implement parsing of simple :find expressions. (#196) r=nalexander * Test the mentat_query directory on Travis. * Export common types from edn. This allows you to write use edn::{PlainSymbol,Keyword}; instead of use edn::symbols::{PlainSymbol,Keyword}; * Add an edn::Value::is_keyword predicate. * Clean up query, preparing for query-parser. * Make EDN keywords and symbols take Into arguments. * Implement parsing of simple :find lists. * Rustfmt query-parser. Split find and query. * Review comment: values_to_variables now returns a NotAVariableError on failure. * Review comment: rename gimme to to_parsed_value. * Review comment: add comments. --- .travis.yml | 1 + edn/src/lib.rs | 3 + edn/src/symbols.rs | 14 +- edn/src/types.rs | 9 ++ query-parser/Cargo.toml | 2 + query-parser/README.md | 2 + query-parser/src/error.rs | 35 ++++ query-parser/src/find.rs | 138 ++++++++++++++++ query-parser/src/lib.rs | 17 +- query-parser/src/parse.rs | 265 +++++++++++++++++++++++++++++++ query-parser/src/util.rs | 187 ++++++++++++++++++++++ query-parser/tests/find_tests.rs | 35 ++++ query/Cargo.toml | 3 + query/src/find.rs | 135 ---------------- query/src/lib.rs | 234 ++++++++++++++++++++++++++- src/lib.rs | 10 -- 16 files changed, 925 insertions(+), 165 deletions(-) create mode 100644 query-parser/README.md create mode 100644 query-parser/src/error.rs create mode 100644 query-parser/src/find.rs create mode 100644 query-parser/src/parse.rs create mode 100644 query-parser/src/util.rs create mode 100644 query-parser/tests/find_tests.rs delete mode 100644 query/src/find.rs diff --git a/.travis.yml b/.travis.yml index e2b872d8..4be52570 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,5 +3,6 @@ script: - cargo build --verbose - cargo test --verbose - cargo test --verbose -p edn + - cargo test --verbose -p mentat_query - cargo test --verbose -p 
mentat_query_parser - cargo test --verbose -p mentat_tx_parser diff --git a/edn/src/lib.rs b/edn/src/lib.rs index 1d835e40..4faf6853 100644 --- a/edn/src/lib.rs +++ b/edn/src/lib.rs @@ -19,3 +19,6 @@ pub mod types; pub mod parse { include!(concat!(env!("OUT_DIR"), "/edn.rs")); } + +pub use self::types::Value; +pub use self::symbols::{Keyword, NamespacedKeyword, PlainSymbol, NamespacedSymbol}; diff --git a/edn/src/symbols.rs b/edn/src/symbols.rs index 3186d0c4..e4395f3b 100644 --- a/edn/src/symbols.rs +++ b/edn/src/symbols.rs @@ -69,10 +69,11 @@ pub struct NamespacedKeyword { } impl PlainSymbol { - pub fn new(name: &str) -> Self { - assert!(!name.is_empty(), "Symbols cannot be unnamed."); + pub fn new(name: T) -> Self where T: Into { + let n = name.into(); + assert!(!n.is_empty(), "Symbols cannot be unnamed."); - return PlainSymbol(name.to_string()); + return PlainSymbol(n); } } @@ -86,10 +87,11 @@ impl NamespacedSymbol { } impl Keyword { - pub fn new(name: &str) -> Self { - assert!(!name.is_empty(), "Keywords cannot be unnamed."); + pub fn new(name: T) -> Self where T: Into{ + let n = name.into(); + assert!(!n.is_empty(), "Keywords cannot be unnamed."); - return Keyword(name.to_string()); + return Keyword(n); } } diff --git a/edn/src/types.rs b/edn/src/types.rs index 7f969d07..8d3e8427 100644 --- a/edn/src/types.rs +++ b/edn/src/types.rs @@ -41,6 +41,15 @@ pub enum Value { use self::Value::*; +impl Value { + pub fn is_keyword(&self) -> bool { + match *self { + Keyword(_) => true, + _ => false, + } + } +} + impl PartialOrd for Value { fn partial_cmp(&self, other: &Value) -> Option { Some(self.cmp(other)) diff --git a/query-parser/Cargo.toml b/query-parser/Cargo.toml index d2ad101c..3202966a 100644 --- a/query-parser/Cargo.toml +++ b/query-parser/Cargo.toml @@ -3,6 +3,8 @@ name = "mentat_query_parser" version = "0.0.1" [dependencies] +combine = "2.1.1" + [dependencies.edn] path = "../edn" diff --git a/query-parser/README.md b/query-parser/README.md new file mode 
100644 index 00000000..feb48f62 --- /dev/null +++ b/query-parser/README.md @@ -0,0 +1,2 @@ +See for a description of +what's going on in this crate, as well as `query` and `query-executor`. diff --git a/query-parser/src/error.rs b/query-parser/src/error.rs new file mode 100644 index 00000000..6704e0d4 --- /dev/null +++ b/query-parser/src/error.rs @@ -0,0 +1,35 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate combine; +extern crate edn; +extern crate mentat_query; + +use self::mentat_query::{FindSpec, FindQuery}; + +#[derive(Clone,Debug,Eq,PartialEq)] +pub struct NotAVariableError(pub edn::Value); + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum FindParseError { + Err, +} + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum QueryParseError { + InvalidInput(edn::Value), + EdnParseError(edn::parse::ParseError), + MissingField(edn::Keyword), + FindParseError(FindParseError), +} + +pub type FindParseResult = Result; +pub type QueryParseResult = Result; + diff --git a/query-parser/src/find.rs b/query-parser/src/find.rs new file mode 100644 index 00000000..dfaa5474 --- /dev/null +++ b/query-parser/src/find.rs @@ -0,0 +1,138 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. 
You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +/// ! This module defines the interface and implementation for parsing an EDN +/// ! input into a structured Datalog query. +/// ! +/// ! The query types are defined in the `query` crate, because they +/// ! are shared between the parser (EDN -> query), the translator +/// ! (query -> SQL), and the executor (query, SQL -> running code). +/// ! +/// ! The query input can be in two forms: a 'flat' human-oriented +/// ! sequence: +/// ! +/// ! ```clojure +/// ! [:find ?y :in $ ?x :where [?x :foaf/knows ?y]] +/// ! ``` +/// ! +/// ! or a more programmatically generable map: +/// ! +/// ! ```clojure +/// ! {:find [?y] +/// ! :in [$] +/// ! :where [[?x :foaf/knows ?y]]} +/// ! ``` +/// ! +/// ! We parse by expanding the array format into four parts, treating them as the four +/// ! parts of the map. + +extern crate edn; +extern crate mentat_query; + +use std::collections::BTreeMap; + +use self::mentat_query::{FindQuery, SrcVar}; + +use super::error::{QueryParseError, QueryParseResult}; +use super::util::{values_to_variables, vec_to_keyword_map}; + +#[allow(unused_variables)] +fn parse_find_parts(find: &[edn::Value], + ins: Option<&[edn::Value]>, + with: Option<&[edn::Value]>, + wheres: &[edn::Value]) + -> QueryParseResult { + // :find must be an array of plain var symbols (?foo), pull expressions, and aggregates. + // For now we only support variables and the annotations necessary to declare which + // flavor of :find we want: + // ?x ?y ?z = FindRel + // [?x ...] = FindColl + // ?x . 
= FindScalar + // [?x ?y ?z] = FindTuple + // + // :in must be an array of sources ($), rules (%), and vars (?). For now we only support the + // default source. :in can be omitted, in which case the default is equivalent to `:in $`. + // TODO: process `ins`. + let source = SrcVar::DefaultSrc; + + // :with is an array of variables. This is simple, so we don't use a parser. + let with_vars = with.map(values_to_variables); + // :wheres is a whole datastructure. + + super::parse::find_seq_to_find_spec(find) + .map(|spec| { + FindQuery { + find_spec: spec, + default_source: source, + } + }) + .map_err(QueryParseError::FindParseError) + + +} + +fn parse_find_map(map: BTreeMap>) -> QueryParseResult { + // Eagerly awaiting `const fn`. + let kw_find = edn::Keyword::new("find"); + let kw_in = edn::Keyword::new("in"); + let kw_with = edn::Keyword::new("with"); + let kw_where = edn::Keyword::new("where"); + + // Oh, if only we had `guard`. + if let Some(find) = map.get(&kw_find) { + if let Some(wheres) = map.get(&kw_where) { + return parse_find_parts(find, + map.get(&kw_in).map(|x| x.as_slice()), + map.get(&kw_with).map(|x| x.as_slice()), + wheres); + } else { + return Err(QueryParseError::MissingField(kw_where)); + } + } else { + return Err(QueryParseError::MissingField(kw_find)); + } +} + +fn parse_find_edn_map(map: BTreeMap) -> QueryParseResult { + // Every key must be a Keyword. Every value must be a Vec. + let mut m = BTreeMap::new(); + + if map.is_empty() { + return parse_find_map(m); + } + + for (k, v) in map { + if let edn::Value::Keyword(kw) = k { + if let edn::Value::Vector(vec) = v { + m.insert(kw, vec); + continue; + } else { + return Err(QueryParseError::InvalidInput(v)); + } + } else { + return Err(QueryParseError::InvalidInput(k)); + } + } + + parse_find_map(m) +} + +pub fn parse_find(expr: edn::Value) -> QueryParseResult { + // No `match` because scoping and use of `expr` in error handling is nuts. 
+ if let edn::Value::Map(m) = expr { + return parse_find_edn_map(m); + } + if let edn::Value::Vector(ref v) = expr { + if let Some(m) = vec_to_keyword_map(v) { + return parse_find_map(m); + } + } + return Err(QueryParseError::InvalidInput(expr)); +} diff --git a/query-parser/src/lib.rs b/query-parser/src/lib.rs index 4589b6b8..7bd90c78 100644 --- a/query-parser/src/lib.rs +++ b/query-parser/src/lib.rs @@ -8,17 +8,8 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -// This file is just a stub -pub fn get_name() -> String { - return String::from("mentat-query-parser"); -} +mod error; +mod util; +mod parse; +pub mod find; -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - assert_eq!(String::from("mentat-query-parser"), get_name()); - } -} diff --git a/query-parser/src/parse.rs b/query-parser/src/parse.rs new file mode 100644 index 00000000..b5823bbf --- /dev/null +++ b/query-parser/src/parse.rs @@ -0,0 +1,265 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +extern crate combine; +extern crate edn; +extern crate mentat_query; + +use self::combine::{eof, many1, parser, satisfy_map, Parser, ParseResult, Stream}; +use self::combine::combinator::{Expected, FnParser, choice, try}; +use self::edn::Value::PlainSymbol; +use self::mentat_query::{Element, FindSpec, Variable}; + +use super::error::{FindParseError, FindParseResult}; + +pub struct FindSp(::std::marker::PhantomData I>); + +type FindSpParser = Expected ParseResult>>; + +fn fn_parser(f: fn(I) -> ParseResult, err: &'static str) -> FindSpParser + where I: Stream +{ + parser(f).expected(err) +} + +/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map` +/// body that matches a particular `Value` enum case, otherwise returning `None`. +macro_rules! satisfy_unwrap { + ( $cas: path, $var: ident, $body: block ) => { + satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None }) + } +} + +impl FindSp + where I: Stream +{ + fn variable() -> FindSpParser { + fn_parser(FindSp::::variable_, "variable") + } + + fn variable_(input: I) -> ParseResult { + satisfy_map(|x: edn::Value| super::util::value_to_variable(&x)).parse_stream(input) + } + + fn period() -> FindSpParser<(), I> { + fn_parser(FindSp::::period_, "period") + } + + fn period_(input: I) -> ParseResult<(), I> { + satisfy_map(|x: edn::Value| { + if let PlainSymbol(ref s) = x { + if s.0.as_str() == "." { + return Some(()); + } + } + return None; + }) + .parse_stream(input) + } + + fn ellipsis() -> FindSpParser<(), I> { + fn_parser(FindSp::::ellipsis_, "ellipsis") + } + + fn ellipsis_(input: I) -> ParseResult<(), I> { + satisfy_map(|x: edn::Value| { + if let PlainSymbol(ref s) = x { + if s.0.as_str() == "..." 
{ + return Some(()); + } + } + return None; + }) + .parse_stream(input) + } + + fn find_scalar() -> FindSpParser { + fn_parser(FindSp::::find_scalar_, "find_scalar") + } + + fn find_scalar_(input: I) -> ParseResult { + (FindSp::variable(), FindSp::period(), eof()) + .map(|(var, _, _)| FindSpec::FindScalar(Element::Variable(var))) + .parse_stream(input) + } + + fn find_coll() -> FindSpParser { + fn_parser(FindSp::::find_coll_, "find_coll") + } + + fn find_coll_(input: I) -> ParseResult { + satisfy_unwrap!(edn::Value::Vector, y, { + let mut p = (FindSp::variable(), FindSp::ellipsis(), eof()) + .map(|(var, _, _)| FindSpec::FindColl(Element::Variable(var))); + let r: ParseResult = p.parse_lazy(&y[..]).into(); + FindSp::to_parsed_value(r) + }) + .parse_stream(input) + } + + fn elements() -> FindSpParser, I> { + fn_parser(FindSp::::elements_, "elements") + } + + fn elements_(input: I) -> ParseResult, I> { + (many1::, _>(FindSp::variable()), eof()) + .map(|(vars, _)| { + vars.into_iter() + .map(Element::Variable) + .collect() + }) + .parse_stream(input) + } + + fn find_rel() -> FindSpParser { + fn_parser(FindSp::::find_rel_, "find_rel") + } + + fn find_rel_(input: I) -> ParseResult { + FindSp::elements().map(FindSpec::FindRel).parse_stream(input) + } + + fn find_tuple() -> FindSpParser { + fn_parser(FindSp::::find_tuple_, "find_tuple") + } + + fn find_tuple_(input: I) -> ParseResult { + satisfy_unwrap!(edn::Value::Vector, y, { + let r: ParseResult = + FindSp::elements().map(FindSpec::FindTuple).parse_lazy(&y[..]).into(); + FindSp::to_parsed_value(r) + }) + .parse_stream(input) + } + + fn find() -> FindSpParser { + fn_parser(FindSp::::find_, "find") + } + + fn find_(input: I) -> ParseResult { + // Any one of the four specs might apply, so we combine them with `choice`. + // Our parsers consume input, so we need to wrap them in `try` so that they + // operate independently. 
+ choice::<[&mut Parser; 4], + _>([&mut try(FindSp::find_scalar()), + &mut try(FindSp::find_coll()), + &mut try(FindSp::find_tuple()), + &mut try(FindSp::find_rel())]) + .parse_stream(input) + } + + fn to_parsed_value(r: ParseResult) -> Option { + r.ok().map(|x| x.0) + } +} + +macro_rules! assert_parses_to { + ( $parser: path, $input: expr, $expected: expr ) => {{ + let mut par = $parser(); + let result = par.parse(&$input[..]); + assert_eq!(result, Ok(($expected, &[][..]))); + }} +} + +#[test] +fn test_find_sp_variable() { + let sym = edn::PlainSymbol::new("?x"); + let input = [edn::Value::PlainSymbol(sym.clone())]; + assert_parses_to!(FindSp::variable, input, Variable(sym)); +} + +#[test] +fn test_find_scalar() { + let sym = edn::PlainSymbol::new("?x"); + let period = edn::PlainSymbol::new("."); + let input = [edn::Value::PlainSymbol(sym.clone()), edn::Value::PlainSymbol(period.clone())]; + assert_parses_to!(FindSp::find_scalar, + input, + FindSpec::FindScalar(Element::Variable(Variable(sym)))); +} + +#[test] +fn test_find_coll() { + let sym = edn::PlainSymbol::new("?x"); + let period = edn::PlainSymbol::new("..."); + let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(sym.clone()), + edn::Value::PlainSymbol(period.clone())])]; + assert_parses_to!(FindSp::find_coll, + input, + FindSpec::FindColl(Element::Variable(Variable(sym)))); +} + +#[test] +fn test_find_rel() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let input = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]; + assert_parses_to!(FindSp::find_rel, + input, + FindSpec::FindRel(vec![Element::Variable(Variable(vx)), + Element::Variable(Variable(vy))])); +} + +#[test] +fn test_find_tuple() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let input = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), + edn::Value::PlainSymbol(vy.clone())])]; + assert_parses_to!(FindSp::find_tuple, + input, + 
FindSpec::FindTuple(vec![Element::Variable(Variable(vx)), + Element::Variable(Variable(vy))])); +} + +// Parse a sequence of values into one of four find specs. +// +// `:find` must be an array of plain var symbols (?foo), pull expressions, and aggregates. +// For now we only support variables and the annotations necessary to declare which +// flavor of :find we want: +// +// +// `?x ?y ?z ` = FindRel +// `[?x ...] ` = FindColl +// `?x . ` = FindScalar +// `[?x ?y ?z]` = FindTuple +// +pub fn find_seq_to_find_spec(find: &[edn::Value]) -> FindParseResult { + FindSp::find() + .parse(find) + .map(|x| x.0) + .map_err(|_| FindParseError::Err) +} + +#[test] +fn test_find_processing() { + let vx = edn::PlainSymbol::new("?x"); + let vy = edn::PlainSymbol::new("?y"); + let ellipsis = edn::PlainSymbol::new("..."); + let period = edn::PlainSymbol::new("."); + + let scalar = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(period.clone())]; + let tuple = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), + edn::Value::PlainSymbol(vy.clone())])]; + let coll = [edn::Value::Vector(vec![edn::Value::PlainSymbol(vx.clone()), + edn::Value::PlainSymbol(ellipsis.clone())])]; + let rel = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())]; + + assert_eq!(Ok(FindSpec::FindScalar(Element::Variable(Variable(vx.clone())))), + find_seq_to_find_spec(&scalar)); + assert_eq!(Ok(FindSpec::FindTuple(vec![Element::Variable(Variable(vx.clone())), + Element::Variable(Variable(vy.clone()))])), + find_seq_to_find_spec(&tuple)); + assert_eq!(Ok(FindSpec::FindColl(Element::Variable(Variable(vx.clone())))), + find_seq_to_find_spec(&coll)); + assert_eq!(Ok(FindSpec::FindRel(vec![Element::Variable(Variable(vx.clone())), + Element::Variable(Variable(vy.clone()))])), + find_seq_to_find_spec(&rel)); +} diff --git a/query-parser/src/util.rs b/query-parser/src/util.rs new file mode 100644 index 00000000..e3f24657 --- /dev/null +++ b/query-parser/src/util.rs @@ -0,0 
+1,187 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate edn; +extern crate mentat_query; + +use std::collections::BTreeMap; + +use self::edn::Value::PlainSymbol; +use self::mentat_query::Variable; +use super::error::NotAVariableError; + +/// If the provided EDN value is a PlainSymbol beginning with '?', return +/// it wrapped in a Variable. If not, return None. +pub fn value_to_variable(v: &edn::Value) -> Option { + if let PlainSymbol(ref sym) = *v { + if sym.0.starts_with('?') { + return Some(Variable(sym.clone())); + } + } + return None; +} + +/// If the provided slice of EDN values are all variables as +/// defined by `value_to_variable`, return a Vec of Variables. +/// Otherwise, return the unrecognized Value. +pub fn values_to_variables(vals: &[edn::Value]) -> Result, NotAVariableError> { + let mut out: Vec = Vec::with_capacity(vals.len()); + for v in vals { + if let Some(var) = value_to_variable(v) { + out.push(var); + continue; + } + return Err(NotAVariableError(v.clone())); + } + return Ok(out); +} + +#[test] +fn test_values_to_variables() { + // TODO +} + +/// Take a slice of EDN values, as would be extracted from an +/// `edn::Value::Vector`, and turn it into a map. +/// +/// The slice must consist of subsequences of an initial plain +/// keyword, followed by one or more non-plain-keyword values. +/// +/// The plain keywords are used as keys into the resulting map. +/// The values are accumulated into vectors. 
+/// +/// Invalid input causes this function to return `None`. +/// +/// TODO: this function can be generalized to take an arbitrary +/// destructuring/break function, yielding a map with a custom +/// key type and splitting in the right places. +pub fn vec_to_keyword_map(vec: &[edn::Value]) -> Option>> { + let mut m = BTreeMap::new(); + + if vec.is_empty() { + return Some(m); + } + + if vec.len() == 1 { + return None; + } + + // Turn something like + // + // `[:foo 1 2 3 :bar 4 5 6]` + // + // into + // + // `Some((:foo, [1 2 3]))` + fn step(slice: &[edn::Value]) -> Option<(edn::Keyword, Vec)> { + // [:foo 1 2 3 :bar] is invalid: nothing follows `:bar`. + if slice.len() < 2 { + return None; + } + + // The first item must be a keyword. + if let edn::Value::Keyword(ref k) = slice[0] { + + // The second can't be: [:foo :bar 1 2 3] is invalid. + if slice[1].is_keyword() { + return None; + } + + // Accumulate items until we reach the next keyword. + let mut acc = Vec::new(); + for v in &slice[1..] { + if v.is_keyword() { + break; + } + acc.push(v.clone()); + } + return Some((k.clone(), acc)); + } + + None + } + + let mut bits = vec; + while !bits.is_empty() { + match step(bits) { + Some((k, v)) => { + bits = &bits[(v.len() + 1)..]; + + // Duplicate keys aren't allowed. 
+ if m.contains_key(&k) { + return None; + } + m.insert(k, v); + }, + None => return None, + } + } + return Some(m); +} + +#[test] +fn test_vec_to_keyword_map() { + let foo = edn::symbols::Keyword("foo".to_string()); + let bar = edn::symbols::Keyword("bar".to_string()); + let baz = edn::symbols::Keyword("baz".to_string()); + + // [:foo 1 2 3 :bar 4] + let input = vec!(edn::Value::Keyword(foo.clone()), + edn::Value::Integer(1), + edn::Value::Integer(2), + edn::Value::Integer(3), + edn::Value::Keyword(bar.clone()), + edn::Value::Integer(4)); + + let m = vec_to_keyword_map(&input).unwrap(); + + assert!(m.contains_key(&foo)); + assert!(m.contains_key(&bar)); + assert!(!m.contains_key(&baz)); + + let onetwothree = vec!(edn::Value::Integer(1), + edn::Value::Integer(2), + edn::Value::Integer(3)); + let four = vec!(edn::Value::Integer(4)); + + assert_eq!(m.get(&foo).unwrap(), &onetwothree); + assert_eq!(m.get(&bar).unwrap(), &four); + + // Trailing keywords aren't allowed. + assert_eq!(None, + vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone())))); + assert_eq!(None, + vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), + edn::Value::Integer(2), + edn::Value::Keyword(bar.clone())))); + + // Duplicate keywords aren't allowed. + assert_eq!(None, + vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), + edn::Value::Integer(2), + edn::Value::Keyword(foo.clone()), + edn::Value::Integer(1)))); + + // Starting with anything but a keyword isn't allowed. + assert_eq!(None, + vec_to_keyword_map(&vec!(edn::Value::Integer(2), + edn::Value::Keyword(foo.clone()), + edn::Value::Integer(1)))); + + // Consecutive keywords aren't allowed. + assert_eq!(None, + vec_to_keyword_map(&vec!(edn::Value::Keyword(foo.clone()), + edn::Value::Keyword(bar.clone()), + edn::Value::Integer(1)))); + + // Empty lists return an empty map. 
+ assert_eq!(BTreeMap::new(), vec_to_keyword_map(&vec!()).unwrap()); +} + diff --git a/query-parser/tests/find_tests.rs b/query-parser/tests/find_tests.rs new file mode 100644 index 00000000..cdf2d0da --- /dev/null +++ b/query-parser/tests/find_tests.rs @@ -0,0 +1,35 @@ +// Copyright 2016 Mozilla +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use +// this file except in compliance with the License. You may obtain a copy of the +// License at http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +extern crate mentat_query_parser; +extern crate mentat_query; +extern crate edn; + +use mentat_query::FindSpec::*; +use mentat_query::Element; +use mentat_query::Variable; +use edn::PlainSymbol; + +///! N.B., parsing a query can be done without reference to a DB. +///! Processing the parsed query into something we can work with +///! for planning involves interrogating the schema and idents in +///! the store. +///! See for more. + +#[test] +fn can_parse_trivial_find() { + let find = FindScalar(Element::Variable(Variable(PlainSymbol("?foo".to_string())))); + + if let FindScalar(Element::Variable(Variable(PlainSymbol(name)))) = find { + assert_eq!("?foo", name); + } else { + panic!() + } +} diff --git a/query/Cargo.toml b/query/Cargo.toml index a58371a9..b3379943 100644 --- a/query/Cargo.toml +++ b/query/Cargo.toml @@ -3,5 +3,8 @@ name = "mentat_query" version = "0.0.1" [dependencies] +[dependencies.num] # For EDN value usage. 
+[dependencies.ordered-float] + [dependencies.edn] path = "../edn" diff --git a/query/src/find.rs b/query/src/find.rs deleted file mode 100644 index 8f5f8cd8..00000000 --- a/query/src/find.rs +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2016 Mozilla -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use -// this file except in compliance with the License. You may obtain a copy of the -// License at http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -///! This module defines some core types that support find expressions: sources, -///! variables, expressions, etc. -///! These are produced as 'fuel' by the query parser, consumed by the query -///! translator and executor. -///! -///! Many of these types are defined as simple structs that are little more than -///! a richer type alias: a variable, for example, is really just a fancy kind -///! of string. -///! -///! At some point in the future, we might consider reducing copying and memory -///! usage by recasting all of these string-holding structs and enums in terms -///! of string references, with those references being slices of some parsed -///! input query string, and valid for the lifetime of that string. -///! -///! For now, for the sake of simplicity, all of these strings are heap-allocated. -///! -///! Furthermore, we might cut out some of the chaff here: each time a 'tagged' -///! type is used within an enum, we have an opportunity to simplify and use the -///! inner type directly in conjunction with matching on the enum. Before diving -///! deeply into this it's worth recognizing that this loss of 'sovereignty' is -///! 
a tradeoff against well-typed function signatures and other such boundaries. - - -// TODO: support other kinds of sources. -#[derive(Clone,Debug,Eq,PartialEq)] -pub enum SrcVar { - DefaultSrc, -} - -pub enum Constant {} // This is essentially Box. TODO: flesh out. - -#[derive(Clone,Debug,Eq,PartialEq)] -pub struct Variable { - pub name: String, -} - -pub enum FnArg { - Constant { constant: Constant }, - Variable { variable: Variable }, - Src { src: SrcVar }, -} - -pub enum PullPattern { - Constant { constant: Constant }, - Variable { variable: Variable }, -} - -pub struct Pull { - pub src: SrcVar, - pub var: Variable, - pub pattern: PullPattern, // Constant, variable, or plain variable. -} - -pub struct Aggregate { - pub fn_name: String, - pub args: Vec, -} - -// TODO: look up the idiomatic way to express these kinds of type -// combinations in Rust. It must be common in ASTs. Trait objects -// presumably aren't the answer… -pub enum Element { - Variable { variable: Variable }, - Pull { expression: Pull }, - Aggregate { expression: Aggregate }, -} - -/// A definition of the first part of a find query: the -/// `[:find ?foo ?bar…]` bit. -/// -/// There are four different kinds of find specs, allowing you to query for -/// a single value, a collection of values from different entities, a single -/// tuple (relation), or a collection of tuples. -/// -/// Examples: -/// -/// ```rust -/// # use mentat_query::find::{FindSpec, Element, Variable}; -/// -/// // TODO: this feels clunky. -/// let foo = Variable { name: "foo".to_string() }; -/// let bar = Variable { name: "bar".to_string() }; -/// let elements = vec![ -/// Element::Variable { variable: foo }, -/// Element::Variable { variable: bar }, -/// ]; -/// let rel = FindSpec::FindRel { elements: elements }; -/// -/// if let FindSpec::FindRel { elements } = rel { -/// assert_eq!(2, elements.len()); -/// } -/// ``` -/// -pub enum FindSpec { - /// Returns an array of arrays. 
- FindRel { elements: Vec }, - - /// Returns an array of scalars, usually homogeneous. - /// This is equivalent to mapping over the results of a `FindRel`, - /// returning the first value of each. - FindColl { element: Element }, - - /// Returns a single tuple: a heterogeneous array of scalars. Equivalent to - /// taking the first result from a `FindRel`. - FindTuple { elements: Vec }, - - /// Returns a single scalar value. Equivalent to taking the first result - /// from a `FindColl`. - FindScalar { element: Element }, -} - -/// Returns true if the provided `FindSpec` returns at most one result. -pub fn is_unit_limited(spec: &FindSpec) -> bool { - match spec { - &FindSpec::FindScalar { .. } => true, - &FindSpec::FindTuple { .. } => true, - &FindSpec::FindRel { .. } => false, - &FindSpec::FindColl { .. } => false, - } -} - -/// Returns true if the provided `FindSpec` cares about distinct results. -pub fn requires_distinct(spec: &FindSpec) -> bool { - return !is_unit_limited(spec); -} diff --git a/query/src/lib.rs b/query/src/lib.rs index d0406f59..c1b3cf14 100644 --- a/query/src/lib.rs +++ b/query/src/lib.rs @@ -8,4 +8,236 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. -pub mod find; +///! This module defines some core types that support find expressions: sources, +///! variables, expressions, etc. +///! These are produced as 'fuel' by the query parser, consumed by the query +///! translator and executor. +///! +///! Many of these types are defined as simple structs that are little more than +///! a richer type alias: a variable, for example, is really just a fancy kind +///! of string. +///! +///! At some point in the future, we might consider reducing copying and memory +///! usage by recasting all of these string-holding structs and enums in terms +///! of string references, with those references being slices of some parsed +///! 
input query string, and valid for the lifetime of that string. +///! +///! For now, for the sake of simplicity, all of these strings are heap-allocated. +///! +///! Furthermore, we might cut out some of the chaff here: each time a 'tagged' +///! type is used within an enum, we have an opportunity to simplify and use the +///! inner type directly in conjunction with matching on the enum. Before diving +///! deeply into this it's worth recognizing that this loss of 'sovereignty' is +///! a tradeoff against well-typed function signatures and other such boundaries. + +extern crate edn; +extern crate num; +extern crate ordered_float; + +use num::BigInt; +use ordered_float::OrderedFloat; +use edn::{NamespacedKeyword, PlainSymbol}; + +pub type SrcVarName = String; // Do not include the required syntactic '$'. + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Variable(pub PlainSymbol); + +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum SrcVar { + DefaultSrc, + NamedSrc(SrcVarName), +} + +/// These are the scalar values representable in EDN. +#[derive(Clone,Debug,Eq,PartialEq)] +pub enum NonIntegerConstant { + Boolean(bool), + BigInteger(BigInt), + Float(OrderedFloat), + Text(String), +} + +pub enum FnArg { + Variable(Variable), + SrcVar(SrcVar), + EntidOrInteger(i64), + Ident(NamespacedKeyword), + Constant(NonIntegerConstant), +} + +/// e, a, tx can't be values -- no strings, no floats -- and so +/// they can only be variables, entity IDs, ident keywords, or +/// placeholders. +/// This encoding allows us to represent integers that aren't +/// entity IDs. That'll get filtered out in the context of the +/// database. +pub enum PatternNonValuePlace { + Placeholder, + Variable(Variable), + Entid(u64), // Note unsigned. See #190. + Ident(NamespacedKeyword), +} + +/// The `v` part of a pattern can be much broader: it can represent +/// integers that aren't entity IDs (particularly negative integers), +/// strings, and all the rest. We group those under `Constant`. 
+pub enum PatternValuePlace {
+    Placeholder,
+    Variable(Variable),
+    EntidOrInteger(i64),
+    Ident(NamespacedKeyword),
+    Constant(NonIntegerConstant),
+}
+
+/*
+pub enum PullPattern {
+    Constant(Constant),
+    Variable(Variable),
+}
+
+pub struct Pull {
+    pub src: SrcVar,
+    pub var: Variable,
+    pub pattern: PullPattern, // Constant, variable, or plain variable.
+}
+*/
+
+/*
+pub struct Aggregate {
+    pub fn_name: String,
+    pub args: Vec,
+}
+*/
+
+#[derive(Clone,Debug,Eq,PartialEq)]
+pub enum Element {
+    Variable(Variable),
+    // Aggregate(Aggregate), // TODO
+    // Pull(Pull), // TODO
+}
+
+/// A definition of the first part of a find query: the
+/// `[:find ?foo ?bar…]` bit.
+///
+/// There are four different kinds of find specs, allowing you to query for
+/// a single value, a collection of values from different entities, a single
+/// tuple (relation), or a collection of tuples.
+///
+/// Examples:
+///
+/// ```rust
+/// # extern crate edn;
+/// # extern crate mentat_query;
+/// # use edn::PlainSymbol;
+/// # use mentat_query::{Element, FindSpec, Variable};
+///
+/// # fn main() {
+///
+/// let elements = vec![
+///     Element::Variable(Variable(PlainSymbol("?foo".to_string()))),
+///     Element::Variable(Variable(PlainSymbol("?bar".to_string()))),
+/// ];
+/// let rel = FindSpec::FindRel(elements);
+///
+/// if let FindSpec::FindRel(elements) = rel {
+///     assert_eq!(2, elements.len());
+/// }
+///
+/// # }
+/// ```
+///
+#[derive(Clone,Debug,Eq,PartialEq)]
+pub enum FindSpec {
+    /// Returns an array of arrays.
+    FindRel(Vec<Element>),
+
+    /// Returns an array of scalars, usually homogeneous.
+    /// This is equivalent to mapping over the results of a `FindRel`,
+    /// returning the first value of each.
+    FindColl(Element),
+
+    /// Returns a single tuple: a heterogeneous array of scalars. Equivalent to
+    /// taking the first result from a `FindRel`.
+    FindTuple(Vec<Element>),
+
+    /// Returns a single scalar value. Equivalent to taking the first result
+    /// from a `FindColl`.
+    FindScalar(Element),
+}
+
+#[derive(Clone,Debug,Eq,PartialEq)]
+#[allow(dead_code)]
+pub struct FindQuery {
+    pub find_spec: FindSpec,
+    pub default_source: SrcVar,
+}
+
+/// Returns true if the provided `FindSpec` returns at most one result.
+pub fn is_unit_limited(spec: &FindSpec) -> bool {
+    match *spec {
+        FindSpec::FindScalar(..) => true,
+        FindSpec::FindTuple(..) => true,
+        FindSpec::FindRel(..) => false,
+        FindSpec::FindColl(..) => false,
+    }
+}
+
+/// Returns true if the provided `FindSpec` cares about distinct results.
+///
+/// I use the words "cares about" because find is generally defined in terms of producing distinct
+/// results at the Datalog level.
+///
+/// Two of the find specs (scalar and tuple) produce only a single result. Those don't need to be
+/// run with `SELECT DISTINCT`, because we're only consuming a single result. Those queries will be
+/// run with `LIMIT 1`.
+///
+/// Additionally, some projections cannot produce duplicate results: `[:find (max ?x) …]`, for
+/// example.
+///
+/// This function gives us the hook to add that logic when we're ready.
+///
+/// Beyond this, `DISTINCT` is not always needed. For example, in some kinds of accumulation or
+/// sampling projections we might not need to do it at the SQL level because we're consuming into
+/// a dupe-eliminating data structure like a Set, or we know that a particular query cannot produce
+/// duplicate results.
+pub fn requires_distinct(spec: &FindSpec) -> bool {
+    !is_unit_limited(spec)
+}
+
+// Note that the "implicit blank" rule applies.
+// A pattern with a reversed attribute — :foo/_bar — is reversed
+// at the point of parsing. These `Pattern` instances only represent
+// one direction.
+#[allow(dead_code)]
+pub struct Pattern {
+    source: Option<SrcVar>,
+    entity: PatternNonValuePlace,
+    attribute: PatternNonValuePlace,
+    value: PatternValuePlace,
+    tx: PatternNonValuePlace,
+}
+
+#[allow(dead_code)]
+pub enum WhereClause {
+    /*
+    Not,
+    NotJoin,
+    Or,
+    OrJoin,
+    Pred,
+    WhereFn,
+    RuleExpr,
+    */
+    Pattern,
+}
+
+#[allow(dead_code)]
+pub struct Query {
+    find: FindSpec,
+    with: Vec<Variable>,
+    in_vars: Vec<Variable>,
+    in_sources: Vec<SrcVar>,
+    where_clauses: Vec<WhereClause>,
+    // TODO: in_rules;
+}
diff --git a/src/lib.rs b/src/lib.rs
index 2f07abbb..aeb98506 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,11 +21,6 @@ pub fn get_name() -> String {
     return String::from("mentat");
 }
 
-// Just an example of using a dependency
-pub fn get_parser_name() -> String {
-    return mentat_query_parser::get_name();
-}
-
 // Will ultimately not return the sqlite connection directly
 pub fn get_connection() -> Connection {
     return Connection::open_in_memory().unwrap();
@@ -40,9 +35,4 @@ mod tests {
     fn can_import_edn() {
         assert_eq!("foo", Keyword::new("foo").0);
     }
-
-    #[test]
-    fn can_import_parser() {
-        assert_eq!(String::from("mentat-query-parser"), get_parser_name());
-    }
 }