Implement parsing of simple :find expressions. (#196) r=nalexander

* Test the mentat_query directory on Travis.

* Export common types from edn.

This allows you to write

  use edn::{PlainSymbol,Keyword};

instead of

  use edn::symbols::{PlainSymbol,Keyword};

* Add an edn::Value::is_keyword predicate.

* Clean up query, preparing for query-parser.

* Make EDN keywords and symbols take Into<String> arguments.

* Implement parsing of simple :find lists.

* Rustfmt query-parser. Split find and query.

* Review comment: values_to_variables now returns a NotAVariableError on failure.

* Review comment: rename gimme to to_parsed_value.

* Review comment: add comments.
This commit is contained in:
Richard Newman 2017-01-25 14:06:19 -08:00 committed by GitHub
parent b77d124152
commit 2592506288
16 changed files with 925 additions and 165 deletions

View file

@ -3,5 +3,6 @@ script:
- cargo build --verbose - cargo build --verbose
- cargo test --verbose - cargo test --verbose
- cargo test --verbose -p edn - cargo test --verbose -p edn
- cargo test --verbose -p mentat_query
- cargo test --verbose -p mentat_query_parser - cargo test --verbose -p mentat_query_parser
- cargo test --verbose -p mentat_tx_parser - cargo test --verbose -p mentat_tx_parser

View file

@ -19,3 +19,6 @@ pub mod types;
pub mod parse { pub mod parse {
include!(concat!(env!("OUT_DIR"), "/edn.rs")); include!(concat!(env!("OUT_DIR"), "/edn.rs"));
} }
pub use self::types::Value;
pub use self::symbols::{Keyword, NamespacedKeyword, PlainSymbol, NamespacedSymbol};

View file

@ -69,10 +69,11 @@ pub struct NamespacedKeyword {
} }
impl PlainSymbol { impl PlainSymbol {
pub fn new(name: &str) -> Self { pub fn new<T>(name: T) -> Self where T: Into<String> {
assert!(!name.is_empty(), "Symbols cannot be unnamed."); let n = name.into();
assert!(!n.is_empty(), "Symbols cannot be unnamed.");
return PlainSymbol(name.to_string()); return PlainSymbol(n);
} }
} }
@ -86,10 +87,11 @@ impl NamespacedSymbol {
} }
impl Keyword { impl Keyword {
pub fn new(name: &str) -> Self { pub fn new<T>(name: T) -> Self where T: Into<String>{
assert!(!name.is_empty(), "Keywords cannot be unnamed."); let n = name.into();
assert!(!n.is_empty(), "Keywords cannot be unnamed.");
return Keyword(name.to_string()); return Keyword(n);
} }
} }

View file

@ -41,6 +41,15 @@ pub enum Value {
use self::Value::*; use self::Value::*;
impl Value {
    /// Returns `true` when this value is the `Keyword` variant, and `false`
    /// for every other variant.
    pub fn is_keyword(&self) -> bool {
        if let Keyword(_) = *self {
            true
        } else {
            false
        }
    }
}
impl PartialOrd for Value { impl PartialOrd for Value {
fn partial_cmp(&self, other: &Value) -> Option<Ordering> { fn partial_cmp(&self, other: &Value) -> Option<Ordering> {
Some(self.cmp(other)) Some(self.cmp(other))

View file

@ -3,6 +3,8 @@ name = "mentat_query_parser"
version = "0.0.1" version = "0.0.1"
[dependencies] [dependencies]
combine = "2.1.1"
[dependencies.edn] [dependencies.edn]
path = "../edn" path = "../edn"

2
query-parser/README.md Normal file
View file

@ -0,0 +1,2 @@
See <https://github.com/mozilla/mentat/wiki/Querying> for a description of
what's going on in this crate, as well as `query` and `query-executor`.

35
query-parser/src/error.rs Normal file
View file

@ -0,0 +1,35 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
extern crate combine;
extern crate edn;
extern crate mentat_query;
use self::mentat_query::{FindSpec, FindQuery};
/// Error describing an EDN value that was expected to be a variable but is
/// not one. The offending value is carried as the single public field.
#[derive(Clone,Debug,Eq,PartialEq)]
pub struct NotAVariableError(pub edn::Value);
/// Error produced when the `:find` part of a query cannot be parsed into a
/// `FindSpec`. It currently has a single, detail-free case.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum FindParseError {
// The find sequence did not match any supported find spec.
Err,
}
/// The ways in which turning an EDN value into a `FindQuery` can fail.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum QueryParseError {
// The input (or one of its map keys/values) has the wrong shape; carries the
// offending value.
InvalidInput(edn::Value),
// Wraps an EDN parser error. NOTE(review): nothing in this crate constructs
// this variant yet -- presumably reserved for parsing from strings.
EdnParseError(edn::parse::ParseError),
// A required part of the query is absent; carries the missing keyword.
MissingField(edn::Keyword),
// The `:find` part was present but could not be parsed.
FindParseError(FindParseError),
}
/// Result of parsing just the `:find` part of a query.
pub type FindParseResult = Result<FindSpec, FindParseError>;
/// Result of parsing a whole query.
pub type QueryParseResult = Result<FindQuery, QueryParseError>;

138
query-parser/src/find.rs Normal file
View file

@ -0,0 +1,138 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//! This module defines the interface and implementation for parsing an EDN
//! input into a structured Datalog query.
//!
//! The query types are defined in the `query` crate, because they
//! are shared between the parser (EDN -> query), the translator
//! (query -> SQL), and the executor (query, SQL -> running code).
//!
//! The query input can be in two forms: a 'flat' human-oriented
//! sequence:
//!
//! ```clojure
//! [:find ?y :in $ ?x :where [?x :foaf/knows ?y]]
//! ```
//!
//! or a more programmatically generable map:
//!
//! ```clojure
//! {:find [?y]
//!  :in [$]
//!  :where [[?x :foaf/knows ?y]]}
//! ```
//!
//! We parse by expanding the array format into four parts, treating them as the four
//! parts of the map.
extern crate edn;
extern crate mentat_query;
use std::collections::BTreeMap;
use self::mentat_query::{FindQuery, SrcVar};
use super::error::{QueryParseError, QueryParseResult};
use super::util::{values_to_variables, vec_to_keyword_map};
#[allow(unused_variables)]
fn parse_find_parts(find: &[edn::Value],
ins: Option<&[edn::Value]>,
with: Option<&[edn::Value]>,
wheres: &[edn::Value])
-> QueryParseResult {
// :find must be an array of plain var symbols (?foo), pull expressions, and aggregates.
// For now we only support variables and the annotations necessary to declare which
// flavor of :find we want:
// ?x ?y ?z = FindRel
// [?x ...] = FindColl
// ?x . = FindScalar
// [?x ?y ?z] = FindTuple
//
// :in must be an array of sources ($), rules (%), and vars (?). For now we only support the
// default source. :in can be omitted, in which case the default is equivalent to `:in $`.
// TODO: process `ins`.
let source = SrcVar::DefaultSrc;
// :with is an array of variables. This is simple, so we don't use a parser.
let with_vars = with.map(values_to_variables);
// :wheres is a whole datastructure.
super::parse::find_seq_to_find_spec(find)
.map(|spec| {
FindQuery {
find_spec: spec,
default_source: source,
}
})
.map_err(QueryParseError::FindParseError)
}
/// Build a query from a map of plain keywords (`find`, `in`, `with`, `where`)
/// to the values that belong to each part.
///
/// `find` and `where` are required; if either is absent the result is
/// `QueryParseError::MissingField` carrying the missing keyword (`find` is
/// checked first). `in` and `with` are optional.
fn parse_find_map(map: BTreeMap<edn::Keyword, Vec<edn::Value>>) -> QueryParseResult {
    // Eagerly awaiting `const fn`.
    let kw_find = edn::Keyword::new("find");
    let kw_in = edn::Keyword::new("in");
    let kw_with = edn::Keyword::new("with");
    let kw_where = edn::Keyword::new("where");

    // Guard clauses: bail out as soon as a required part is missing.
    let find = match map.get(&kw_find) {
        Some(values) => values,
        None => return Err(QueryParseError::MissingField(kw_find)),
    };
    let wheres = match map.get(&kw_where) {
        Some(values) => values,
        None => return Err(QueryParseError::MissingField(kw_where)),
    };

    let ins = map.get(&kw_in).map(|x| x.as_slice());
    let with = map.get(&kw_with).map(|x| x.as_slice());
    parse_find_parts(find, ins, with, wheres)
}
/// Convert an EDN map into a keyword-to-values map and parse it as a query.
///
/// Every key must be a `Keyword` and every value a `Vector`. The first entry
/// that breaks this rule is returned inside `QueryParseError::InvalidInput`
/// (the key is checked before the value). An empty map is passed through
/// unchanged, so it fails later with a missing-field error.
fn parse_find_edn_map(map: BTreeMap<edn::Value, edn::Value>) -> QueryParseResult {
    let mut parts = BTreeMap::new();
    for (key, value) in map {
        let kw = match key {
            edn::Value::Keyword(kw) => kw,
            other => return Err(QueryParseError::InvalidInput(other)),
        };
        let items = match value {
            edn::Value::Vector(items) => items,
            other => return Err(QueryParseError::InvalidInput(other)),
        };
        parts.insert(kw, items);
    }
    parse_find_map(parts)
}
/// Parse an EDN value into a `FindQuery`.
///
/// Accepts either the map form (`{:find [...] :where [...]}`) or the flat
/// vector form (`[:find ... :where ...]`). Any other value, or a vector that
/// cannot be split into keyword-led parts, is handed back inside
/// `QueryParseError::InvalidInput`.
pub fn parse_find(expr: edn::Value) -> QueryParseResult {
    // The map case consumes `expr`, so it is handled on its own first; the
    // vector case only borrows, leaving `expr` available for the error below.
    if let edn::Value::Map(m) = expr {
        return parse_find_edn_map(m);
    }

    let from_vector = match expr {
        edn::Value::Vector(ref v) => vec_to_keyword_map(v),
        _ => None,
    };

    match from_vector {
        Some(m) => parse_find_map(m),
        None => Err(QueryParseError::InvalidInput(expr)),
    }
}

View file

@ -8,17 +8,8 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the // CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License. // specific language governing permissions and limitations under the License.
// This file is just a stub mod error;
pub fn get_name() -> String { mod util;
return String::from("mentat-query-parser"); mod parse;
} pub mod find;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn it_works() {
assert_eq!(String::from("mentat-query-parser"), get_name());
}
}

265
query-parser/src/parse.rs Normal file
View file

@ -0,0 +1,265 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
extern crate combine;
extern crate edn;
extern crate mentat_query;
use self::combine::{eof, many1, parser, satisfy_map, Parser, ParseResult, Stream};
use self::combine::combinator::{Expected, FnParser, choice, try};
use self::edn::Value::PlainSymbol;
use self::mentat_query::{Element, FindSpec, Variable};
use super::error::{FindParseError, FindParseResult};
/// Namespace type for the `:find` spec parsers, generic over the input stream
/// `I`. It holds only `PhantomData`, so it stores no data of its own.
pub struct FindSp<I>(::std::marker::PhantomData<fn(I) -> I>);
/// The concrete parser type produced by `fn_parser`: a plain function pointer
/// wrapped as a parser (`FnParser`) and labelled with an expected-item message
/// (`Expected`). `O` is the parser's output, `I` its input stream.
type FindSpParser<O, I> = Expected<FnParser<I, fn(I) -> ParseResult<O, I>>>;
/// Wrap the parsing function `f` as a parser and attach `err` as the
/// description of what it expects, for use in error messages.
fn fn_parser<O, I>(f: fn(I) -> ParseResult<O, I>, err: &'static str) -> FindSpParser<O, I>
    where I: Stream<Item = edn::Value>
{
    let unlabelled = parser(f);
    unlabelled.expected(err)
}
/// `satisfy_unwrap!` makes it a little easier to implement a `satisfy_map`
/// body that matches a particular `Value` enum case, otherwise returning `None`.
///
/// * `$cas` -- path of the single-field enum variant to match against.
/// * `$var` -- identifier bound to the variant's payload inside `$body`.
/// * `$body` -- block evaluated on a match; it must evaluate to an `Option`,
///   because the non-matching branch evaluates to `None`.
macro_rules! satisfy_unwrap {
( $cas: path, $var: ident, $body: block ) => {
satisfy_map(|x: edn::Value| if let $cas($var) = x $body else { None })
}
}
/// Parsers for the pieces of a `:find` spec.
///
/// Each parser comes as a pair: `foo_` is the parsing function itself, and
/// `foo` wraps it via `fn_parser` with a name used in "expected" messages.
impl<I> FindSp<I>
    where I: Stream<Item = edn::Value>
{
    /// Parser for a single variable, as recognised by `util::value_to_variable`.
    fn variable() -> FindSpParser<Variable, I> {
        fn_parser(FindSp::<I>::variable_, "variable")
    }

    fn variable_(input: I) -> ParseResult<Variable, I> {
        satisfy_map(|value: edn::Value| super::util::value_to_variable(&value))
            .parse_stream(input)
    }

    /// Parser for the plain symbol `.`, which marks a scalar find spec.
    fn period() -> FindSpParser<(), I> {
        fn_parser(FindSp::<I>::period_, "period")
    }

    fn period_(input: I) -> ParseResult<(), I> {
        satisfy_map(|value: edn::Value| {
                match value {
                    PlainSymbol(ref sym) if sym.0.as_str() == "." => Some(()),
                    _ => None,
                }
            })
            .parse_stream(input)
    }

    /// Parser for the plain symbol `...`, which marks a collection find spec.
    fn ellipsis() -> FindSpParser<(), I> {
        fn_parser(FindSp::<I>::ellipsis_, "ellipsis")
    }

    fn ellipsis_(input: I) -> ParseResult<(), I> {
        satisfy_map(|value: edn::Value| {
                match value {
                    PlainSymbol(ref sym) if sym.0.as_str() == "..." => Some(()),
                    _ => None,
                }
            })
            .parse_stream(input)
    }

    /// Parser for `?x .` followed by end of input, yielding `FindScalar`.
    fn find_scalar() -> FindSpParser<FindSpec, I> {
        fn_parser(FindSp::<I>::find_scalar_, "find_scalar")
    }

    fn find_scalar_(input: I) -> ParseResult<FindSpec, I> {
        (FindSp::variable(), FindSp::period(), eof())
            .map(|(variable, _, _)| FindSpec::FindScalar(Element::Variable(variable)))
            .parse_stream(input)
    }

    /// Parser for a vector `[?x ...]`, yielding `FindColl`.
    fn find_coll() -> FindSpParser<FindSpec, I> {
        fn_parser(FindSp::<I>::find_coll_, "find_coll")
    }

    fn find_coll_(input: I) -> ParseResult<FindSpec, I> {
        // The vector's contents are parsed by a nested parser running over
        // the vector as its own input stream.
        satisfy_unwrap!(edn::Value::Vector, items, {
                let mut inner = (FindSp::variable(), FindSp::ellipsis(), eof())
                    .map(|(variable, _, _)| FindSpec::FindColl(Element::Variable(variable)));
                let parsed: ParseResult<FindSpec, _> = inner.parse_lazy(&items[..]).into();
                FindSp::to_parsed_value(parsed)
            })
            .parse_stream(input)
    }

    /// Parser for one or more variables followed by end of input, yielding
    /// them wrapped as `Element`s.
    fn elements() -> FindSpParser<Vec<Element>, I> {
        fn_parser(FindSp::<I>::elements_, "elements")
    }

    fn elements_(input: I) -> ParseResult<Vec<Element>, I> {
        (many1::<Vec<Variable>, _>(FindSp::variable()), eof())
            .map(|(variables, _)| {
                variables.into_iter()
                    .map(Element::Variable)
                    .collect()
            })
            .parse_stream(input)
    }

    /// Parser for `?x ?y ?z`, yielding `FindRel`.
    fn find_rel() -> FindSpParser<FindSpec, I> {
        fn_parser(FindSp::<I>::find_rel_, "find_rel")
    }

    fn find_rel_(input: I) -> ParseResult<FindSpec, I> {
        FindSp::elements().map(FindSpec::FindRel).parse_stream(input)
    }

    /// Parser for a vector `[?x ?y ?z]`, yielding `FindTuple`.
    fn find_tuple() -> FindSpParser<FindSpec, I> {
        fn_parser(FindSp::<I>::find_tuple_, "find_tuple")
    }

    fn find_tuple_(input: I) -> ParseResult<FindSpec, I> {
        // As with `find_coll_`, the vector is parsed as a stream of its own.
        satisfy_unwrap!(edn::Value::Vector, items, {
                let parsed: ParseResult<FindSpec, _> =
                    FindSp::elements().map(FindSpec::FindTuple).parse_lazy(&items[..]).into();
                FindSp::to_parsed_value(parsed)
            })
            .parse_stream(input)
    }

    /// Parser accepting any of the four find specs.
    fn find() -> FindSpParser<FindSpec, I> {
        fn_parser(FindSp::<I>::find_, "find")
    }

    fn find_(input: I) -> ParseResult<FindSpec, I> {
        // Any one of the four specs might apply, so we combine them with `choice`.
        // Our parsers consume input, so we need to wrap them in `try` so that they
        // operate independently.
        choice::<[&mut Parser<Input = I, Output = FindSpec>; 4],
                 _>([&mut try(FindSp::find_scalar()),
                     &mut try(FindSp::find_coll()),
                     &mut try(FindSp::find_tuple()),
                     &mut try(FindSp::find_rel())])
            .parse_stream(input)
    }

    /// Reduce a parse result to just its parsed value, discarding the
    /// remaining input on success and the error details on failure.
    fn to_parsed_value<T>(r: ParseResult<T, I>) -> Option<T> {
        match r {
            Ok((value, _)) => Some(value),
            Err(_) => None,
        }
    }
}
/// Test helper: build a parser by calling `$parser()`, run it over `$input`
/// viewed as a slice, and assert that it succeeds with `$expected` and leaves
/// an empty slice of remaining input.
macro_rules! assert_parses_to {
( $parser: path, $input: expr, $expected: expr ) => {{
let mut par = $parser();
let result = par.parse(&$input[..]);
assert_eq!(result, Ok(($expected, &[][..])));
}}
}
#[test]
fn test_find_sp_variable() {
    // A lone `?x` symbol parses to the matching `Variable`.
    let x = edn::PlainSymbol::new("?x");
    let tokens = [edn::Value::PlainSymbol(x.clone())];
    assert_parses_to!(FindSp::variable, tokens, Variable(x));
}
#[test]
fn test_find_scalar() {
    // `?x .` is the scalar form.
    let x = edn::PlainSymbol::new("?x");
    let dot = edn::PlainSymbol::new(".");
    let tokens = [edn::Value::PlainSymbol(x.clone()), edn::Value::PlainSymbol(dot)];
    let expected = FindSpec::FindScalar(Element::Variable(Variable(x)));
    assert_parses_to!(FindSp::find_scalar, tokens, expected);
}
#[test]
fn test_find_coll() {
    // `[?x ...]` is the collection form.
    let x = edn::PlainSymbol::new("?x");
    let ellipsis = edn::PlainSymbol::new("...");
    let inner = vec![edn::Value::PlainSymbol(x.clone()), edn::Value::PlainSymbol(ellipsis)];
    let tokens = [edn::Value::Vector(inner)];
    let expected = FindSpec::FindColl(Element::Variable(Variable(x)));
    assert_parses_to!(FindSp::find_coll, tokens, expected);
}
#[test]
fn test_find_rel() {
    // `?x ?y` is the relation form.
    let x = edn::PlainSymbol::new("?x");
    let y = edn::PlainSymbol::new("?y");
    let tokens = [edn::Value::PlainSymbol(x.clone()), edn::Value::PlainSymbol(y.clone())];
    let expected = FindSpec::FindRel(vec![Element::Variable(Variable(x)),
                                          Element::Variable(Variable(y))]);
    assert_parses_to!(FindSp::find_rel, tokens, expected);
}
#[test]
fn test_find_tuple() {
    // `[?x ?y]` is the tuple form.
    let x = edn::PlainSymbol::new("?x");
    let y = edn::PlainSymbol::new("?y");
    let inner = vec![edn::Value::PlainSymbol(x.clone()), edn::Value::PlainSymbol(y.clone())];
    let tokens = [edn::Value::Vector(inner)];
    let expected = FindSpec::FindTuple(vec![Element::Variable(Variable(x)),
                                            Element::Variable(Variable(y))]);
    assert_parses_to!(FindSp::find_tuple, tokens, expected);
}
// Parse a sequence of values into one of four find specs.
//
// `:find` must be an array of plain var symbols (?foo), pull expressions, and aggregates.
// For now we only support variables and the annotations necessary to declare which
// flavor of :find we want:
//
//
// `?x ?y ?z ` = FindRel
// `[?x ...] ` = FindColl
// `?x . ` = FindScalar
// `[?x ?y ?z]` = FindTuple
//
pub fn find_seq_to_find_spec(find: &[edn::Value]) -> FindParseResult {
FindSp::find()
.parse(find)
.map(|x| x.0)
.map_err(|_| FindParseError::Err)
}
#[test]
fn test_find_processing() {
    let vx = edn::PlainSymbol::new("?x");
    let vy = edn::PlainSymbol::new("?y");
    let ellipsis = edn::PlainSymbol::new("...");
    let period = edn::PlainSymbol::new(".");

    // Shorthands: a symbol as an EDN value, and a symbol as a find element.
    let val = |s: &edn::PlainSymbol| edn::Value::PlainSymbol(s.clone());
    let elem = |s: &edn::PlainSymbol| Element::Variable(Variable(s.clone()));

    // One input per find-spec flavor.
    let scalar = [val(&vx), val(&period)];
    let tuple = [edn::Value::Vector(vec![val(&vx), val(&vy)])];
    let coll = [edn::Value::Vector(vec![val(&vx), val(&ellipsis)])];
    let rel = [val(&vx), val(&vy)];

    assert_eq!(Ok(FindSpec::FindScalar(elem(&vx))),
               find_seq_to_find_spec(&scalar));
    assert_eq!(Ok(FindSpec::FindTuple(vec![elem(&vx), elem(&vy)])),
               find_seq_to_find_spec(&tuple));
    assert_eq!(Ok(FindSpec::FindColl(elem(&vx))),
               find_seq_to_find_spec(&coll));
    assert_eq!(Ok(FindSpec::FindRel(vec![elem(&vx), elem(&vy)])),
               find_seq_to_find_spec(&rel));
}

187
query-parser/src/util.rs Normal file
View file

@ -0,0 +1,187 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
extern crate edn;
extern crate mentat_query;
use std::collections::BTreeMap;
use self::edn::Value::PlainSymbol;
use self::mentat_query::Variable;
use super::error::NotAVariableError;
/// Interpret an EDN value as a query variable.
///
/// Returns `Some(Variable)` wrapping a clone of the symbol when `v` is a
/// `PlainSymbol` whose name starts with `?`; returns `None` for anything else.
pub fn value_to_variable(v: &edn::Value) -> Option<Variable> {
    match *v {
        PlainSymbol(ref sym) if sym.0.starts_with('?') => Some(Variable(sym.clone())),
        _ => None,
    }
}
/// Convert a slice of EDN values into variables, in order.
///
/// Succeeds only if every value is a variable as defined by
/// `value_to_variable`. Otherwise conversion stops at the first value that is
/// not a variable, and a clone of that value is returned inside
/// `NotAVariableError`.
pub fn values_to_variables(vals: &[edn::Value]) -> Result<Vec<Variable>, NotAVariableError> {
    // Collecting into a `Result` short-circuits on the first `Err`.
    vals.iter()
        .map(|v| value_to_variable(v).ok_or_else(|| NotAVariableError(v.clone())))
        .collect()
}
#[test]
fn test_values_to_variables() {
    let vx = edn::PlainSymbol::new("?x");
    let vy = edn::PlainSymbol::new("?y");

    // A slice made up solely of `?`-prefixed symbols converts in order.
    let all_vars = [edn::Value::PlainSymbol(vx.clone()), edn::Value::PlainSymbol(vy.clone())];
    assert_eq!(Ok(vec![Variable(vx.clone()), Variable(vy)]),
               values_to_variables(&all_vars));

    // An empty slice yields an empty list of variables.
    let empty: [edn::Value; 0] = [];
    assert_eq!(Ok(vec![]), values_to_variables(&empty));

    // The first value that is not a variable is the one reported, even when
    // later values are invalid too.
    let not_a_var = edn::Value::PlainSymbol(edn::PlainSymbol::new("x"));
    let mixed = [edn::Value::PlainSymbol(vx), not_a_var.clone(), edn::Value::Integer(1)];
    assert_eq!(Err(NotAVariableError(not_a_var)), values_to_variables(&mixed));
}
/// Split a flat slice of EDN values -- the contents of an
/// `edn::Value::Vector` -- into a map keyed by keyword.
///
/// The slice is read as consecutive runs, each made of one plain keyword
/// followed by one or more non-keyword values; for example
/// `[:foo 1 2 3 :bar 4]` becomes `{:foo [1 2 3], :bar [4]}`.
///
/// Returns `None` when the input is malformed: a run that does not start with
/// a keyword, a keyword with no values after it (including a trailing
/// keyword), or a keyword that appears more than once. An empty slice yields
/// an empty map.
///
/// TODO: this function can be generalized to take an arbitrary
/// destructuring/break function, yielding a map with a custom
/// key type and splitting in the right places.
pub fn vec_to_keyword_map(vec: &[edn::Value]) -> Option<BTreeMap<edn::Keyword, Vec<edn::Value>>> {
    let mut map = BTreeMap::new();
    let mut remaining = vec;

    while let Some((head, tail)) = remaining.split_first() {
        // Every run must open with a keyword.
        let key = match *head {
            edn::Value::Keyword(ref k) => k.clone(),
            _ => return None,
        };

        // The run's values extend up to, but not including, the next keyword.
        let run_len = tail.iter().take_while(|v| !v.is_keyword()).count();
        if run_len == 0 {
            // `[:foo :bar ...]` and a trailing `:foo` are both invalid.
            return None;
        }
        let (values, rest) = tail.split_at(run_len);

        // Duplicate keys aren't allowed.
        if map.insert(key, values.to_vec()).is_some() {
            return None;
        }
        remaining = rest;
    }

    Some(map)
}
#[test]
fn test_vec_to_keyword_map() {
    let foo = edn::symbols::Keyword("foo".to_string());
    let bar = edn::symbols::Keyword("bar".to_string());
    let baz = edn::symbols::Keyword("baz".to_string());

    // Shorthands for building input values.
    let kw = |k: &edn::symbols::Keyword| edn::Value::Keyword(k.clone());
    let int = edn::Value::Integer;

    // [:foo 1 2 3 :bar 4]
    let input = vec![kw(&foo), int(1), int(2), int(3), kw(&bar), int(4)];
    let m = vec_to_keyword_map(&input).unwrap();

    assert!(m.contains_key(&foo));
    assert!(m.contains_key(&bar));
    assert!(!m.contains_key(&baz));

    let onetwothree = vec![int(1), int(2), int(3)];
    let four = vec![int(4)];
    assert_eq!(m.get(&foo).unwrap(), &onetwothree);
    assert_eq!(m.get(&bar).unwrap(), &four);

    // Trailing keywords aren't allowed.
    assert_eq!(None, vec_to_keyword_map(&vec![kw(&foo)]));
    assert_eq!(None, vec_to_keyword_map(&vec![kw(&foo), int(2), kw(&bar)]));

    // Duplicate keywords aren't allowed.
    assert_eq!(None,
               vec_to_keyword_map(&vec![kw(&foo), int(2), kw(&foo), int(1)]));

    // Starting with anything but a keyword isn't allowed.
    assert_eq!(None, vec_to_keyword_map(&vec![int(2), kw(&foo), int(1)]));

    // Consecutive keywords aren't allowed.
    assert_eq!(None, vec_to_keyword_map(&vec![kw(&foo), kw(&bar), int(1)]));

    // Empty lists return an empty map.
    assert_eq!(BTreeMap::new(), vec_to_keyword_map(&vec![]).unwrap());
}

View file

@ -0,0 +1,35 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
extern crate mentat_query_parser;
extern crate mentat_query;
extern crate edn;
use mentat_query::FindSpec::*;
use mentat_query::Element;
use mentat_query::Variable;
use edn::PlainSymbol;
///! N.B., parsing a query can be done without reference to a DB.
///! Processing the parsed query into something we can work with
///! for planning involves interrogating the schema and idents in
///! the store.
///! See <https://github.com/mozilla/mentat/wiki/Querying> for more.
#[test]
fn can_parse_trivial_find() {
    // Build a scalar find spec by hand and take it apart again.
    let find = FindScalar(Element::Variable(Variable(PlainSymbol("?foo".to_string()))));
    match find {
        FindScalar(Element::Variable(Variable(PlainSymbol(name)))) => {
            assert_eq!("?foo", name);
        }
        _ => panic!(),
    }
}

View file

@ -3,5 +3,8 @@ name = "mentat_query"
version = "0.0.1" version = "0.0.1"
[dependencies] [dependencies]
[dependencies.num] # For EDN value usage.
[dependencies.ordered-float]
[dependencies.edn] [dependencies.edn]
path = "../edn" path = "../edn"

View file

@ -1,135 +0,0 @@
// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
///! This module defines some core types that support find expressions: sources,
///! variables, expressions, etc.
///! These are produced as 'fuel' by the query parser, consumed by the query
///! translator and executor.
///!
///! Many of these types are defined as simple structs that are little more than
///! a richer type alias: a variable, for example, is really just a fancy kind
///! of string.
///!
///! At some point in the future, we might consider reducing copying and memory
///! usage by recasting all of these string-holding structs and enums in terms
///! of string references, with those references being slices of some parsed
///! input query string, and valid for the lifetime of that string.
///!
///! For now, for the sake of simplicity, all of these strings are heap-allocated.
///!
///! Furthermore, we might cut out some of the chaff here: each time a 'tagged'
///! type is used within an enum, we have an opportunity to simplify and use the
///! inner type directly in conjunction with matching on the enum. Before diving
///! deeply into this it's worth recognizing that this loss of 'sovereignty' is
///! a tradeoff against well-typed function signatures and other such boundaries.
// TODO: support other kinds of sources.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum SrcVar {
DefaultSrc,
}
pub enum Constant {} // This is essentially Box. TODO: flesh out.
#[derive(Clone,Debug,Eq,PartialEq)]
pub struct Variable {
pub name: String,
}
pub enum FnArg {
Constant { constant: Constant },
Variable { variable: Variable },
Src { src: SrcVar },
}
pub enum PullPattern {
Constant { constant: Constant },
Variable { variable: Variable },
}
pub struct Pull {
pub src: SrcVar,
pub var: Variable,
pub pattern: PullPattern, // Constant, variable, or plain variable.
}
pub struct Aggregate {
pub fn_name: String,
pub args: Vec<FnArg>,
}
// TODO: look up the idiomatic way to express these kinds of type
// combinations in Rust. It must be common in ASTs. Trait objects
// presumably aren't the answer…
pub enum Element {
Variable { variable: Variable },
Pull { expression: Pull },
Aggregate { expression: Aggregate },
}
/// A definition of the first part of a find query: the
/// `[:find ?foo ?bar…]` bit.
///
/// There are four different kinds of find specs, allowing you to query for
/// a single value, a collection of values from different entities, a single
/// tuple (relation), or a collection of tuples.
///
/// Examples:
///
/// ```rust
/// # use mentat_query::find::{FindSpec, Element, Variable};
///
/// // TODO: this feels clunky.
/// let foo = Variable { name: "foo".to_string() };
/// let bar = Variable { name: "bar".to_string() };
/// let elements = vec![
/// Element::Variable { variable: foo },
/// Element::Variable { variable: bar },
/// ];
/// let rel = FindSpec::FindRel { elements: elements };
///
/// if let FindSpec::FindRel { elements } = rel {
/// assert_eq!(2, elements.len());
/// }
/// ```
///
pub enum FindSpec {
/// Returns an array of arrays.
FindRel { elements: Vec<Element> },
/// Returns an array of scalars, usually homogeneous.
/// This is equivalent to mapping over the results of a `FindRel`,
/// returning the first value of each.
FindColl { element: Element },
/// Returns a single tuple: a heterogeneous array of scalars. Equivalent to
/// taking the first result from a `FindRel`.
FindTuple { elements: Vec<Element> },
/// Returns a single scalar value. Equivalent to taking the first result
/// from a `FindColl`.
FindScalar { element: Element },
}
/// Returns true if the provided `FindSpec` returns at most one result.
pub fn is_unit_limited(spec: &FindSpec) -> bool {
match spec {
&FindSpec::FindScalar { .. } => true,
&FindSpec::FindTuple { .. } => true,
&FindSpec::FindRel { .. } => false,
&FindSpec::FindColl { .. } => false,
}
}
/// Returns true if the provided `FindSpec` cares about distinct results.
pub fn requires_distinct(spec: &FindSpec) -> bool {
return !is_unit_limited(spec);
}

View file

@ -8,4 +8,236 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the // CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License. // specific language governing permissions and limitations under the License.
pub mod find; ///! This module defines some core types that support find expressions: sources,
///! variables, expressions, etc.
///! These are produced as 'fuel' by the query parser, consumed by the query
///! translator and executor.
///!
///! Many of these types are defined as simple structs that are little more than
///! a richer type alias: a variable, for example, is really just a fancy kind
///! of string.
///!
///! At some point in the future, we might consider reducing copying and memory
///! usage by recasting all of these string-holding structs and enums in terms
///! of string references, with those references being slices of some parsed
///! input query string, and valid for the lifetime of that string.
///!
///! For now, for the sake of simplicity, all of these strings are heap-allocated.
///!
///! Furthermore, we might cut out some of the chaff here: each time a 'tagged'
///! type is used within an enum, we have an opportunity to simplify and use the
///! inner type directly in conjunction with matching on the enum. Before diving
///! deeply into this it's worth recognizing that this loss of 'sovereignty' is
///! a tradeoff against well-typed function signatures and other such boundaries.
extern crate edn;
extern crate num;
extern crate ordered_float;
use num::BigInt;
use ordered_float::OrderedFloat;
use edn::{NamespacedKeyword, PlainSymbol};
/// The name of a named source, stored without its leading `$`.
pub type SrcVarName = String; // Do not include the required syntactic '$'.
/// A query variable: a thin wrapper around the plain symbol that names it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Variable(pub PlainSymbol);
/// A source that a query can read from.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum SrcVar {
// The default source.
DefaultSrc,
// A source identified by name (see `SrcVarName`).
NamedSrc(SrcVarName),
}
/// These are the scalar values representable in EDN, other than `i64`
/// integers: the enums that embed this type carry those in a separate
/// `EntidOrInteger` case.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum NonIntegerConstant {
Boolean(bool),
BigInteger(BigInt),
// Wrapped in `OrderedFloat`; a bare `f64` does not implement `Eq`, which
// this enum derives.
Float(OrderedFloat<f64>),
Text(String),
}
/// NOTE(review): presumably an argument to a function expression in a query;
/// nothing in this view constructs or consumes it -- confirm against callers.
pub enum FnArg {
Variable(Variable),
SrcVar(SrcVar),
// An integer that may or may not denote an entity ID.
EntidOrInteger(i64),
Ident(NamespacedKeyword),
Constant(NonIntegerConstant),
}
/// What may appear in the `e`, `a`, or `tx` position of a pattern.
///
/// e, a, tx can't be values -- no strings, no floats -- and so
/// they can only be variables, entity IDs, ident keywords, or
/// placeholders.
/// This encoding allows us to represent integers that aren't
/// entity IDs. That'll get filtered out in the context of the
/// database.
pub enum PatternNonValuePlace {
Placeholder,
Variable(Variable),
Entid(u64), // Note unsigned. See #190.
Ident(NamespacedKeyword),
}
/// What may appear in the `v` position of a pattern.
///
/// The `v` part of a pattern can be much broader: it can represent
/// integers that aren't entity IDs (particularly negative integers),
/// strings, and all the rest. We group those under `Constant`.
pub enum PatternValuePlace {
Placeholder,
Variable(Variable),
// Signed, unlike `PatternNonValuePlace::Entid`, so that negative integers
// can be represented.
EntidOrInteger(i64),
Ident(NamespacedKeyword),
Constant(NonIntegerConstant),
}
/*
pub enum PullPattern {
Constant(Constant),
Variable(Variable),
}
pub struct Pull {
pub src: SrcVar,
pub var: Variable,
pub pattern: PullPattern, // Constant, variable, or plain variable.
}
*/
/*
pub struct Aggregate {
pub fn_name: String,
pub args: Vec<FnArg>,
}
*/
/// One item in a `:find` spec. Only plain variables are supported so far;
/// the aggregate and pull cases are still to be added.
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum Element {
Variable(Variable),
// Aggregate(Aggregate), // TODO
// Pull(Pull), // TODO
}
/// A definition of the first part of a find query: the
/// `[:find ?foo ?bar…]` bit.
///
/// There are four different kinds of find specs, allowing you to query for
/// a single value (scalar), a collection of values, a single tuple, or a
/// collection of tuples (relation).
///
/// Examples:
///
/// ```rust
/// # extern crate edn;
/// # extern crate mentat_query;
/// # use edn::PlainSymbol;
/// # use mentat_query::{Element, FindSpec, Variable};
///
/// # fn main() {
///
/// let elements = vec![
/// Element::Variable(Variable(PlainSymbol("?foo".to_string()))),
/// Element::Variable(Variable(PlainSymbol("?bar".to_string()))),
/// ];
/// let rel = FindSpec::FindRel(elements);
///
/// if let FindSpec::FindRel(elements) = rel {
/// assert_eq!(2, elements.len());
/// }
///
/// # }
/// ```
///
#[derive(Clone,Debug,Eq,PartialEq)]
pub enum FindSpec {
/// Returns an array of arrays.
FindRel(Vec<Element>),
/// Returns an array of scalars, usually homogeneous.
/// This is equivalent to mapping over the results of a `FindRel`,
/// returning the first value of each.
FindColl(Element),
/// Returns a single tuple: a heterogeneous array of scalars. Equivalent to
/// taking the first result from a `FindRel`.
FindTuple(Vec<Element>),
/// Returns a single scalar value. Equivalent to taking the first result
/// from a `FindColl`.
FindScalar(Element),
}
/// A find spec paired with the source used by default.
#[derive(Clone, Debug, Eq, PartialEq)]
#[allow(dead_code)]
pub struct FindQuery {
    /// What the query returns.
    pub find_spec: FindSpec,
    /// The default source for the query.
    pub default_source: SrcVar,
}
/// Reports whether the provided `FindSpec` returns at most one result.
///
/// Scalar and tuple specs are unit-limited; relation and collection specs
/// are not.
pub fn is_unit_limited(spec: &FindSpec) -> bool {
    match *spec {
        FindSpec::FindScalar(..) | FindSpec::FindTuple(..) => true,
        FindSpec::FindRel(..) | FindSpec::FindColl(..) => false,
    }
}
/// Reports whether the provided `FindSpec` cares about distinct results.
///
/// "Cares about" is deliberate wording: at the Datalog level, find is generally
/// defined in terms of producing distinct results.
///
/// Scalar and tuple find specs produce only a single result. Those queries will
/// be run with `LIMIT 1`, so they don't need `SELECT DISTINCT`: only one result
/// is ever consumed.
///
/// Some projections cannot produce duplicate results at all: `[:find (max ?x) …]`,
/// for example. This function is the hook for adding that logic when we're ready.
///
/// Beyond this, `DISTINCT` is not always needed. In some kinds of accumulation or
/// sampling projections it might be skipped at the SQL level, either because the
/// results are consumed into a dupe-eliminating data structure like a Set, or
/// because a particular query is known not to produce duplicates.
pub fn requires_distinct(spec: &FindSpec) -> bool {
    let single_result = is_unit_limited(spec);
    !single_result
}
/// A pattern made of an optional source plus entity, attribute, value, and
/// tx places.
///
/// Note that the "implicit blank" rule applies.
///
/// A pattern with a reversed attribute — `:foo/_bar` — is reversed at the
/// point of parsing, so a `Pattern` instance only ever represents one
/// direction.
#[allow(dead_code)]
pub struct Pattern {
    source: Option<SrcVar>,
    entity: PatternNonValuePlace,
    attribute: PatternNonValuePlace,
    value: PatternValuePlace,
    tx: PatternNonValuePlace,
}
/// A kind of where clause.
///
/// Only `Pattern` exists so far. Still to be added: `Not`, `NotJoin`, `Or`,
/// `OrJoin`, `Pred`, `WhereFn`, and `RuleExpr`.
#[allow(dead_code)]
pub enum WhereClause {
    Pattern,
}
/// A query: a find spec together with its `with` variables, input variables,
/// input sources, and where clauses.
#[allow(dead_code)]
pub struct Query {
    find: FindSpec,
    with: Vec<Variable>,
    in_vars: Vec<Variable>,
    in_sources: Vec<SrcVar>,
    where_clauses: Vec<WhereClause>,
    // TODO: in_rules;
}

View file

@ -21,11 +21,6 @@ pub fn get_name() -> String {
return String::from("mentat"); return String::from("mentat");
} }
// Just an example of using a dependency
pub fn get_parser_name() -> String {
return mentat_query_parser::get_name();
}
// Will ultimately not return the sqlite connection directly // Will ultimately not return the sqlite connection directly
pub fn get_connection() -> Connection { pub fn get_connection() -> Connection {
return Connection::open_in_memory().unwrap(); return Connection::open_in_memory().unwrap();
@ -40,9 +35,4 @@ mod tests {
fn can_import_edn() { fn can_import_edn() {
assert_eq!("foo", Keyword::new("foo").0); assert_eq!("foo", Keyword::new("foo").0);
} }
#[test]
fn can_import_parser() {
assert_eq!(String::from("mentat-query-parser"), get_parser_name());
}
} }