// Copyright 2016 Mozilla // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use // this file except in compliance with the License. You may obtain a copy of the // License at http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. #[macro_use] extern crate mentat_core; extern crate core_traits; extern crate edn; extern crate mentat_query_algebrizer; extern crate mentat_sql; extern crate sql_traits; use std::boxed::Box; use core_traits::{Entid, TypedValue, ValueType}; use mentat_core::SQLTypeAffinity; use edn::query::{Direction, Limit, Variable}; use mentat_query_algebrizer::{ Column, OrderBy, QualifiedAlias, QueryValue, SourceAlias, TableAlias, VariableColumn, }; use sql_traits::errors::{BuildQueryResult, SQLError}; use mentat_sql::{QueryBuilder, QueryFragment, SQLQuery, SQLiteQueryBuilder}; //--------------------------------------------------------- // A Mentat-focused representation of a SQL query. /// One of the things that can appear in a projection or a constraint. Note that we use /// `TypedValue` here; it's not pure SQL, but it avoids us having to concern ourselves at this /// point with the translation between a `TypedValue` and the storage-layer representation. /// /// Eventually we might allow different translations by providing a different `QueryBuilder` /// implementation for each storage backend. Passing `TypedValue`s here allows for that. pub enum ColumnOrExpression { Column(QualifiedAlias), ExistingColumn(Name), Entid(Entid), // Because it's so common. Integer(i32), // We use these for type codes etc. Long(i64), Value(TypedValue), // Some aggregates (`min`, `max`, `avg`) can be over 0 rows, and therefore can be `NULL`; that // needs special treatment. NullableAggregate(Box, ValueType), // Track the return type. Expression(Box, ValueType), // Track the return type. } pub enum Expression { Unary { sql_op: &'static str, arg: ColumnOrExpression, }, } /// `QueryValue` and `ColumnOrExpression` are almost identical… merge somehow? impl From for ColumnOrExpression { fn from(v: QueryValue) -> Self { match v { QueryValue::Column(c) => ColumnOrExpression::Column(c), QueryValue::Entid(e) => ColumnOrExpression::Entid(e), QueryValue::PrimitiveLong(v) => ColumnOrExpression::Long(v), QueryValue::TypedValue(v) => ColumnOrExpression::Value(v), } } } pub type Name = String; pub struct ProjectedColumn(pub ColumnOrExpression, pub Name); pub enum Projection { Columns(Vec), Star, One, } #[derive(Debug, PartialEq, Eq)] pub enum GroupBy { ProjectedColumn(Name), QueryColumn(QualifiedAlias), // TODO: non-projected expressions, etc. } impl QueryFragment for GroupBy { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { match self { GroupBy::ProjectedColumn(ref name) => out.push_identifier(name.as_str()), GroupBy::QueryColumn(ref qa) => qualified_alias_push_sql(out, qa), } } } #[derive(Copy, Clone)] pub struct Op(pub &'static str); // TODO: we can do better than this! pub enum Constraint { Infix { op: Op, left: ColumnOrExpression, right: ColumnOrExpression, }, Or { constraints: Vec, }, And { constraints: Vec, }, In { left: ColumnOrExpression, list: Vec, }, IsNull { value: ColumnOrExpression, }, IsNotNull { value: ColumnOrExpression, }, NotExists { subquery: TableOrSubquery, }, TypeCheck { value: ColumnOrExpression, affinity: SQLTypeAffinity, }, } impl Constraint { pub fn not_equal(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { Constraint::Infix { op: Op("<>"), // ANSI SQL for future-proofing! left, right, } } pub fn equal(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { Constraint::Infix { op: Op("="), left, right, } } pub fn fulltext_match(left: ColumnOrExpression, right: ColumnOrExpression) -> Constraint { Constraint::Infix { op: Op("MATCH"), // SQLite specific! left, right, } } } #[allow(dead_code)] enum JoinOp { Inner, } // Short-hand for a list of tables all inner-joined. pub struct TableList(pub Vec); impl TableList { fn is_empty(&self) -> bool { self.0.is_empty() } } pub struct Join { left: TableOrSubquery, op: JoinOp, right: TableOrSubquery, // TODO: constraints (ON, USING). } #[allow(dead_code)] pub enum TableOrSubquery { Table(SourceAlias), Union(Vec, TableAlias), Subquery(Box), Values(Values, TableAlias), } pub enum Values { /// Like "VALUES (0, 1), (2, 3), ...". /// The vector must be of a length that is a multiple of the given size. Unnamed(usize, Vec), /// Like "SELECT 0 AS x, SELECT 0 AS y WHERE 0 UNION ALL VALUES (0, 1), (2, 3), ...". /// The vector of values must be of a length that is a multiple of the length /// of the vector of names. Named(Vec, Vec), } pub enum FromClause { TableList(TableList), // Short-hand for a pile of inner joins. Join(Join), Nothing, } pub struct SelectQuery { pub distinct: bool, pub projection: Projection, pub from: FromClause, pub constraints: Vec, pub group_by: Vec, pub order: Vec, pub limit: Limit, } fn push_variable_column(qb: &mut dyn QueryBuilder, vc: &VariableColumn) -> BuildQueryResult { match vc { VariableColumn::Variable(ref v) => qb.push_identifier(v.as_str()), VariableColumn::VariableTypeTag(ref v) => { qb.push_identifier(format!("{}_value_type_tag", v.name()).as_str()) } } } fn push_column(qb: &mut dyn QueryBuilder, col: &Column) -> BuildQueryResult { match col { Column::Fixed(ref d) => { qb.push_sql(d.as_str()); Ok(()) } Column::Fulltext(ref d) => { qb.push_sql(d.as_str()); Ok(()) } Column::Variable(ref vc) => push_variable_column(qb, vc), Column::Transactions(ref d) => { qb.push_sql(d.as_str()); Ok(()) } } } //--------------------------------------------------------- // Turn that representation into SQL. impl QueryFragment for ColumnOrExpression { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { use self::ColumnOrExpression::*; match self { Column(ref qa) => qualified_alias_push_sql(out, qa), ExistingColumn(ref alias) => out.push_identifier(alias.as_str()), Entid(entid) => { out.push_sql(entid.to_string().as_str()); Ok(()) } Integer(integer) => { out.push_sql(integer.to_string().as_str()); Ok(()) } Long(long) => { out.push_sql(long.to_string().as_str()); Ok(()) } Value(ref v) => out.push_typed_value(v), NullableAggregate(ref e, _) | &Expression(ref e, _) => e.push_sql(out), } } } impl QueryFragment for Expression { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { match self { Expression::Unary { ref sql_op, ref arg, } => { out.push_sql(sql_op); // No need to escape built-ins. out.push_sql("("); arg.push_sql(out)?; out.push_sql(")"); Ok(()) } } } } impl QueryFragment for Projection { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { use self::Projection::*; match self { One => out.push_sql("1"), Star => out.push_sql("*"), Columns(ref cols) => { let &ProjectedColumn(ref col, ref alias) = &cols[0]; col.push_sql(out)?; out.push_sql(" AS "); out.push_identifier(alias.as_str())?; for &ProjectedColumn(ref col, ref alias) in &cols[1..] { out.push_sql(", "); col.push_sql(out)?; out.push_sql(" AS "); out.push_identifier(alias.as_str())?; } } }; Ok(()) } } impl QueryFragment for Op { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { // No escaping needed. out.push_sql(self.0); Ok(()) } } impl QueryFragment for Constraint { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { use self::Constraint::*; match self { Infix { ref op, ref left, ref right, } => { left.push_sql(out)?; out.push_sql(" "); op.push_sql(out)?; out.push_sql(" "); right.push_sql(out) } IsNull { ref value } => { value.push_sql(out)?; out.push_sql(" IS NULL"); Ok(()) } IsNotNull { ref value } => { value.push_sql(out)?; out.push_sql(" IS NOT NULL"); Ok(()) } And { ref constraints } => { // An empty intersection is true. if constraints.is_empty() { out.push_sql("1"); return Ok(()); } out.push_sql("("); interpose!(constraint, constraints, { constraint.push_sql(out)? }, { out.push_sql(" AND ") }); out.push_sql(")"); Ok(()) } Or { ref constraints } => { // An empty alternation is false. if constraints.is_empty() { out.push_sql("0"); return Ok(()); } out.push_sql("("); interpose!(constraint, constraints, { constraint.push_sql(out)? }, { out.push_sql(" OR ") }); out.push_sql(")"); Ok(()) } In { ref left, ref list } => { left.push_sql(out)?; out.push_sql(" IN ("); interpose!(item, list, { item.push_sql(out)? }, { out.push_sql(", ") }); out.push_sql(")"); Ok(()) } NotExists { ref subquery } => { out.push_sql("NOT EXISTS "); subquery.push_sql(out) } TypeCheck { ref value, ref affinity, } => { out.push_sql("typeof("); value.push_sql(out)?; out.push_sql(") = "); out.push_sql(match *affinity { SQLTypeAffinity::Null => "'null'", SQLTypeAffinity::Integer => "'integer'", SQLTypeAffinity::Real => "'real'", SQLTypeAffinity::Text => "'text'", SQLTypeAffinity::Blob => "'blob'", }); Ok(()) } } } } impl QueryFragment for JoinOp { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { out.push_sql(" JOIN "); Ok(()) } } // We don't own QualifiedAlias or QueryFragment, so we can't implement the trait. fn qualified_alias_push_sql(out: &mut dyn QueryBuilder, qa: &QualifiedAlias) -> BuildQueryResult { out.push_identifier(qa.0.as_str())?; out.push_sql("."); push_column(out, &qa.1) } // We don't own SourceAlias or QueryFragment, so we can't implement the trait. fn source_alias_push_sql(out: &mut dyn QueryBuilder, sa: &SourceAlias) -> BuildQueryResult { let &SourceAlias(ref table, ref alias) = sa; out.push_identifier(table.name())?; out.push_sql(" AS "); out.push_identifier(alias.as_str()) } impl QueryFragment for TableList { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { if self.0.is_empty() { return Ok(()); } interpose!(t, self.0, { t.push_sql(out)? }, { out.push_sql(", ") }); Ok(()) } } impl QueryFragment for Join { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { self.left.push_sql(out)?; self.op.push_sql(out)?; self.right.push_sql(out) } } impl QueryFragment for TableOrSubquery { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { use self::TableOrSubquery::*; match *self { Table(ref sa) => source_alias_push_sql(out, sa), Union(ref subqueries, ref table_alias) => { out.push_sql("("); interpose!(subquery, subqueries, { subquery.push_sql(out)? }, { out.push_sql(" UNION ") }); out.push_sql(") AS "); out.push_identifier(table_alias.as_str()) } Subquery(ref subquery) => { out.push_sql("("); subquery.push_sql(out)?; out.push_sql(")"); Ok(()) } Values(ref values, ref table_alias) => { // XXX: does this work for Values::Unnamed? out.push_sql("("); values.push_sql(out)?; out.push_sql(") AS "); out.push_identifier(table_alias.as_str()) } } } } impl QueryFragment for Values { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { // There are at least 3 ways to name the columns of a VALUES table: // 1) the columns are named "", ":1", ":2", ... -- but this is undocumented. See // http://stackoverflow.com/a/40921724. // 2) A CTE ("WITH" statement) can declare the shape of the table, like "WITH // table_name(column_name, ...) AS (VALUES ...)". // 3) We can "UNION ALL" a dummy "SELECT" statement in place. // // We don't want to use an undocumented SQLite quirk, and we're a little concerned that some // SQL systems will not optimize WITH statements well. It's also convenient to have an in // place table to query, so for now we implement option 3. if let Values::Named(ref names, _) = *self { out.push_sql("SELECT "); interpose!( alias, names, { out.push_sql("0 AS "); out.push_identifier(alias.as_str())? }, { out.push_sql(", ") } ); out.push_sql(" WHERE 0 UNION ALL "); } let values = match *self { Values::Named(ref names, ref values) => values.chunks(names.len()), Values::Unnamed(ref size, ref values) => values.chunks(*size), }; out.push_sql("VALUES "); interpose_iter!( outer, values, { out.push_sql("("); interpose!(inner, outer, { out.push_typed_value(inner)? }, { out.push_sql(", ") }); out.push_sql(")"); }, { out.push_sql(", ") } ); Ok(()) } } impl QueryFragment for FromClause { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { use self::FromClause::*; match *self { TableList(ref table_list) => { if table_list.is_empty() { Ok(()) } else { out.push_sql(" FROM "); table_list.push_sql(out) } } Join(ref join) => { out.push_sql(" FROM "); join.push_sql(out) } Nothing => Ok(()), } } } /// `var` is something like `?foo99-people`. /// Trim the `?` and escape the rest. Prepend `i` to distinguish from /// the inline value space `v`. fn format_select_var(var: &str) -> String { use std::iter::once; let without_question = var.split_at(1).1; let replaced_iter = without_question .chars() .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }); // Prefix with `i` (Avoiding this copy is probably not worth the trouble but whatever). once('i').chain(replaced_iter).collect() } impl SelectQuery { fn push_variable_param(&self, var: &Variable, out: &mut dyn QueryBuilder) -> BuildQueryResult { let bind_param = format_select_var(var.as_str()); out.push_bind_param(bind_param.as_str()) } } impl QueryFragment for SelectQuery { fn push_sql(&self, out: &mut dyn QueryBuilder) -> BuildQueryResult { if self.distinct { out.push_sql("SELECT DISTINCT "); } else { out.push_sql("SELECT "); } self.projection.push_sql(out)?; self.from.push_sql(out)?; if !self.constraints.is_empty() { out.push_sql(" WHERE "); interpose!( constraint, self.constraints, { constraint.push_sql(out)? }, { out.push_sql(" AND ") } ); } match &self.group_by { group_by if !group_by.is_empty() => { out.push_sql(" GROUP BY "); interpose!(group, group_by, { group.push_sql(out)? }, { out.push_sql(", ") }); } _ => {} } if !self.order.is_empty() { out.push_sql(" ORDER BY "); interpose!( &OrderBy(ref dir, ref var), self.order, { push_variable_column(out, var)?; match *dir { Direction::Ascending => { out.push_sql(" ASC"); } Direction::Descending => { out.push_sql(" DESC"); } }; }, { out.push_sql(", ") } ); } match self.limit { Limit::None => (), Limit::Fixed(limit) => { // Guaranteed to be non-negative: u64. out.push_sql(" LIMIT "); out.push_sql(limit.to_string().as_str()); } Limit::Variable(ref var) => { // Guess this wasn't bound yet. Produce an argument. out.push_sql(" LIMIT "); self.push_variable_param(var, out)?; } } Ok(()) } } impl SelectQuery { pub fn to_sql_query(&self) -> Result { let mut builder = SQLiteQueryBuilder::new(); self.push_sql(&mut builder).map(|_| builder.finish()) } } #[cfg(test)] mod tests { use super::*; use std::rc::Rc; use mentat_query_algebrizer::{Column, DatomsColumn, DatomsTable, FulltextColumn}; fn build_query(c: &dyn QueryFragment) -> SQLQuery { let mut builder = SQLiteQueryBuilder::new(); c.push_sql(&mut builder) .map(|_| builder.finish()) .expect("to produce a query for the given constraint") } fn build(c: &dyn QueryFragment) -> String { build_query(c).sql } #[test] fn test_in_constraint() { let none = Constraint::In { left: ColumnOrExpression::Column(QualifiedAlias::new( "datoms01".to_string(), Column::Fixed(DatomsColumn::Value), )), list: vec![], }; let one = Constraint::In { left: ColumnOrExpression::Column(QualifiedAlias::new( "datoms01".to_string(), DatomsColumn::Value, )), list: vec![ColumnOrExpression::Entid(123)], }; let three = Constraint::In { left: ColumnOrExpression::Column(QualifiedAlias::new( "datoms01".to_string(), DatomsColumn::Value, )), list: vec![ ColumnOrExpression::Entid(123), ColumnOrExpression::Entid(456), ColumnOrExpression::Entid(789), ], }; assert_eq!("`datoms01`.v IN ()", build(&none)); assert_eq!("`datoms01`.v IN (123)", build(&one)); assert_eq!("`datoms01`.v IN (123, 456, 789)", build(&three)); } #[test] fn test_and_constraint() { let c = Constraint::And { constraints: vec![Constraint::And { constraints: vec![ Constraint::Infix { op: Op("="), left: ColumnOrExpression::Entid(123), right: ColumnOrExpression::Entid(456), }, Constraint::Infix { op: Op("="), left: ColumnOrExpression::Entid(789), right: ColumnOrExpression::Entid(246), }, ], }], }; // Two sets of parens: the outermost AND only has one child, // but still contributes parens. assert_eq!("((123 = 456 AND 789 = 246))", build(&c)); } #[test] fn test_unnamed_values() { let build = |len, values| build(&Values::Unnamed(len, values)); assert_eq!(build(1, vec![TypedValue::Long(1)]), "VALUES (1)"); assert_eq!( build(2, vec![TypedValue::Boolean(false), TypedValue::Long(1)]), "VALUES (0, 1)" ); assert_eq!( build( 2, vec![ TypedValue::Boolean(false), TypedValue::Long(1), TypedValue::Boolean(true), TypedValue::Long(2) ] ), "VALUES (0, 1), (1, 2)" ); } #[test] fn test_named_values() { let build = |names: Vec<_>, values| { build(&Values::Named( names.into_iter().map(Variable::from_valid_name).collect(), values, )) }; assert_eq!( build(vec!["?a"], vec![TypedValue::Long(1)]), "SELECT 0 AS `?a` WHERE 0 UNION ALL VALUES (1)" ); assert_eq!( build( vec!["?a", "?b"], vec![TypedValue::Boolean(false), TypedValue::Long(1)] ), "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1)" ); assert_eq!( build( vec!["?a", "?b"], vec![ TypedValue::Boolean(false), TypedValue::Long(1), TypedValue::Boolean(true), TypedValue::Long(2) ] ), "SELECT 0 AS `?a`, 0 AS `?b` WHERE 0 UNION ALL VALUES (0, 1), (1, 2)" ); } #[test] fn test_matches_constraint() { let c = Constraint::Infix { op: Op("MATCHES"), left: ColumnOrExpression::Column(QualifiedAlias( "fulltext01".to_string(), Column::Fulltext(FulltextColumn::Text), )), right: ColumnOrExpression::Value("needle".into()), }; let q = build_query(&c); assert_eq!("`fulltext01`.text MATCHES $v0", q.sql); assert_eq!( vec![( "$v0".to_string(), Rc::new(rusqlite::types::Value::Text("needle".to_string())) )], q.args ); let c = Constraint::Infix { op: Op("="), left: ColumnOrExpression::Column(QualifiedAlias( "fulltext01".to_string(), Column::Fulltext(FulltextColumn::Rowid), )), right: ColumnOrExpression::Column(QualifiedAlias( "datoms02".to_string(), Column::Fixed(DatomsColumn::Value), )), }; assert_eq!("`fulltext01`.rowid = `datoms02`.v", build(&c)); } #[test] fn test_end_to_end() { // [:find ?x :where [?x 65537 ?v] [?x 65536 ?v]] let datoms00 = "datoms00".to_string(); let datoms01 = "datoms01".to_string(); let eq = Op("="); let source_aliases = vec![ TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms00.clone())), TableOrSubquery::Table(SourceAlias(DatomsTable::Datoms, datoms01.clone())), ]; let mut query = SelectQuery { distinct: true, projection: Projection::Columns(vec![ProjectedColumn( ColumnOrExpression::Column(QualifiedAlias::new( datoms00.clone(), DatomsColumn::Entity, )), "x".to_string(), )]), from: FromClause::TableList(TableList(source_aliases)), constraints: vec![ Constraint::Infix { op: eq, left: ColumnOrExpression::Column(QualifiedAlias::new( datoms01.clone(), DatomsColumn::Value, )), right: ColumnOrExpression::Column(QualifiedAlias::new( datoms00.clone(), DatomsColumn::Value, )), }, Constraint::Infix { op: eq, left: ColumnOrExpression::Column(QualifiedAlias::new( datoms00, DatomsColumn::Attribute, )), right: ColumnOrExpression::Entid(65537), }, Constraint::Infix { op: eq, left: ColumnOrExpression::Column(QualifiedAlias::new( datoms01, DatomsColumn::Attribute, )), right: ColumnOrExpression::Entid(65536), }, ], group_by: vec![], order: vec![], limit: Limit::None, }; let SQLQuery { sql, args } = query.to_sql_query().unwrap(); println!("{}", sql); assert_eq!("SELECT DISTINCT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql); assert!(args.is_empty()); // And without distinct… query.distinct = false; let SQLQuery { sql, args } = query.to_sql_query().unwrap(); println!("{}", sql); assert_eq!("SELECT `datoms00`.e AS `x` FROM `datoms` AS `datoms00`, `datoms` AS `datoms01` WHERE `datoms01`.v = `datoms00`.v AND `datoms00`.a = 65537 AND `datoms01`.a = 65536", sql); assert!(args.is_empty()); } #[test] fn test_format_select_var() { assert_eq!(format_select_var("?foo99-people"), "ifoo99_people"); assert_eq!(format_select_var("?FOO99-pëople.123"), "iFOO99_p_ople_123"); assert_eq!(format_select_var("?foo①bar越"), "ifoo_bar_"); } }