Add a command to the CLI that displays the SQL and query plan for a query. Fixes #428. (#523) r=rnewman

2018-01-19 19:44:39 -05:00 · 2018-01-19 19:44:39 -05:00 · 023fd9b70b
commit 023fd9b70b
parent ebb77d59bc
7 changed files with 199 additions and 25 deletions
--- a/src/conn.rs
+++ b/src/conn.rs
@ -40,6 +40,8 @@ use errors::*;
 use query::{
    lookup_value_for_attribute,
    q_once,
+    q_explain,
+    QueryExplanation,
    QueryInputs,
    QueryResults,
 };
@ -214,6 +216,15 @@ impl Conn {
               inputs)
    }

+    pub fn q_explain<T>(&self,
+                        sqlite: &rusqlite::Connection,
+                        query: &str,
+                        inputs: T) -> Result<QueryExplanation>
+        where T: Into<Option<QueryInputs>>
+    {
+        q_explain(sqlite, &*self.current_schema(), query, inputs)
+    }
+
    pub fn lookup_value_for_attribute(&self,
                                      sqlite: &rusqlite::Connection,
                                      entity: Entid,
--- a/src/lib.rs
+++ b/src/lib.rs
@ -55,7 +55,9 @@ pub use mentat_db::{
 pub use query::{
    NamespacedKeyword,
    PlainSymbol,
+    QueryExplanation,
    QueryInputs,
+    QueryPlanStep,
    QueryResults,
    Variable,
    q_once,
--- a/src/query.rs
+++ b/src/query.rs
@ -11,6 +11,8 @@
 use rusqlite;
 use rusqlite::types::ToSql;

+use std::rc::Rc;
+
 use mentat_core::{
    Entid,
    Schema,
@ -20,6 +22,7 @@ use mentat_core::{
 use mentat_query_algebrizer::{
    AlgebraicQuery,
    algebrize_with_inputs,
+    EmptyBecause,
 };

 pub use mentat_query_algebrizer::{
@ -90,6 +93,45 @@ impl IntoResult for QueryExecutionResult {
    }
 }

+/// An enum describing how Mentat would execute a query.
+pub enum QueryExplanation {
+    /// A query known in advance to be empty, and why we believe that.
+    KnownEmpty(EmptyBecause),
+    /// A query that takes actual work to execute.
+    ExecutionPlan {
+        /// The translated query and any bindings.
+        query: SQLQuery,
+        /// The output of SQLite's `EXPLAIN QUERY PLAN`.
+        steps: Vec<QueryPlanStep>,
+    },
+}
+
+/// A single row in the output of SQLite's `EXPLAIN QUERY PLAN`.
+/// See https://www.sqlite.org/eqp.html for an explanation of each field.
+pub struct QueryPlanStep {
+    pub select_id: i32,
+    pub order: i32,
+    pub from: i32,
+    pub detail: String,
+}
+
+fn algebrize_query<'schema, T>
+(schema: &'schema Schema,
+ query: FindQuery,
+ inputs: T) -> Result<AlgebraicQuery>
+    where T: Into<Option<QueryInputs>>
+{
+    let algebrized = algebrize_with_inputs(schema, query, 0, inputs.into().unwrap_or(QueryInputs::default()))?;
+    let unbound = algebrized.unbound_variables();
+    // Because we are running once, we can check that all of our `:in` variables are bound at this point.
+    // If they aren't, the user has made an error -- perhaps writing the wrong variable in `:in`, or
+    // not binding it in the `QueryInputs`.
+    if !unbound.is_empty() {
+        bail!(ErrorKind::UnboundVariables(unbound.into_iter().map(|v| v.to_string()).collect()));
+    }
+    Ok(algebrized)
+}
+
 fn fetch_values<'sqlite, 'schema>
 (sqlite: &'sqlite rusqlite::Connection,
 schema: &'schema Schema,
@ -111,7 +153,7 @@ fn fetch_values<'sqlite, 'schema>
    let query = FindQuery::simple(spec,
                                  vec![WhereClause::Pattern(pattern)]);

-    let algebrized = algebrize_with_inputs(schema, query, 0, QueryInputs::default())?;
+    let algebrized = algebrize_query(schema, query, None)?;

    run_algebrized_query(sqlite, algebrized)
 }
@ -161,34 +203,61 @@ pub fn lookup_values_for_attribute<'sqlite, 'schema, 'attribute>
    lookup_values(sqlite, schema, entity, lookup_attribute(schema, attribute)?)
 }

+fn run_statement<'sqlite, 'stmt, 'bound>
+(statement: &'stmt mut rusqlite::Statement<'sqlite>,
+ bindings: &'bound [(String, Rc<rusqlite::types::Value>)]) -> Result<rusqlite::Rows<'stmt>> {
+
+    let rows = if bindings.is_empty() {
+        statement.query(&[])?
+    } else {
+        let refs: Vec<(&str, &ToSql)> =
+            bindings.iter()
+                    .map(|&(ref k, ref v)| (k.as_str(), v.as_ref() as &ToSql))
+                    .collect();
+        statement.query_named(&refs)?
+    };
+    Ok(rows)
+}
+
+fn run_sql_query<'sqlite, 'sql, 'bound, T, F>
+(sqlite: &'sqlite rusqlite::Connection,
+ sql: &'sql str,
+ bindings: &'bound [(String, Rc<rusqlite::types::Value>)],
+ mut mapper: F) -> Result<Vec<T>>
+    where F: FnMut(&rusqlite::Row) -> T
+{
+    let mut statement = sqlite.prepare(sql)?;
+    let mut rows = run_statement(&mut statement, &bindings)?;
+    let mut result = vec![];
+    while let Some(row_or_error) = rows.next() {
+        result.push(mapper(&row_or_error?));
+    }
+    Ok(result)
+}
+
+fn algebrize_query_str<'schema, 'query, T>
+(schema: &'schema Schema,
+ query: &'query str,
+ inputs: T) -> Result<AlgebraicQuery>
+    where T: Into<Option<QueryInputs>>
+{
+    let parsed = parse_find_string(query)?;
+    algebrize_query(schema, parsed, inputs)
+}
+
 fn run_algebrized_query<'sqlite>(sqlite: &'sqlite rusqlite::Connection, algebrized: AlgebraicQuery) -> QueryExecutionResult {
+    assert!(algebrized.unbound_variables().is_empty(),
+            "Unbound variables should be checked by now");
    if algebrized.is_known_empty() {
        // We don't need to do any SQL work at all.
        return Ok(QueryResults::empty(&algebrized.find_spec));
    }

-    // Because we are running once, we can check that all of our `:in` variables are bound at this point.
-    // If they aren't, the user has made an error -- perhaps writing the wrong variable in `:in`, or
-    // not binding in the `QueryInput`.
-    let unbound = algebrized.unbound_variables();
-    if !unbound.is_empty() {
-        bail!(ErrorKind::UnboundVariables(unbound.into_iter().map(|v| v.to_string()).collect()));
-    }
-
    let select = query_to_select(algebrized)?;
    let SQLQuery { sql, args } = select.query.to_sql_query()?;

    let mut statement = sqlite.prepare(sql.as_str())?;
-
-    let rows = if args.is_empty() {
-        statement.query(&[])?
-    } else {
-        let refs: Vec<(&str, &ToSql)> =
-            args.iter()
-                .map(|&(ref k, ref v)| (k.as_str(), v.as_ref() as &ToSql))
-                .collect();
-        statement.query_named(refs.as_slice())?
-    };
+    let rows = run_statement(&mut statement, &args)?;

    select.projector
          .project(rows)
@ -209,8 +278,34 @@ pub fn q_once<'sqlite, 'schema, 'query, T>
 inputs: T) -> QueryExecutionResult
        where T: Into<Option<QueryInputs>>
 {
-    let parsed = parse_find_string(query)?;
-    let algebrized = algebrize_with_inputs(schema, parsed, 0, inputs.into().unwrap_or(QueryInputs::default()))?;
+    let algebrized = algebrize_query_str(schema, query, inputs)?;

    run_algebrized_query(sqlite, algebrized)
 }
+
+pub fn q_explain<'sqlite, 'schema, 'query, T>
+(sqlite: &'sqlite rusqlite::Connection,
+ schema: &'schema Schema,
+ query: &'query str,
+ inputs: T) -> Result<QueryExplanation>
+        where T: Into<Option<QueryInputs>>
+{
+    let algebrized = algebrize_query_str(schema, query, inputs)?;
+    if algebrized.is_known_empty() {
+        return Ok(QueryExplanation::KnownEmpty(algebrized.cc.empty_because.unwrap()));
+    }
+    let query = query_to_select(algebrized)?.query.to_sql_query()?;
+
+    let plan_sql = format!("EXPLAIN QUERY PLAN {}", query.sql);
+
+    let steps = run_sql_query(sqlite, &plan_sql, &query.args, |row| {
+        QueryPlanStep {
+            select_id: row.get(0),
+            order: row.get(1),
+            from: row.get(2),
+            detail: row.get(3)
+        }
+    })?;
+
+    Ok(QueryExplanation::ExecutionPlan { query, steps })
+}
--- a/tools/cli/src/mentat_cli/command_parser.rs
+++ b/tools/cli/src/mentat_cli/command_parser.rs
@ -42,6 +42,8 @@ pub static LONG_TRANSACT_COMMAND: &'static str = &"transact";
 pub static SHORT_TRANSACT_COMMAND: &'static str = &"t";
 pub static LONG_EXIT_COMMAND: &'static str = &"exit";
 pub static SHORT_EXIT_COMMAND: &'static str = &"e";
+pub static LONG_QUERY_EXPLAIN_COMMAND: &'static str = &"explain_query";
+pub static SHORT_QUERY_EXPLAIN_COMMAND: &'static str = &"eq";

 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Command {
@ -52,6 +54,7 @@ pub enum Command {
    Query(String),
    Schema,
    Transact(String),
+    QueryExplain(String),
 }

 impl Command {
@ -62,7 +65,8 @@ impl Command {
    pub fn is_complete(&self) -> bool {
        match self {
            &Command::Query(ref args) |
-            &Command::Transact(ref args) => {
+            &Command::Transact(ref args) |
+            &Command::QueryExplain(ref args) => {
                edn::parse::value(&args).is_ok()
            },
            &Command::Help(_) |
@ -96,6 +100,9 @@ impl Command {
            &Command::Schema => {
                format!(".{}", SCHEMA_COMMAND)
            },
+            &Command::QueryExplain(ref args) => {
+                format!(".{} {}", LONG_QUERY_EXPLAIN_COMMAND, args)
+            },
        }
    }
 }
@ -174,12 +181,19 @@ pub fn command(s: &str) -> Result<Command, cli::Error> {
                        Ok(Command::Transact(x))
                    });

+    let explain_query_parser = try(string(LONG_QUERY_EXPLAIN_COMMAND))
+                           .or(try(string(SHORT_QUERY_EXPLAIN_COMMAND)))
+                        .with(edn_arg_parser())
+                        .map(|x| {
+                            Ok(Command::QueryExplain(x))
+                        });
    spaces()
    .skip(token('.'))
-    .with(choice::<[&mut Parser<Input = _, Output = Result<Command, cli::Error>>; 7], _>
+    .with(choice::<[&mut Parser<Input = _, Output = Result<Command, cli::Error>>; 8], _>
          ([&mut try(help_parser),
            &mut try(open_parser),
            &mut try(close_parser),
+            &mut try(explain_query_parser),
            &mut try(exit_parser),
            &mut try(query_parser),
            &mut try(schema_parser),
--- a/tools/cli/src/mentat_cli/input.rs
+++ b/tools/cli/src/mentat_cli/input.rs
@ -115,7 +115,8 @@ impl InputReader {
            Ok(cmd) => {
                match cmd {
                    Command::Query(_) |
-                    Command::Transact(_) if !cmd.is_complete() => {
+                    Command::Transact(_) |
+                    Command::QueryExplain(_) if !cmd.is_complete() => {
                        // A query or transact is complete if it contains a valid EDN.
                        // if the command is not complete, ask for more from the REPL and remember
                        // which type of command we've found here.
--- a/tools/cli/src/mentat_cli/repl.rs
+++ b/tools/cli/src/mentat_cli/repl.rs
@ -11,7 +11,10 @@
 use std::collections::HashMap;  
 use std::process;

-use mentat::query::QueryResults;
+use mentat::query::{
+    QueryExplanation,
+    QueryResults,
+};
 use mentat_core::TypedValue;

 use command_parser::{
@ -25,6 +28,8 @@ use command_parser::{
    SHORT_TRANSACT_COMMAND,
    LONG_EXIT_COMMAND,
    SHORT_EXIT_COMMAND,
+    LONG_QUERY_EXPLAIN_COMMAND,
+    SHORT_QUERY_EXPLAIN_COMMAND,
 };
 use input::InputReader;
 use input::InputResult::{
@ -50,6 +55,9 @@ lazy_static! {
        map.insert(SCHEMA_COMMAND, "Output the schema for the current open database.");
        map.insert(LONG_TRANSACT_COMMAND, "Execute a transact against the current open database.");
        map.insert(SHORT_TRANSACT_COMMAND, "Shortcut for `.transact`. Execute a transact against the current open database.");
+        map.insert(LONG_QUERY_EXPLAIN_COMMAND, "Show the SQL and query plan that would be executed for a given query.");
+        map.insert(SHORT_QUERY_EXPLAIN_COMMAND,
+            "Shortcut for `.explain_query`. Show the SQL and query plan that would be executed for a given query.");
        map
    };
 }
@ -112,6 +120,7 @@ impl Repl {
            },
            Command::Close => self.close(),
            Command::Query(query) => self.execute_query(query),
+            Command::QueryExplain(query) => self.explain_query(query),
            Command::Schema => {
                let edn = self.store.fetch_schema();
                match edn.to_pretty(120) {
@ -197,6 +206,43 @@ impl Repl {
        println!("\n{}", output);
    }

+    pub fn explain_query(&self, query: String) {
+        match self.store.explain_query(query) {
+            Result::Err(err) =>
+                println!("{:?}.", err),
+            Result::Ok(QueryExplanation::KnownEmpty(empty_because)) =>
+                println!("Query is known empty: {:?}", empty_because),
+            Result::Ok(QueryExplanation::ExecutionPlan { query, steps }) => {
+                println!("SQL: {}", query.sql);
+                if !query.args.is_empty() {
+                    println!("  Bindings:");
+                    for (arg_name, value) in query.args {
+                        println!("    {} = {:?}", arg_name, *value)
+                    }
+                }
+
+                println!("Plan: select id | order | from | detail");
+                // Compute the number of columns we need for order, select id, and from,
+                // so that longer query plans don't become misaligned.
+                let (max_select_id, max_order, max_from) = steps.iter().fold((0, 0, 0), |acc, step|
+                    (acc.0.max(step.select_id), acc.1.max(step.order), acc.2.max(step.from)));
+                // This is less efficient than computing it via the logarithm base 10,
+                // but it's clearer and doesn't require special-casing "0".
+                let max_select_digits = max_select_id.to_string().len();
+                let max_order_digits = max_order.to_string().len();
+                let max_from_digits = max_from.to_string().len();
+                for step in steps {
+                    // Note: > is right align.
+                    println!("  {:>sel_cols$}|{:>ord_cols$}|{:>from_cols$}|{}",
+                             step.select_id, step.order, step.from, step.detail,
+                             sel_cols = max_select_digits,
+                             ord_cols = max_order_digits,
+                             from_cols = max_from_digits);
+                }
+            }
+        };
+    }
+
    pub fn execute_transact(&mut self, transaction: String) {
        match self.store.transact(transaction) {
            Result::Ok(report) => println!("{:?}", report),
--- a/tools/cli/src/mentat_cli/store.rs
+++ b/tools/cli/src/mentat_cli/store.rs
@ -16,6 +16,7 @@ use errors as cli;

 use mentat::{
    new_connection,
+    QueryExplanation,
 };

 use mentat::query::QueryResults;
@ -58,6 +59,10 @@ impl Store {
        Ok(self.conn.q_once(&self.handle, &query, None)?)
    }

+    pub fn explain_query(&self, query: String) -> Result<QueryExplanation, cli::Error> {
+        Ok(self.conn.q_explain(&self.handle, &query, None)?)
+    }
+
    pub fn transact(&mut self, transaction: String) -> Result<TxReport, cli::Error> {
        Ok(self.conn.transact(&mut self.handle, &transaction)?)
    }