dbsql/src/sql_tokenize.c

/*-
 * DBSQL - A SQL database engine.
 *
 * Copyright (C) 2007-2008  The DBSQL Group, Inc. - All rights reserved.
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * There are special exceptions to the terms and conditions of the GPL as it
 * is applied to this software. View the full text of the exception in file
 * LICENSE_EXCEPTIONS in the directory of this software distribution.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

/*
 * A tokenizer for SQL
 *
 * This file contains C code that splits an SQL input string up into
 * individual tokens and sends those tokens one-by-one over to the
 * parser for analysis.
 */

#include "dbsql_config.h"

#ifndef NO_SYSTEM_INCLUDES
#include <ctype.h>
#include <stdlib.h>
#endif

#include "dbsql_int.h"

/*
 * All the keywords of the SQL language are stored in a hash
 * table composed of instances of the following structure.
 *
 * The name always points at a string literal, so it is declared
 * const.  len and inext are left zero in the static initializer and
 * are filled in the first time __get_keyword_code() runs.
 */
typedef struct keyword keyword_t;
struct keyword {
	const char *name;    /* The keyword name (never modified) */
	u_int8_t token_type; /* TK_* token code for this keyword */
	u_int8_t len;        /* Length of this keyword */
	u_int8_t inext;      /* 1-based index in sql_tokens_table[] of the
                                next keyword with the same hash, or 0
                                at the end of the chain */
};

/*
 * These are the keywords recognized by the tokenizer, each paired with
 * the token code reported for it.
 *
 * Only the name and token_type members are initialized here; the len
 * and inext members start out as zero and are filled in by
 * __get_keyword_code() the first time it is called, which is why the
 * table is not const.
 *
 * Several keywords share a token code: CROSS, FULL, INNER, LEFT,
 * NATURAL, OUTER and RIGHT all map to TK_JOIN_KW, and both TEMP and
 * TEMPORARY map to TK_TEMP.
 */
static keyword_t sql_tokens_table[] = {
  { "ABORT",             TK_ABORT,        },
  { "AFTER",             TK_AFTER,        },
  { "ALL",               TK_ALL,          },
  { "AND",               TK_AND,          },
  { "AS",                TK_AS,           },
  { "ASC",               TK_ASC,          },
  { "ATTACH",            TK_ATTACH,       },
  { "BEFORE",            TK_BEFORE,       },
  { "BEGIN",             TK_BEGIN,        },
  { "BETWEEN",           TK_BETWEEN,      },
  { "BY",                TK_BY,           },
  { "CASCADE",           TK_CASCADE,      },
  { "CASE",              TK_CASE,         },
  { "CHECK",             TK_CHECK,        },
  { "CLUSTER",           TK_CLUSTER,      },
  { "COLLATE",           TK_COLLATE,      },
  { "COMMIT",            TK_COMMIT,       },
  { "CONFLICT",          TK_CONFLICT,     },
  { "CONSTRAINT",        TK_CONSTRAINT,   },
  { "COPY",              TK_COPY,         },
  { "CREATE",            TK_CREATE,       },
  { "CROSS",             TK_JOIN_KW,      },
  { "DATABASE",          TK_DATABASE,     },
  { "DEFAULT",           TK_DEFAULT,      },
  { "DEFERRED",          TK_DEFERRED,     },
  { "DEFERRABLE",        TK_DEFERRABLE,   },
  { "DELETE",            TK_DELETE,       },
  { "DELIMITERS",        TK_DELIMITERS,   },
  { "DESC",              TK_DESC,         },
  { "DETACH",            TK_DETACH,       },
  { "DISTINCT",          TK_DISTINCT,     },
  { "DROP",              TK_DROP,         },
  { "END",               TK_END,          },
  { "EACH",              TK_EACH,         },
  { "ELSE",              TK_ELSE,         },
  { "EXCEPT",            TK_EXCEPT,       },
  { "EXPLAIN",           TK_EXPLAIN,      },
  { "FAIL",              TK_FAIL,         },
  { "FOR",               TK_FOR,          },
  { "FOREIGN",           TK_FOREIGN,      },
  { "FROM",              TK_FROM,         },
  { "FULL",              TK_JOIN_KW,      },
  { "GLOB",              TK_GLOB,         },
  { "GROUP",             TK_GROUP,        },
  { "HAVING",            TK_HAVING,       },
  { "IGNORE",            TK_IGNORE,       },
  { "IMMEDIATE",         TK_IMMEDIATE,    },
  { "IN",                TK_IN,           },
  { "INDEX",             TK_INDEX,        },
  { "INITIALLY",         TK_INITIALLY,    },
  { "INNER",             TK_JOIN_KW,      },
  { "INSERT",            TK_INSERT,       },
  { "INSTEAD",           TK_INSTEAD,      },
  { "INTERSECT",         TK_INTERSECT,    },
  { "INTO",              TK_INTO,         },
  { "IS",                TK_IS,           },
  { "ISNULL",            TK_ISNULL,       },
  { "JOIN",              TK_JOIN,         },
  { "KEY",               TK_KEY,          },
  { "LEFT",              TK_JOIN_KW,      },
  { "LIKE",              TK_LIKE,         },
  { "LIMIT",             TK_LIMIT,        },
  { "MATCH",             TK_MATCH,        },
  { "NATURAL",           TK_JOIN_KW,      },
  { "NOT",               TK_NOT,          },
  { "NOTNULL",           TK_NOTNULL,      },
  { "NULL",              TK_NULL,         },
  { "OF",                TK_OF,           },
  { "OFFSET",            TK_OFFSET,       },
  { "ON",                TK_ON,           },
  { "OR",                TK_OR,           },
  { "ORDER",             TK_ORDER,        },
  { "OUTER",             TK_JOIN_KW,      },
  { "PRAGMA",            TK_PRAGMA,       },
  { "PRIMARY",           TK_PRIMARY,      },
  { "RAISE",             TK_RAISE,        },
  { "REFERENCES",        TK_REFERENCES,   },
  { "REPLACE",           TK_REPLACE,      },
  { "RESTRICT",          TK_RESTRICT,     },
  { "RIGHT",             TK_JOIN_KW,      },
  { "ROLLBACK",          TK_ROLLBACK,     },
  { "ROW",               TK_ROW,          },
  { "SELECT",            TK_SELECT,       },
  { "SET",               TK_SET,          },
  { "STATEMENT",         TK_STATEMENT,    },
  { "TABLE",             TK_TABLE,        },
  { "TEMP",              TK_TEMP,         },
  { "TEMPORARY",         TK_TEMP,         },
  { "THEN",              TK_THEN,         },
  { "TRANSACTION",       TK_TRANSACTION,  },
  { "TRIGGER",           TK_TRIGGER,      },
  { "UNION",             TK_UNION,        },
  { "UNIQUE",            TK_UNIQUE,       },
  { "UPDATE",            TK_UPDATE,       },
  { "USING",             TK_USING,        },
  { "VACUUM",            TK_VACUUM,       },
  { "VALUES",            TK_VALUES,       },
  { "VIEW",              TK_VIEW,         },
  { "WHEN",              TK_WHEN,         },
  { "WHERE",             TK_WHERE,        },
};

/*
 * This is the hash table used to look up keywords.
 *
 * ai_table[h] holds the 1-based index into sql_tokens_table[] of the
 * first keyword whose hash, reduced modulo KEY_HASH_SIZE, is h.  A
 * value of 0 means the bucket is empty.  Further keywords in the same
 * bucket are reached through the inext member of each entry.  The
 * table is populated by __get_keyword_code() on its first call.
 */
#define KEY_HASH_SIZE 101
static u_int8_t ai_table[KEY_HASH_SIZE];


/*
 * __get_keyword_code --
 *	Classify the N-byte identifier starting at Z.  When the text
 *	matches an SQL keyword (compared without regard to case) the
 *	token code of that keyword is returned; otherwise the result
 *	is TK_ID.
 *
 *	The first call builds the keyword hash table: it records the
 *	length of every keyword and links each entry into the bucket
 *	chain selected by its case-insensitive hash.
 *
 * PUBLIC: int __get_keyword_code __P((const char *, int));
 */
int
__get_keyword_code(z, n)
	const char *z;
	int n;
{
	static char need_init = 1;
	keyword_t *kw;
	int bucket, slot;
	int count;

	if (need_init) { /* TODO: beginning of what used to be mutex'ed */
		/* Build the hash chains over the keyword table. */
		need_init = 0;
		count = sizeof(sql_tokens_table) /
			sizeof(sql_tokens_table[0]);
		for (slot = 0; slot < count; slot++) {
			kw = &sql_tokens_table[slot];
			kw->len = strlen(kw->name);
			bucket = __hash_ignore_case(kw->name, kw->len);
			bucket %= KEY_HASH_SIZE;
			/*
			 * Push the entry onto the front of its bucket.
			 * Indices are stored off by one so that zero can
			 * mark the end of a chain.
			 */
			kw->inext = ai_table[bucket];
			ai_table[bucket] = slot+1;
		}
	} /* TODO: end of what used to be mutex'ed */

	/* Walk the chain for this identifier's bucket. */
	bucket = __hash_ignore_case(z, n) % KEY_HASH_SIZE;
	slot = ai_table[bucket];
	while (slot) {
		kw = &sql_tokens_table[slot-1];
		if (kw->len == n && strncasecmp(kw->name, z, n) == 0)
			return kw->token_type;
		slot = kw->inext;
	}
	return TK_ID;
}


/*
 * If X is a character that can be used in an identifier and
 * X&0x80==0 then id_char_p[X] will be 1.  If X&0x80==0x80 then
 * X is always an identifier character.  (Hence all UTF-8
 * characters can be part of an identifier).  id_char_p[X] will
 * be 0 for every character in the lower 128 ASCII characters
 * that cannot be used as part of an identifier.
 *
 * In this implementation, an identifier can be a string of
 * alphabetic characters, digits, and "_" plus any character
 * with the high-order bit set.  The latter rule means that
 * any sequence of UTF-8 characters or characters taken from
 * an extended ISO8859 character set can form an identifier.
 *
 * NOTE: the table has exactly 128 entries (0x00 through 0x7F).  A
 * caller must test the high-order bit of a byte first and only use
 * the byte as an index when that bit is clear; indexing with a value
 * of 0x80 or above reads past the end of the array.
 */
static const char id_char_p[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};


/*
 * __get_token --
 *	Return the length of the token that begins at z[0].
 *	Store the token type in *token_type before returning.
 *
 *	Z must be NUL terminated.  The visible caller only invokes this
 *	function when z[0] is not the terminator; if it is, the result
 *	is TK_ILLEGAL with a length of 1.
 *
 *	Summary of what is recognized:
 *	  - a run of white space			TK_SPACE
 *	  - "--" to end of line, and C-style comments	TK_COMMENT
 *	    (an unterminated C-style comment runs to the end of input)
 *	  - the operators and punctuation in the switch below
 *	  - text in single or double quotes		TK_STRING
 *	    (a doubled delimiter stands for one delimiter character; an
 *	    unterminated string runs to the end of input)
 *	  - digits, with optional fraction/exponent	TK_INTEGER/TK_FLOAT
 *	  - text in [square brackets]			TK_ID
 *	  - identifier characters (see id_char_p[])	a keyword code
 *							or TK_ID
 *	  - anything else				TK_ILLEGAL
 *
 *	The returned length never extends beyond the NUL terminator as
 *	long as z[0] is not itself the terminator.
 *
 * STATIC: static int __get_token __P((const unsigned char *, int *));
 */
static int
__get_token(z, token_type)
	const unsigned char *z;
	int *token_type;
{
	int i, delim;

	switch(*z) {
	case ' ':  /* FALLTHROUGH */
	case '\t': /* FALLTHROUGH */
	case '\n': /* FALLTHROUGH */
	case '\f': /* FALLTHROUGH */
	case '\r': /* FALLTHROUGH */
		i = 1;
		while (isspace(z[i])) {
			i++;
		}
		*token_type = TK_SPACE;
		return i;
		break;
	case '-':
		if (z[1] == '-') {
			/* SQL comment: runs up to, but excludes, the newline. */
			i = 2;
			while (z[i] && z[i] != '\n') {
				i++;
			}
			*token_type = TK_COMMENT;
			return i;
		}
		*token_type = TK_MINUS;
		return 1;
		break;
	case '(':
		*token_type = TK_LP;
		return 1;
		break;
	case ')':
		*token_type = TK_RP;
		return 1;
		break;
	case ';':
		*token_type = TK_SEMI;
		return 1;
		break;
	case '+':
		*token_type = TK_PLUS;
		return 1;
		break;
	case '*':
		*token_type = TK_STAR;
		return 1;
		break;
	case '/':
		/* A "/" followed by "*" at the very end of input is a slash. */
		if (z[1] != '*' || z[2] == 0) {
			*token_type = TK_SLASH;
			return 1;
		}
		/*
		 * Start at offset 3 so that the "*" opening the comment
		 * cannot also be taken as part of the closing delimiter.
		 */
		i = 3;
		while (z[i] && (z[i]!='/' || z[i-1]!='*')) {
			i++;
		}
		if (z[i])
			i++;
		*token_type = TK_COMMENT;
		return i;
		break;
	case '%':
		*token_type = TK_REM;
		return 1;
		break;
	case '=':
		/* Both "=" and "==" are the equality operator. */
		*token_type = TK_EQ;
		return 1 + (z[1] == '=');
		break;
	case '<':
		if (z[1] == '=') {
			*token_type = TK_LE;
			return 2;
		} else if (z[1] == '>') {
			*token_type = TK_NE;
			return 2;
		} else if (z[1] == '<') {
			*token_type = TK_LSHIFT;
			return 2;
		} else {
			*token_type = TK_LT;
			return 1;
		}
		break;
	case '>':
		if (z[1] == '=') {
			*token_type = TK_GE;
			return 2;
		} else if (z[1] == '>') {
			*token_type = TK_RSHIFT;
			return 2;
		} else {
			*token_type = TK_GT;
			return 1;
		}
		break;
	case '!':
		if (z[1] == '=') {
			*token_type = TK_NE;
			return 2;
		}
		/*
		 * "!" is only valid as part of "!=".  The offending pair of
		 * bytes is reported as the illegal token, but when the "!"
		 * is the last byte of the input only that one byte may be
		 * consumed: claiming two would move the caller's cursor
		 * past the NUL terminator.
		 */
		*token_type = TK_ILLEGAL;
		return (z[1] != 0 ? 2 : 1);
		break;
	case '|':
		if (z[1] != '|') {
			*token_type = TK_BITOR;
			return 1;
		} else {
			*token_type = TK_CONCAT;
			return 2;
		}
		break;
	case ',':
		*token_type = TK_COMMA;
		return 1;
		break;
	case '&':
		*token_type = TK_BITAND;
		return 1;
		break;
	case '~':
		*token_type = TK_BITNOT;
		return 1;
		break;
	case '\'': /* FALLTHROUGH */
	case '"':
		delim = z[0];
		for (i = 1; z[i]; i++) {
			if (z[i] == delim) {
				if (z[i+1] == delim) {
					/* Doubled delimiter: skip the pair. */
					i++;
				} else {
					break;
				}
			}
		}
		/* Include the closing delimiter when there is one. */
		if (z[i])
			i++;
		*token_type = TK_STRING;
		return i;
	case '.':
		*token_type = TK_DOT;
		return 1;
		break;
	case '0': /* FALLTHROUGH */
	case '1': /* FALLTHROUGH */
	case '2': /* FALLTHROUGH */
	case '3': /* FALLTHROUGH */
	case '4': /* FALLTHROUGH */
	case '5': /* FALLTHROUGH */
	case '6': /* FALLTHROUGH */
	case '7': /* FALLTHROUGH */
	case '8': /* FALLTHROUGH */
	case '9':
		*token_type = TK_INTEGER;
		i = 1;
		while (isdigit(z[i])) {
			i++;
		}
		/* A fraction needs at least one digit after the point. */
		if (z[i] == '.' && isdigit(z[i+1])) {
			i += 2;
			while(isdigit(z[i])) {
				i++;
			}
			*token_type = TK_FLOAT;
		}
		/*
		 * An exponent is "e" or "E", an optional sign, and at least
		 * one digit.  Skipping two bytes covers either the "e" and
		 * the first digit or the "e" and the sign.
		 */
		if ((z[i] == 'e' || z[i] == 'E') &&
		    (isdigit(z[i+1]) ||
		     ((z[i+1] == '+' || z[i+1] == '-') && isdigit(z[i+2])))) {
			i += 2;
			while(isdigit(z[i])) {
				i++;
			}
			*token_type = TK_FLOAT;
		}
		return i;
		break;
	case '[':
		/*
		 * Bracketed identifier.  The loop stops one byte after the
		 * closing "]", or at the end of input if there is none.
		 */
		i = 1;
		while (z[i] && z[i-1] != ']') {
			i++;
		}
		*token_type = TK_ID;
		return i;
		break;
	case '?':
		*token_type = TK_VARIABLE;
		return 1;
		break;
	default:
		/*
		 * id_char_p[] only covers 0x00-0x7F, so the high bit is
		 * always tested before the table is consulted.
		 */
		if ((*z & 0x80) == 0 && !id_char_p[*z]) {
			break;
		}
		i = 1;
		while((z[i] & 0x80)!=0 || id_char_p[z[i]]) {
			i++;
		}
		*token_type = __get_keyword_code((char*)z, i);
		return i;
		break;
	}
	*token_type = TK_ILLEGAL;
	return 1;
}

/*
 * __run_sql_parser --
 *	Run the parser on the given SQL string.  The parser structure is
 *	passed in.  A DBSQL_ status code is returned.
 *
 * PUBLIC: int __run_sql_parser __P((parser_t *, const char *, char **));
 */
/*TODO: REMOVE THIS  If an error occurs
 * and pzErrMsg!=NULL then an error message might be written into
 * memory obtained from malloc() and *pzErrMsg made to point to that
 * error message.  Or maybe not.
 */
int
__run_sql_parser(parser, sql, err_msgs)
	parser_t *parser;
	const char *sql;
	char **err_msgs;
{
	int nerr = 0;
	int i;
	void *engine;
	int token_type;
	int last_token_parsed = -1;
	DBSQL *dbp = parser->db;
	extern void *__sql_parser_alloc(DBSQL *, int(*)(DBSQL*,size_t,void *));
	extern void __sql_parser_free(DBSQL *, void *, void(*)(DBSQL *,void*));
	extern int __sql_parser(void*, int, token_t, parser_t*);

	dbp->flags &= ~DBSQL_Interrupt;
	parser->rc = DBSQL_SUCCESS;
	i = 0;
	engine = __sql_parser_alloc(dbp, __dbsql_malloc);
	if (engine == 0) {
		__str_append(err_msgs, "out of memory", (char*)0);
		return 1;
	}
	parser->sLastToken.dyn = 0;
	parser->zTail = sql;
	while (parser->rc == DBSQL_SUCCESS && sql[i] != 0) {
		DBSQL_ASSERT(i >= 0);
		parser->sLastToken.z = &sql[i];
		DBSQL_ASSERT(parser->sLastToken.dyn == 0);
		parser->sLastToken.n = __get_token((unsigned char*)&sql[i],
						   &token_type);
		i += parser->sLastToken.n;
		switch (token_type) {
		case TK_SPACE: /* FALLTHROUGH */
		case TK_COMMENT:
			if ((dbp->flags & DBSQL_Interrupt) != 0) {
				parser->rc = DBSQL_INTERRUPTED;
				__str_append(err_msgs, "interrupt",
					     (char*)0);
				goto abort_parse;
			}
			break;
		case TK_ILLEGAL:
			__str_nappend(err_msgs, "unrecognized token: \"",
				      -1, parser->sLastToken.z,
				      parser->sLastToken.n, "\"", 1, NULL);
			nerr++;
			goto abort_parse;
			break;
		case TK_SEMI:
			parser->zTail = &sql[i];
			/* FALLTHROUGH */
		default:
			__sql_parser(engine, token_type, parser->sLastToken,
				     parser);
			last_token_parsed = token_type;
			if (parser->rc != DBSQL_SUCCESS) {
				goto abort_parse;
			}
			break;
		}
	}
  abort_parse:
	if (sql[i] == 0 && nerr == 0 && parser->rc == DBSQL_SUCCESS) {
		if (last_token_parsed != TK_SEMI) {
			__sql_parser(engine, TK_SEMI, parser->sLastToken,
				     parser);
			parser->zTail = &sql[i];
		}
		__sql_parser(engine, 0, parser->sLastToken, parser);
	}
	__sql_parser_free(dbp, engine, __dbsql_free);
	if (parser->rc != DBSQL_SUCCESS && parser->rc != DBSQL_DONE &&
	    parser->zErrMsg == 0) {
		__str_append(&parser->zErrMsg,
			     dbsql_strerror(parser->rc), (char*)0);
	}
	if (parser->zErrMsg) {
		if (err_msgs && *err_msgs == 0) {
			*err_msgs = parser->zErrMsg;
		} else {
			__dbsql_free(dbp, parser->zErrMsg);
		}
		parser->zErrMsg = 0;
		if (!nerr)
			nerr++;
	}
	if (parser->pVdbe && (parser->useCallback || parser->nErr > 0)) {
		__vdbe_delete(parser->pVdbe);
		parser->pVdbe = 0;
	}
	if (parser->pNewTable) {
		__vdbe_delete_table(parser->db, parser->pNewTable);
		parser->pNewTable = 0;
	}
	if (parser->pNewTrigger) {
		__vdbe_delete_trigger(parser->pNewTrigger);
		parser->pNewTrigger = 0;
	}
	if (nerr > 0 &&
	    (parser->rc == DBSQL_SUCCESS || parser->rc == DBSQL_DONE)) {
		parser->rc = DBSQL_ERROR;
	}
	return nerr;
}

/*
 * Token types used by the dbsql_complete_stmt() routine.  See the header
 * comments on that procedure for additional information.
 *
 * These values are used as the second index into the trans[][] state
 * transition matrix in dbsql_complete_stmt(), so they must stay in step
 * with the column order of that matrix.
 */
#define tkEXPLAIN 0
#define tkCREATE  1
#define tkTEMP    2
#define tkTRIGGER 3
#define tkEND     4
#define tkSEMI    5
#define tkWS      6
#define tkOTHER   7

/*
 * __complete_stmt_id_char --
 *	Return non-zero if the byte C can be part of an unquoted
 *	identifier.  Every byte with the high-order bit set qualifies;
 *	the remaining bytes are looked up in id_char_p[].  That table
 *	only has entries for 0x00-0x7F, so the high bit has to be tested
 *	before the table is indexed.
 */
static int
__complete_stmt_id_char(c)
	int c;
{
	u_int8_t b = (u_int8_t)c;

	return ((b & 0x80) != 0 || id_char_p[b]);
}

/*
 * dbsql_complete_stmt --
 *
 *	Return TRUE if the given SQL string ends in a semicolon.
 *
 *	More precisely, 1 is returned when the state machine described
 *	below finishes in the START state and 0 otherwise.  An empty
 *	string, or one holding only white space and comments, therefore
 *	also yields 1.  Input that ends inside a C-style comment, a
 *	quoted string or a [bracketed] identifier yields 0.
 *
 *	Special handling is required for CREATE TRIGGER statements.
 *	Whenever the CREATE TRIGGER keywords are seen, the statement
 *	must end with ";END;".
 *
 *	This implementation uses a state machine with 7 states:
 *
 *   (0) START     At the beginning or end of an SQL statement.  This routine
 *                 returns 1 if it ends in the START state and 0 if it ends
 *                 in any other state.
 *
 *   (1) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of
 *                 a statement.
 *
 *   (2) CREATE    The keyword CREATE has been seen at the beginning of a
 *                 statement, possibly preceded by EXPLAIN and/or followed by
 *                 TEMP or TEMPORARY
 *
 *   (3) NORMAL    We are in the middle of statement which ends with a single
 *                 semicolon.
 *
 *   (4) TRIGGER   We are in the middle of a trigger definition that must be
 *                 ended by a semicolon, the keyword END, and another
 *                 semicolon.
 *
 *   (5) SEMI      We've seen the first semicolon in the ";END;" that occurs at
 *                 the end of a trigger definition.
 *
 *   (6) END       We've seen the ";END" of the ";END;" that occurs at the end
 *                 of a trigger definition.
 *
 * Transitions between states above are determined by tokens extracted
 * from the input.  The following tokens are significant:
 *
 *   (0) tkEXPLAIN   The "explain" keyword.
 *   (1) tkCREATE    The "create" keyword.
 *   (2) tkTEMP      The "temp" or "temporary" keyword.
 *   (3) tkTRIGGER   The "trigger" keyword.
 *   (4) tkEND       The "end" keyword.
 *   (5) tkSEMI      A semicolon.
 *   (6) tkWS        Whitespace
 *   (7) tkOTHER     Any other SQL token.
 *
 * Whitespace never causes a state transition and is always ignored.
 * Comments are treated as whitespace.
 *
 * EXTERN: int dbsql_complete_stmt __P((const char *));
 *
 */
int
dbsql_complete_stmt(sql)
	const char *sql;
{
	u_int8_t state = 0; /* Current state, using values from comment */
	u_int8_t token;     /* Value of the next token */
	int c;

	/*
	 * The following matrix defines the transition from one state to
	 * another according to what token is seen.  trans[state][token]
	 * returns the next state.
	 */
	static const u_int8_t trans[7][8] = {
                    /* Token:                                                */
    /* State:       **  EXPLAIN  CREATE  TEMP  TRIGGER  END  SEMI  WS  OTHER */
    /* 0   START: */ {       1,      2,    3,       3,   3,    0,  0,     3, },
    /* 1 EXPLAIN: */ {       3,      2,    3,       3,   3,    0,  1,     3, },
    /* 2  CREATE: */ {       3,      3,    2,       4,   3,    0,  2,     3, },
    /* 3  NORMAL: */ {       3,      3,    3,       3,   3,    0,  3,     3, },
    /* 4 TRIGGER: */ {       4,      4,    4,       4,   4,    5,  4,     4, },
    /* 5    SEMI: */ {       4,      4,    4,       4,   6,    5,  5,     4, },
    /* 6     END: */ {       4,      4,    4,       4,   4,    0,  6,     4, },
  };

	/*
	 * Each case below leaves sql on the LAST byte of the token it
	 * recognized; the increment at the bottom of the loop then moves
	 * on to the first byte of the next token.
	 */
	while (*sql) {
		switch (*sql) {
		case ';':
			token = tkSEMI;
			break;
		case ' ': /* FALLTHROUGH */
		case '\r': /* FALLTHROUGH */
		case '\t': /* FALLTHROUGH */
		case '\n': /* FALLTHROUGH */
		case '\f':
			/* White space is ignored */
			token = tkWS;
			break;
		case '/':
			/* C-style comments */
			if (sql[1] != '*') {
				token = tkOTHER;
				break;
			}
			sql += 2;
			while (sql[0] && (sql[0] != '*' || sql[1] != '/')) {
				sql++;
			}
			/* An unterminated comment is never complete. */
			if (sql[0] == 0)
				return 0;
			sql++;
			token = tkWS;
			break;
		case '-':
			/* SQL-style comments from "--" to end of line */
			if (sql[1] != '-') {
				token = tkOTHER;
				break;
			}
			while (*sql && *sql != '\n') {
				sql++;
			}
			/*
			 * The comment ran to the end of the input; the
			 * answer depends on what preceded it.
			 */
			if (*sql == 0)
				return state == 0;
			token = tkWS;
			break;
		case '[':
			/* Microsoft-style identifiers in [...] */
			sql++;
			while (*sql && *sql!=']') {
				sql++;
			}
			if (*sql == 0)
				return 0;
			token = tkOTHER;
			break;
		case '"':
			/* single- and double-quoted strings */
			/* FALLTHROUGH */
		case '\'':
			/*
			 * A doubled delimiter inside a string is simply seen
			 * as two adjacent strings, which gives the same
			 * sequence of states.
			 */
			c = *sql;
			sql++;
			while (*sql && *sql != c) {
				sql++;
			}
			if (*sql == 0)
				return 0;
			token = tkOTHER;
			break;
		default:
			if (__complete_stmt_id_char(*sql)) {
				/* Keywords and unquoted identifiers */
				int nid = 1;
				while (__complete_stmt_id_char(sql[nid])) {
					nid++;
				}
				/*
				 * Only the few keywords that drive the state
				 * machine matter; the first letter selects
				 * which of them to compare against.
				 */
				switch (*sql) {
				case 'c': /* FALLTHROUGH */
				case 'C':
					if (nid == 6 &&
					    strncasecmp(sql,
						  "create", 6) == 0) {
						token = tkCREATE;
					} else {
						token = tkOTHER;
					}
					break;
				case 't': /* FALLTHROUGH */
				case 'T':
					if (nid == 7 &&
					    strncasecmp(sql,
						  "trigger", 7) == 0) {
						token = tkTRIGGER;
					} else if (nid == 4 &&
					    strncasecmp(sql,
						  "temp", 4) == 0) {
						token = tkTEMP;
					} else if (nid == 9 &&
					    strncasecmp(sql,
						  "temporary", 9) == 0) {
						token = tkTEMP;
					} else {
						token = tkOTHER;
					}
					break;
				case 'e': /* FALLTHROUGH */
				case 'E':
					if (nid == 3 &&
					    strncasecmp(sql,
						  "end", 3) == 0) {
						token = tkEND;
					} else if (nid == 7 &&
					    strncasecmp(sql,
						  "explain", 7) == 0) {
						token = tkEXPLAIN;
					} else {
						token = tkOTHER;
					}
					break;
				default:
					token = tkOTHER;
					break;
				}
				sql += nid - 1;
			} else {
				/* Operators and special symbols */
				token = tkOTHER;
			}
			break;
		}
		state = trans[state][token];
		sql++;
	}
	return state == 0;
}