Skip to content

Commit

Permalink
sql: clean up parser module
Browse files Browse the repository at this point in the history
  • Loading branch information
erikgrinaker committed Jul 20, 2024
1 parent 012e1d5 commit 329b92c
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 165 deletions.
105 changes: 53 additions & 52 deletions src/sql/parser/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,41 @@ use crate::sql::types::DataType;

use std::collections::BTreeMap;

/// The statement AST is the root node of the AST tree, which describes the
/// syntactic structure of a SQL query. It is passed to the planner, which
/// validates its contents and converts it into an execution plan.
/// The statement is the root node of the Abstract Syntax Tree, and describes
/// the syntactic structure of a SQL query. It is built from a raw SQL string by
/// the parser, and passed on to the planner which validates it and builds an
/// execution plan from it.
#[derive(Debug)]
pub enum Statement {
Begin {
read_only: bool,
as_of: Option<u64>,
},
/// Begin a new transaction.
Begin { read_only: bool, as_of: Option<u64> },
/// Commit a transaction.
Commit,
/// Roll back a transaction.
Rollback,
/// Explain a statement.
Explain(Box<Statement>),
CreateTable {
name: String,
columns: Vec<Column>,
},
DropTable {
name: String,
if_exists: bool,
},
Delete {
table: String,
r#where: Option<Expression>,
},
/// Create a new table.
CreateTable { name: String, columns: Vec<Column> },
/// Drop a table.
DropTable { name: String, if_exists: bool },
/// Delete matching rows.
Delete { table: String, r#where: Option<Expression> },
/// Insert new rows into a table.
Insert {
table: String,
columns: Option<Vec<String>>,
values: Vec<Vec<Expression>>,
columns: Option<Vec<String>>, // columns given in values, using default for rest
values: Vec<Vec<Expression>>, // rows to insert
},
/// Update rows in a table.
Update {
table: String,
set: BTreeMap<String, Option<Expression>>, // None for DEFAULT value
set: BTreeMap<String, Option<Expression>>, // column → value, None for default value
r#where: Option<Expression>,
},
/// Select matching rows.
Select {
select: Vec<(Expression, Option<String>)>,
select: Vec<(Expression, Option<String>)>, // optional column aliases
from: Vec<From>,
r#where: Option<Expression>,
group_by: Vec<Expression>,
Expand All @@ -48,10 +47,12 @@ pub enum Statement {
},
}

/// A FROM item: a table or join.
/// A FROM item.
#[derive(Debug)]
pub enum From {
/// A table.
Table { name: String, alias: Option<String> },
/// A join of two or more tables (may be nested).
Join { left: Box<From>, right: Box<From>, r#type: JoinType, predicate: Option<Expression> },
}

Expand All @@ -78,8 +79,8 @@ pub enum JoinType {
}

impl JoinType {
// If true, the join is an outer join -- rows with no join match are emitted
// with a NULL match.
// If true, the join is an outer join, where rows with no join matches are
// emitted with a NULL match.
pub fn is_outer(&self) -> bool {
match self {
Self::Left | Self::Right => true,
Expand All @@ -88,7 +89,7 @@ impl JoinType {
}
}

/// Sort orders.
/// ORDER BY direction.
#[derive(Debug)]
pub enum Order {
Ascending,
Expand All @@ -110,7 +111,7 @@ pub enum Expression {
Operator(Operator),
}

/// Expression literals.
/// Expression literal values.
#[derive(Clone, Debug)]
pub enum Literal {
Null,
Expand All @@ -120,19 +121,19 @@ pub enum Literal {
String(String),
}

/// To allow Expressions and Literals in e.g. hashmap lookups, implement simple
/// To allow using Expressions and Literals in e.g. hashmaps, implement simple
/// equality and hash for all types, including Null and f64::NAN. This is not
/// used for expression evaluation (handled by sql::types::Expression), where
/// these values should not be considered equal, only in lookups.
/// these values should not be considered equal to themselves, only in lookups.
impl std::cmp::PartialEq for Literal {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Boolean(l), Self::Boolean(r)) => l == r,
(Self::Integer(l), Self::Integer(r)) => l == r,
// Consider e.g. NaN equal to NaN for comparison purposes.
// Implies NaN == NaN but -NaN != NaN. Similarly with +/-0.0.
(Self::Float(l), Self::Float(r)) => l.to_bits() == r.to_bits(),
(Self::String(l), Self::String(r)) => l == r,
_ => core::mem::discriminant(self) == core::mem::discriminant(other),
(l, r) => core::mem::discriminant(l) == core::mem::discriminant(r),
}
}
}
Expand All @@ -159,29 +160,29 @@ impl std::hash::Hash for Literal {
/// around this, but we keep it simple.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Operator {
And(Box<Expression>, Box<Expression>),
Not(Box<Expression>),
Or(Box<Expression>, Box<Expression>),
And(Box<Expression>, Box<Expression>), // a AND b
Not(Box<Expression>), // NOT a
Or(Box<Expression>, Box<Expression>), // a OR b

Equal(Box<Expression>, Box<Expression>),
GreaterThan(Box<Expression>, Box<Expression>),
GreaterThanOrEqual(Box<Expression>, Box<Expression>),
Is(Box<Expression>, Literal), // NULL or f64 NAN
LessThan(Box<Expression>, Box<Expression>),
LessThanOrEqual(Box<Expression>, Box<Expression>),
NotEqual(Box<Expression>, Box<Expression>),
Equal(Box<Expression>, Box<Expression>), // a = b
GreaterThan(Box<Expression>, Box<Expression>), // a > b
GreaterThanOrEqual(Box<Expression>, Box<Expression>), // a >= b
Is(Box<Expression>, Literal), // IS NULL or IS NAN
LessThan(Box<Expression>, Box<Expression>), // a < b
LessThanOrEqual(Box<Expression>, Box<Expression>), // a <= b
NotEqual(Box<Expression>, Box<Expression>), // a != b

Add(Box<Expression>, Box<Expression>),
Divide(Box<Expression>, Box<Expression>),
Exponentiate(Box<Expression>, Box<Expression>),
Factorial(Box<Expression>),
Identity(Box<Expression>),
Multiply(Box<Expression>, Box<Expression>),
Negate(Box<Expression>),
Remainder(Box<Expression>, Box<Expression>),
Subtract(Box<Expression>, Box<Expression>),
Add(Box<Expression>, Box<Expression>), // a + b
Divide(Box<Expression>, Box<Expression>), // a / b
Exponentiate(Box<Expression>, Box<Expression>), // a ^ b
Factorial(Box<Expression>), // a!
Identity(Box<Expression>), // +a
Multiply(Box<Expression>, Box<Expression>), // a * b
Negate(Box<Expression>), // -a
Remainder(Box<Expression>, Box<Expression>), // a % b
Subtract(Box<Expression>, Box<Expression>), // a - b

Like(Box<Expression>, Box<Expression>),
Like(Box<Expression>, Box<Expression>), // a LIKE b
}

impl Expression {
Expand Down
31 changes: 14 additions & 17 deletions src/sql/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use crate::errinput;
use crate::error::Result;

/// The lexer (lexical analyzer) preprocesses raw SQL strings into a sequence of
/// lexical tokens (e.g. keyword, number, string, etc) that are passed onto the
/// SQL parser. In doing so, it strips away basic syntactic noise such as
/// whitespace, case, and quotes, and performs initial validation of symbols.
/// lexical tokens (e.g. keyword, number, string, etc), which are passed on to
/// the SQL parser. In doing so, it strips away basic syntactic noise such as
/// whitespace, case, and quotes, and performs initial symbol validation.
pub struct Lexer<'a> {
chars: std::iter::Peekable<std::str::Chars<'a>>,
}
Expand All @@ -16,8 +16,8 @@ pub struct Lexer<'a> {
/// fine for our purposes here.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
/// A number, with digits, decimal points, and/or exponents. Leading signs
/// (e.g. -) are separate tokens.
/// A numeric string, with digits, decimal points, and/or exponents. Leading
/// signs (e.g. -) are separate tokens.
Number(String),
/// A Unicode string, with quotes stripped and escape sequences resolved.
String(String),
Expand Down Expand Up @@ -156,7 +156,7 @@ pub enum Keyword {
}

impl TryFrom<&str> for Keyword {
// The error just indicates this isn't a keyword, so use a cheap string.
// Use a cheap static string, since this just indicates it's not a keyword.
type Error = &'static str;

fn try_from(value: &str) -> std::result::Result<Self, Self::Error> {
Expand Down Expand Up @@ -351,8 +351,9 @@ impl<'a> Lexer<'a> {

/// Scans the next token, if any.
fn scan(&mut self) -> Result<Option<Token>> {
// Ignore whitespace. The first character tells us the token type.
// Ignore whitespace.
self.skip_whitespace();
// The first character tells us the token type.
match self.chars.peek() {
Some('\'') => self.scan_string(),
Some('"') => self.scan_ident_quoted(),
Expand All @@ -378,8 +379,7 @@ impl<'a> Lexer<'a> {
}
}

/// Scans the next quoted identifier, if any. Case is preserved, keywords
/// are ignored.
/// Scans the next quoted identifier, if any. Case is preserved.
fn scan_ident_quoted(&mut self) -> Result<Option<Token>> {
if !self.next_is('"') {
return Ok(None);
Expand All @@ -405,8 +405,8 @@ impl<'a> Lexer<'a> {
number.push(c)
}
// Scan the fractional part, if any.
if let Some(sep) = self.next_if(|c| c == '.') {
number.push(sep);
if self.next_is('.') {
number.push('.');
while let Some(dec) = self.next_if(|c| c.is_ascii_digit()) {
number.push(dec)
}
Expand All @@ -424,7 +424,7 @@ impl<'a> Lexer<'a> {
Some(Token::Number(number))
}

/// Scans the next string literal, if any.
/// Scans the next quoted string literal, if any.
fn scan_string(&mut self) -> Result<Option<Token>> {
if !self.next_is('\'') {
return Ok(None);
Expand Down Expand Up @@ -485,9 +485,6 @@ impl<'a> Lexer<'a> {
/// Returns true if the entire given string is a single valid identifier.
pub fn is_ident(ident: &str) -> bool {
let mut lexer = Lexer::new(ident);
let token = lexer.next();
if lexer.next().is_some() {
return false; // multiple tokens, so not an identifier
}
matches!(token, Some(Ok(Token::Ident(_))))
let Some(Ok(Token::Ident(_))) = lexer.next() else { return false };
lexer.next().is_none() // if further tokens, it's not a lone identifier
}
2 changes: 2 additions & 0 deletions src/sql/parser/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Parses raw SQL strings into a structured Abstract Syntax Tree.
pub mod ast;
mod lexer;
mod parser;
Expand Down
Loading

0 comments on commit 329b92c

Please sign in to comment.