Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/Eventual-Inc/Daft into erro…
Browse files Browse the repository at this point in the history
…r-messages
  • Loading branch information
universalmind303 committed Jan 14, 2025
2 parents 7c172b3 + feab49a commit bf0d1b8
Show file tree
Hide file tree
Showing 27 changed files with 521 additions and 442 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ repos:
(?x)^(
tutorials/.*\.ipynb|
docs/.*\.ipynb|
docs/source/user_guide/fotw/data/
docs/source/user_guide/fotw/data/|
.*\.jsonl
)$
args:
- --autofix
Expand Down
111 changes: 92 additions & 19 deletions src/daft-sql/src/planner.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{
cell::{Ref, RefCell, RefMut},
collections::{HashMap, HashSet},
path::Path,
rc::Rc,
sync::Arc,
};
Expand All @@ -21,10 +22,11 @@ use daft_functions::{
use daft_logical_plan::{LogicalPlanBuilder, LogicalPlanRef};
use sqlparser::{
ast::{
ArrayElemTypeDef, BinaryOperator, CastKind, ColumnDef, DateTimeField, Distinct,
ExactNumberInfo, ExcludeSelectItem, GroupByExpr, Ident, Query, SelectItem, SetExpr,
Statement, StructField, Subscript, TableAlias, TableWithJoins, TimezoneInfo, UnaryOperator,
Value, WildcardAdditionalOptions, With,
self, ArrayElemTypeDef, BinaryOperator, CastKind, ColumnDef, DateTimeField, Distinct,
ExactNumberInfo, ExcludeSelectItem, FunctionArg, FunctionArgExpr, GroupByExpr, Ident,
ObjectName, Query, SelectItem, SetExpr, Statement, StructField, Subscript, TableAlias,
TableFunctionArgs, TableWithJoins, TimezoneInfo, UnaryOperator, Value,
WildcardAdditionalOptions, With,
},
dialect::GenericDialect,
parser::{Parser, ParserOptions},
Expand Down Expand Up @@ -1002,21 +1004,11 @@ impl<'a> SQLPlanner<'a> {
alias,
..
} => {
let table_name = name.to_string();
let Some(rel) = self
.table_map
.get(&table_name)
.cloned()
.or_else(|| self.cte_map().get(&table_name).cloned())
.or_else(|| {
self.catalog()
.get_table(&table_name)
.map(|table| Relation::new(table.into(), table_name.clone()))
})
else {
table_not_found_err!(table_name)
let rel = if is_table_path(name) {
self.plan_relation_path(name)?
} else {
self.plan_relation_table(name)?
};

(rel, alias.clone())
}
sqlparser::ast::TableFactor::Derived {
Expand Down Expand Up @@ -1066,6 +1058,45 @@ impl<'a> SQLPlanner<'a> {
}
}

/// Plan a `FROM '<path>'` table factor by rewriting it to the matching table-valued function.
///
/// The path's file extension (compared ASCII case-insensitively) selects the reader:
/// `csv` -> `read_csv`, `json` -> `read_json`, `parquet` -> `read_parquet`. The path is then
/// passed to that function as a single unnamed single-quoted string argument.
///
/// # Errors
///
/// Bails out through `invalid_operation_err!` when `name` is not exactly one identifier, when
/// the path has no extension, or when the extension is not one of the supported ones. Any
/// error from `plan_table_function` is returned as-is.
fn plan_relation_path(&self, name: &ObjectName) -> SQLPlannerResult<Relation> {
    // Read the identifier's raw value instead of slicing the `Display` output of `name`:
    // `Display` re-escapes embedded quote characters (a path `it's.csv` is rendered as
    // `'it''s.csv'`), so stripping the first and last character of the rendered string would
    // hand a corrupted path to the reader.
    let [ident] = name.0.as_slice() else {
        invalid_operation_err!("unsupported file path, expected a single quoted path: {}", name)
    };
    let path = ident.value.as_str();

    let func = match Path::new(path).extension() {
        Some(ext) if ext.eq_ignore_ascii_case("csv") => "read_csv",
        Some(ext) if ext.eq_ignore_ascii_case("json") => "read_json",
        Some(ext) if ext.eq_ignore_ascii_case("parquet") => "read_parquet",
        Some(_) => invalid_operation_err!("unsupported file path extension: {}", name),
        None => invalid_operation_err!("unsupported file path, no extension: {}", name),
    };

    // Equivalent to the user having written `FROM read_xxx('<path>')`.
    let args = TableFunctionArgs {
        args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(
            ast::Expr::Value(Value::SingleQuotedString(path.to_string())),
        ))],
        settings: None,
    };
    self.plan_table_function(func, &args)
}

/// Plan a `FROM <table>` table factor.
///
/// The rendered object name is looked up, in order, in the planner's `table_map`, then in
/// the CTE map, and finally in the catalog. The first hit wins; a catalog hit is wrapped in
/// a new `Relation` carrying the table name. If none of the three knows the name, the
/// function bails out through `table_not_found_err!`.
fn plan_relation_table(&self, name: &ObjectName) -> SQLPlannerResult<Relation> {
    let key = name.to_string();

    // 1. tables registered directly with the planner
    if let Some(registered) = self.table_map.get(&key).cloned() {
        return Ok(registered);
    }

    // 2. common table expressions
    if let Some(cte) = self.cte_map().get(&key).cloned() {
        return Ok(cte);
    }

    // 3. catalog tables, wrapped into a relation named after the table
    let from_catalog = self
        .catalog()
        .get_table(&key)
        .map(|table| Relation::new(table.into(), key.clone()));

    match from_catalog {
        Some(rel) => Ok(rel),
        None => table_not_found_err!(key),
    }
}

fn plan_identifier(&self, idents: &[Ident]) -> SQLPlannerResult<ExprRef> {
// if the current relation is not resolved (e.g. in a `sql_expr` call, simply wrap identifier in a col)
if self.current_relation.is_none() {
Expand Down Expand Up @@ -2208,6 +2239,24 @@ fn idents_to_str(idents: &[Ident]) -> String {
.join(".")
}

/// Reports whether `name` is a string literal, i.e. exactly one single-quoted identifier
/// such as `'path/to/file.extension'`.
///
/// Only the number of parts and the quote style are inspected; the identifier's text is not.
///
/// # Examples
///
/// ```text
/// 'file.ext'           -> true
/// 'path/to/file.ext'   -> true
/// 'a'.'b'.'c'          -> false (multiple identifiers)
/// "path/to/file.ext"   -> false (double-quotes)
/// hello                -> false (not single-quoted)
/// ```
fn is_table_path(name: &ObjectName) -> bool {
    match name.0.as_slice() {
        // exactly one part: it is a path only when it was written with single quotes
        [only] => only.quote_style == Some('\''),
        // zero parts or a multi-part (qualified) name
        _ => false,
    }
}

/// unresolves an alias in a projection
/// Example:
/// ```sql
Expand Down Expand Up @@ -2248,8 +2297,9 @@ fn unresolve_alias(expr: ExprRef, projection: &[ExprRef]) -> SQLPlannerResult<Ex
#[cfg(test)]
mod tests {
use daft_core::prelude::*;
use sqlparser::ast::{Ident, ObjectName};

use crate::sql_schema;
use crate::{planner::is_table_path, sql_schema};

#[test]
fn test_sql_schema_creates_expected_schema() {
Expand Down Expand Up @@ -2292,4 +2342,27 @@ mod tests {
let expected = Schema::new(vec![Field::new("col1", DataType::Int32)]).unwrap();
assert_eq!(&*result, &expected);
}

/// Checks `is_table_path` against each quoting style and a multi-part name.
#[test]
fn test_is_table_path() {
    // Builds a one-part object name for the sample path with an explicit quote character.
    let quoted = |quote: char| {
        ObjectName(vec![Ident {
            value: "path/to/file.ext".to_string(),
            quote_style: Some(quote),
        }])
    };

    // single-quoted path should return true
    assert!(is_table_path(&quoted('\'')));

    // multiple identifiers should return false
    let qualified = ObjectName(vec![Ident::new("a"), Ident::new("b")]);
    assert!(!is_table_path(&qualified));

    // double-quoted identifier should return false
    assert!(!is_table_path(&quoted('"')));

    // unquoted identifier should return false
    let bare = ObjectName(vec![Ident::new("path/to/file.ext")]);
    assert!(!is_table_path(&bare));
}
}
25 changes: 25 additions & 0 deletions tests/assets/json-data/small.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{ "x": 42, "y": "apple", "z": true }
{ "x": 17, "y": "banana", "z": false }
{ "x": 89, "y": "cherry", "z": true }
{ "x": 3, "y": "date", "z": false }
{ "x": 156, "y": "elderberry", "z": true }
{ "x": 23, "y": "fig", "z": true }
{ "x": 777, "y": "grape", "z": false }
{ "x": 444, "y": "honeydew", "z": true }
{ "x": 91, "y": "kiwi", "z": false }
{ "x": 12, "y": "lemon", "z": true }
{ "x": 365, "y": "mango", "z": false }
{ "x": 55, "y": "nectarine", "z": true }
{ "x": 888, "y": "orange", "z": false }
{ "x": 247, "y": "papaya", "z": true }
{ "x": 33, "y": "quince", "z": false }
{ "x": 159, "y": "raspberry", "z": true }
{ "x": 753, "y": "strawberry", "z": false }
{ "x": 951, "y": "tangerine", "z": true }
{ "x": 426, "y": "ugli fruit", "z": false }
{ "x": 87, "y": "vanilla", "z": true }
{ "x": 234, "y": "watermelon", "z": false }
{ "x": 567, "y": "xigua", "z": true }
{ "x": 111, "y": "yuzu", "z": false }
{ "x": 999, "y": "zucchini", "z": true }
{ "x": 123, "y": "apricot", "z": false }
21 changes: 0 additions & 21 deletions tests/connect/test_alias.py

This file was deleted.

18 changes: 0 additions & 18 deletions tests/connect/test_analyze_plan.py

This file was deleted.

46 changes: 0 additions & 46 deletions tests/connect/test_basic_column.py

This file was deleted.

Loading

0 comments on commit bf0d1b8

Please sign in to comment.