From cecafac01664d00855492eca39f8688282ee137a Mon Sep 17 00:00:00 2001 From: mihailoale-db Date: Wed, 19 Feb 2025 16:49:30 +0100 Subject: [PATCH] [SPARK-51257][SQL][TESTS] Add order-by-alias.sql ### What changes were proposed in this pull request? Added a new test, `order-by-alias.sql`, which covers the cases where `ORDER BY` is done using an alias. ### Why are the changes needed? To improve the testing coverage of `ORDER BY` feature. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50007 from mihailoale-db/orderbyalias. Authored-by: mihailoale-db Signed-off-by: Max Gekk --- .../analyzer-results/order-by-alias.sql.out | 214 ++++++++++++++++ .../sql-tests/inputs/order-by-alias.sql | 37 +++ .../sql-tests/results/order-by-alias.sql.out | 231 ++++++++++++++++++ 3 files changed, 482 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/analyzer-results/order-by-alias.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/inputs/order-by-alias.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/order-by-alias.sql.out diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-alias.sql.out new file mode 100644 index 0000000000000..df770a578bf67 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-alias.sql.out @@ -0,0 +1,214 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b) +-- !query analysis +CreateViewCommand `testData`, SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, 
true + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a FROM testData ORDER BY A +-- !query analysis +Sort [A#x ASC NULLS FIRST], true ++- Project [a#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a as alias FROM testData ORDER BY ALIAS +-- !query analysis +Sort [ALIAS#x ASC NULLS FIRST], true ++- Project [a#x AS alias#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a AS k FROM testData ORDER BY 'k' +-- !query analysis +Sort [k ASC NULLS FIRST], true ++- Project [a#x AS k#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT 1 AS k FROM testData ORDER BY 'k' +-- !query analysis +Sort [k ASC NULLS FIRST], true ++- Project [1 AS k#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT concat_ws(' ', a, b) FROM testData ORDER BY `concat_ws( , a, b)` +-- !query analysis +Sort [concat_ws( , a, b)#x ASC NULLS FIRST], true ++- Project [concat_ws( , cast(a#x as string), cast(b#x as string)) AS concat_ws( , a, b)#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT 1 AS a FROM testData ORDER BY a +-- !query analysis 
+Sort [a#x ASC NULLS FIRST], true ++- Project [1 AS a#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT 1 AS a FROM testData ORDER BY `a` +-- !query analysis +Sort [a#x ASC NULLS FIRST], true ++- Project [1 AS a#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT 1 ORDER BY `1` +-- !query analysis +Sort [1#x ASC NULLS FIRST], true ++- Project [1 AS 1#x] + +- OneRowRelation + + +-- !query +SELECT a, b FROM testData ORDER BY a, (SELECT b FROM testData LIMIT 1) +-- !query analysis +Sort [a#x ASC NULLS FIRST, scalar-subquery#x [] ASC NULLS FIRST], true +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [b#x] +: +- SubqueryAlias testdata +: +- View (`testData`, [a#x, b#x]) +: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: +- Project [a#x, b#x] +: +- SubqueryAlias testData +: +- LocalRelation [a#x, b#x] ++- Project [a#x, b#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a FROM testData ORDER BY (SELECT b FROM testData) +-- !query analysis +Sort [scalar-subquery#x [] ASC NULLS FIRST], true +: +- Project [b#x] +: +- SubqueryAlias testdata +: +- View (`testData`, [a#x, b#x]) +: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] +: +- Project [a#x, b#x] +: +- SubqueryAlias testData +: +- LocalRelation [a#x, b#x] ++- Project [a#x] + +- SubqueryAlias testdata + +- View (`testData`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias 
testData + +- LocalRelation [a#x, b#x] + + +-- !query +SELECT a, b FROM testData ORDER BY a, b IN (SELECT a FROM testData) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + "sqlState" : "0A000", + "messageParameters" : { + "treeNode" : "Sort [a#x ASC NULLS FIRST, b#x IN (list#x []) ASC NULLS FIRST], true\n: +- Project [a#x]\n: +- SubqueryAlias testdata\n: +- View (`testData`, [a#x, b#x])\n: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n: +- Project [a#x, b#x]\n: +- SubqueryAlias testData\n: +- LocalRelation [a#x, b#x]\n+- Project [a#x, b#x]\n +- SubqueryAlias testdata\n +- View (`testData`, [a#x, b#x])\n +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n +- Project [a#x, b#x]\n +- SubqueryAlias testData\n +- LocalRelation [a#x, b#x]\n" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 41, + "stopIndex" : 67, + "fragment" : "IN (SELECT a FROM testData)" + } ] +} + + +-- !query +SELECT a, b FROM testData ORDER BY a, EXISTS(SELECT b FROM testData) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + "sqlState" : "0A000", + "messageParameters" : { + "treeNode" : "Sort [a#x ASC NULLS FIRST, exists#x [] ASC NULLS FIRST], true\n: +- Project [b#x]\n: +- SubqueryAlias testdata\n: +- View (`testData`, [a#x, b#x])\n: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n: +- Project [a#x, b#x]\n: +- SubqueryAlias testData\n: +- LocalRelation [a#x, b#x]\n+- Project [a#x, b#x]\n +- SubqueryAlias testdata\n +- View (`testData`, [a#x, b#x])\n +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n +- Project [a#x, b#x]\n +- SubqueryAlias testData\n +- LocalRelation [a#x, b#x]\n" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + 
"startIndex" : 39, + "stopIndex" : 68, + "fragment" : "EXISTS(SELECT b FROM testData)" + } ] +} + + +-- !query +SELECT a AS k, c FROM testData ORDER BY k +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`c`", + "proposal" : "`a`, `b`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 16, + "fragment" : "c" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/order-by-alias.sql b/sql/core/src/test/resources/sql-tests/inputs/order-by-alias.sql new file mode 100644 index 0000000000000..04ed52356b6fc --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/order-by-alias.sql @@ -0,0 +1,37 @@ +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b); + +-- ORDER BY should resolve column names case-insensitively +SELECT a FROM testData ORDER BY A; + +-- Aliases defined in SELECT can be referenced in ORDER BY (also case-insensitively) +SELECT a as alias FROM testData ORDER BY ALIAS; + +-- ORDER BY a string literal: 'k' is a constant here, not a reference to alias k +SELECT a AS k FROM testData ORDER BY 'k'; +SELECT 1 AS k FROM testData ORDER BY 'k'; + +-- ORDER BY the auto-generated alias of a function call +SELECT concat_ws(' ', a, b) FROM testData ORDER BY `concat_ws( , a, b)`; + +-- ORDER BY a name that is both a testData column and an alias in the project list +SELECT 1 AS a FROM testData ORDER BY a; +SELECT 1 AS a FROM testData ORDER BY `a`; + +-- ORDER BY implicit alias +SELECT 1 ORDER BY `1`; + +-- ORDER BY with a scalar subquery expression +SELECT a, b FROM testData ORDER BY a, (SELECT b FROM testData LIMIT 1); + +-- ORDER BY a scalar subquery that returns more than one row (fails at runtime) +SELECT a FROM testData ORDER BY (SELECT b FROM testData); + +-- Unsupported subquery expressions (IN, EXISTS) in ORDER BY +SELECT a, b FROM testData ORDER BY a, b IN (SELECT a FROM testData); +SELECT a, b FROM testData ORDER 
BY a, EXISTS(SELECT b FROM testData); + +-- ORDER BY alias with invalid col in SELECT list +SELECT a AS k, c FROM testData ORDER BY k; diff --git a/sql/core/src/test/resources/sql-tests/results/order-by-alias.sql.out b/sql/core/src/test/resources/sql-tests/results/order-by-alias.sql.out new file mode 100644 index 0000000000000..17d721ce74e6d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/order-by-alias.sql.out @@ -0,0 +1,231 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a FROM testData ORDER BY A +-- !query schema +struct<a:int> +-- !query output +NULL +NULL +1 +1 +2 +2 +3 +3 +3 + + +-- !query +SELECT a as alias FROM testData ORDER BY ALIAS +-- !query schema +struct<alias:int> +-- !query output +NULL +NULL +1 +1 +2 +2 +3 +3 +3 + + +-- !query +SELECT a AS k FROM testData ORDER BY 'k' +-- !query schema +struct<k:int> +-- !query output +1 +1 +2 +2 +3 +3 +NULL +3 +NULL + + +-- !query +SELECT 1 AS k FROM testData ORDER BY 'k' +-- !query schema +struct<k:int> +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +SELECT concat_ws(' ', a, b) FROM testData ORDER BY `concat_ws( , a, b)` +-- !query schema +struct<concat_ws( , a, b):string> +-- !query output + +1 +1 1 +1 2 +2 1 +2 2 +3 +3 1 +3 2 + + +-- !query +SELECT 1 AS a FROM testData ORDER BY a +-- !query schema +struct<a:int> +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +SELECT 1 AS a FROM testData ORDER BY `a` +-- !query schema +struct<a:int> +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +SELECT 1 ORDER BY `1` +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +SELECT a, b FROM testData ORDER BY a, (SELECT b FROM testData LIMIT 1) +-- !query schema +struct<a:int,b:int> +-- !query output +NULL 1 +NULL NULL +1 1 +1 2 +2 1 +2 2 +3 1 +3 2 +3 NULL + + +-- !query +SELECT a FROM 
testData ORDER BY (SELECT b FROM testData) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +{ + "errorClass" : "SCALAR_SUBQUERY_TOO_MANY_ROWS", + "sqlState" : "21000", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 33, + "stopIndex" : 56, + "fragment" : "(SELECT b FROM testData)" + } ] +} + + +-- !query +SELECT a, b FROM testData ORDER BY a, b IN (SELECT a FROM testData) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + "sqlState" : "0A000", + "messageParameters" : { + "treeNode" : "Sort [a#x ASC NULLS FIRST, b#x IN (list#x []) ASC NULLS FIRST], true\n: +- Project [a#x]\n: +- SubqueryAlias testdata\n: +- View (`testData`, [a#x, b#x])\n: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n: +- Project [a#x, b#x]\n: +- SubqueryAlias testData\n: +- LocalRelation [a#x, b#x]\n+- Project [a#x, b#x]\n +- SubqueryAlias testdata\n +- View (`testData`, [a#x, b#x])\n +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n +- Project [a#x, b#x]\n +- SubqueryAlias testData\n +- LocalRelation [a#x, b#x]\n" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 41, + "stopIndex" : 67, + "fragment" : "IN (SELECT a FROM testData)" + } ] +} + + +-- !query +SELECT a, b FROM testData ORDER BY a, EXISTS(SELECT b FROM testData) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + "sqlState" : "0A000", + "messageParameters" : { + "treeNode" : "Sort [a#x ASC NULLS FIRST, exists#x [] ASC NULLS FIRST], true\n: +- Project [b#x]\n: +- SubqueryAlias testdata\n: +- View (`testData`, [a#x, b#x])\n: +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n: +- Project [a#x, b#x]\n: +- 
SubqueryAlias testData\n: +- LocalRelation [a#x, b#x]\n+- Project [a#x, b#x]\n +- SubqueryAlias testdata\n +- View (`testData`, [a#x, b#x])\n +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]\n +- Project [a#x, b#x]\n +- SubqueryAlias testData\n +- LocalRelation [a#x, b#x]\n" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 39, + "stopIndex" : 68, + "fragment" : "EXISTS(SELECT b FROM testData)" + } ] +} + + +-- !query +SELECT a AS k, c FROM testData ORDER BY k +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`c`", + "proposal" : "`a`, `b`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 16, + "stopIndex" : 16, + "fragment" : "c" + } ] +}