diff --git a/ibis_substrait/compiler/core.py b/ibis_substrait/compiler/core.py index d5241b12..61e52370 100644 --- a/ibis_substrait/compiler/core.py +++ b/ibis_substrait/compiler/core.py @@ -81,7 +81,12 @@ def function_id( op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__] sig_key = self.get_signature(op) - extension_signature = f"{op_name}:{'_'.join(sig_key)}" + # the keys for lookup up scalar functions consist of + # tuple(tuple(input dtypes), output dtype) + # but the signature we generate in the substrait plan only needs the input types + input_key = sig_key[0] + + extension_signature = f"{op_name}:{'_'.join(input_key)}" try: function_extension = self.function_extensions[extension_signature] @@ -91,7 +96,7 @@ def function_id( ) return function_extension.function_anchor - def get_signature(self, op: ops.Node) -> tuple[str, ...]: + def get_signature(self, op: ops.Node) -> tuple[tuple[str, ...], str]: """Validate and upcast (if necessary) scalar function extension signature.""" op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__] @@ -102,25 +107,31 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]: ) anykey = ("any",) * len([arg for arg in op.args if arg is not None]) - sigkey = anykey + input_type_key = anykey + output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name] + sigkey = (input_type_key, output_type_key) + + any_sigkey = (anykey, output_type_key) # First check if `any` is an option # This function will take arguments of any type # although we still want to check if the number of args is correct - function_extension = _extension_mapping[op_name].get(anykey) + function_extension = _extension_mapping[op_name].get(any_sigkey) # Then try to look up extension based on input datatypes # Each substrait function defines the types of the inputs and at this # stage we should have performed the appropriate casts to ensure that # argument types match. if function_extension is None: - sigkey = tuple( + input_type_key = tuple( [ IBIS_SUBSTRAIT_TYPE_MAPPING[arg.dtype.name] # type: ignore for arg in op.args - if arg is not None and isinstance(arg, ops.Node) + if arg is not None and isinstance(arg, ops.Value) ] ) + output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name] + sigkey = (input_type_key, output_type_key) function_extension = _extension_mapping[op_name].get(sigkey) # Then check if extension is variadic @@ -130,7 +141,10 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]: # type is only repeated once, so we try to perform a lookup that way, then # assert, if we find anything, that the function is, indeed, variadic. if function_extension is None: - function_extension = _extension_mapping[op_name].get((sigkey[0],)) + # variadic signature would be in the form of + # ((oneof_input_arg_dype,), output_dtype) + variadic_sig = ((sigkey[0][0],), sigkey[1]) + function_extension = _extension_mapping[op_name].get(variadic_sig) if function_extension is not None: assert function_extension.variadic # Function signature for a variadic should contain the type of @@ -138,7 +152,34 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]: # types == the minimum number of variadic args allowed (but keep # it nonzero) arg_count_min = max(function_extension.variadic.get("min", 0), 1) - sigkey = (sigkey[0],) * arg_count_min + input_type_key = (sigkey[0][0],) * arg_count_min + output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name] + sigkey = (input_type_key, output_type_key) + + # Then check if we have an op that has a `date` somewhere in the input + # args and the output listed as `i32`. + # Ibis assumes i32 for the output of all time extraction functions + # because no one is going to be around in i64 years, but Substrait + # expects i64 as the output + if function_extension is None: + if "date" in sigkey[0] and sigkey[1] == "i32": + sigkey = (sigkey[0], "i64") + function_extension = _extension_mapping[op_name].get(sigkey) + + # Ibis doesn't always handle decimal promotion correctly (I think?) + # And all decimal inputs are expected to be decimal outputs, so we have + # to massage the signature key + if function_extension is None: + if set(sigkey[0]) == {"dec"} and sigkey[1] != "dec": + sigkey = (sigkey[0], "dec") + function_extension = _extension_mapping[op_name].get(sigkey) + + # How many special cases do you want? We've got lots. + # Some string functions can only have i64 outputs + if function_extension is None: + if isinstance(op, ops.StringLength): + sigkey = (sigkey[0], "i64") + function_extension = _extension_mapping[op_name].get(sigkey) # If it's still None then we're borked. if function_extension is None: @@ -151,15 +192,17 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]: def create_extension( self, op_name: str, - sigkey: tuple[str, ...], + sigkey: tuple[tuple[str, ...], str], ) -> ste.SimpleExtensionDeclaration.ExtensionFunction: """Register extension uri and create extension function.""" function_extension = _extension_mapping[op_name][sigkey] extension_uri = self.register_extension_uri(function_extension.uri) + input_key = sigkey[0] + extension_function = self.create_extension_function( - extension_uri, f"{op_name}:{'_'.join(sigkey)}" + extension_uri, f"{op_name}:{'_'.join(input_key)}" ) return extension_function diff --git a/ibis_substrait/compiler/mapping.py b/ibis_substrait/compiler/mapping.py index d6eabdda..11d9395e 100644 --- a/ibis_substrait/compiler/mapping.py +++ b/ibis_substrait/compiler/mapping.py @@ -32,6 +32,7 @@ "CountStar": "count", "CountDistinct": "count", "Divide": "divide", + "SubstraitDivide": "divide", "EndsWith": "ends_with", "Equals": "equal", "Exp": "exp", @@ -69,6 +70,7 @@ "RegexReplace": "regexp_replace", "Repeat": "repeat", "Reverse": "reverse", + "SubstraitRound": "round", "Round": "round", "RPad": "rpad", "RStrip": "rtrim", @@ -119,26 +121,52 @@ } _normalized_key_names = { - # decimal precision and scale aren't part of the - # extension signature they're passed in separately - "decimal": "dec", - "decimal": "dec", - "decimal": "dec", - "decimal": "dec", - # we don't care about string length - "fixedchar": "str", - "fixedchar": "str", - "varchar": "str", - "varchar": "str", - "varchar": "str", - # for now ignore nullability marker - "boolean?": "bool", - # why is there a 1? - "any1": "any", - "Date": "date", + "binary": "vbin", + "interval_compound": "icompound", + "interval_day": "iday", + "interval_year": "iyear", + "string": "str", + "timestamp": "ts", + "timestamp_tz": "tstz", } +def normalize_substrait_type_names(typ: str) -> str: + # First strip off any punctuation + typ = typ.strip("?").lower() + + # Common prefixes whose information does not matter to an extension function + # signature + for complex_type, abbr in [ + ("fixedchar", "fchar"), + ("varchar", "vchar"), + ("fixedbinary", "fbin"), + ("decimal", "dec"), + ("precision_timestamp", "pts"), + ("precision_timestamp_tz", "ptstz"), + ("struct", "struct"), + ("list", "list"), + ("map", "map"), + ("any", "any"), + ("boolean", "bool"), + # Absolute garbage type info + ("decimal", "dec"), + ("delta", "dec"), + ("prec", "dec"), + ("scale", "dec"), + ("init_", "dec"), + ("min_", "dec"), + ("max_", "dec"), + ]: + if typ.lower().startswith(complex_type): + typ = abbr + + # Then pass through the dictionary of mappings, defaulting to just the + # existing string + typ = _normalized_key_names.get(typ.lower(), typ.lower()) + return typ + + _extension_mapping: Mapping[str, Any] = defaultdict(dict) @@ -151,13 +179,13 @@ def __init__(self, name: str) -> None: self.uri: str = "" def parse(self, impl: Mapping[str, Any]) -> None: - self.rtn = impl["return"] + self.rtn = normalize_substrait_type_names(impl["return"]) self.nullability = impl.get("nullability", False) self.variadic = impl.get("variadic", False) if input_args := impl.get("args", []): for val in input_args: - if typ := val.get("value", None): - typ = _normalized_key_names.get(typ.lower(), typ.lower()) + if typ := val.get("value"): + typ = normalize_substrait_type_names(typ) self.inputs.append(typ) elif arg_name := val.get("name", None): self.arg_names.append(arg_name) @@ -212,7 +240,9 @@ def register_extension_yaml( for function in named_functions: for func in _parse_func(function): func.uri = uri or f"{prefix}/{fname.name}" - _extension_mapping[function["name"]][tuple(func.inputs)] = func + _extension_mapping[function["name"]][(tuple(func.inputs), func.rtn)] = ( + func + ) def _populate_default_extensions() -> None: diff --git a/ibis_substrait/compiler/translate.py b/ibis_substrait/compiler/translate.py index cd5fc69b..ac620d54 100644 --- a/ibis_substrait/compiler/translate.py +++ b/ibis_substrait/compiler/translate.py @@ -15,11 +15,12 @@ import operator import uuid from collections.abc import Iterable, Mapping, MutableMapping, Sequence -from typing import Any, TypeVar, Union +from typing import Any, Optional, TypeVar, Union import ibis import ibis.expr.datatypes as dt import ibis.expr.operations as ops +import ibis.expr.rules as rlz import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util @@ -29,6 +30,7 @@ from ibis_substrait.compiler.core import SubstraitCompiler, _get_fields from ibis_substrait.compiler.mapping import ( IBIS_SUBSTRAIT_OP_MAPPING, + IBIS_SUBSTRAIT_TYPE_MAPPING, _extension_mapping, ) @@ -505,17 +507,17 @@ def value_op( ) -> stalg.Expression: # Check if scalar function is valid for input dtype(s) and insert casts as needed to # make sure inputs are correct. - op = _check_and_upcast(op) + newop = _check_and_upcast(op) # given the details of `op` -> function id return stalg.Expression( scalar_function=stalg.Expression.ScalarFunction( - function_reference=compiler.function_id(op), - output_type=translate(op.dtype), + function_reference=compiler.function_id(newop), + output_type=translate(newop.dtype), arguments=[ stalg.FunctionArgument( value=translate(arg, compiler=compiler, **kwargs) ) - for arg in op.args + for arg in newop.args if isinstance(arg, ops.Value) ], ) @@ -538,6 +540,8 @@ def window_op( lower_bound, upper_bound = _translate_window_bounds(start, end) + func = _check_and_upcast(func) + return stalg.Expression( window_function=stalg.Expression.WindowFunction( function_reference=compiler.function_id(func), @@ -565,6 +569,7 @@ def _reduction( compiler: SubstraitCompiler, **kwargs: Any, ) -> stalg.AggregateFunction: + op = _check_and_upcast(op) return stalg.AggregateFunction( function_reference=compiler.function_id(op), arguments=[ @@ -1408,8 +1413,11 @@ def _check_and_upcast(op: ops.Node) -> ops.Node: op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__] anykey = ("any",) * len([arg for arg in op.args if arg is not None]) + output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name] + any_sigkey = (anykey, output_type_key) + # First check if `any` is an option - function_extension = _extension_mapping[op_name].get(anykey) + function_extension = _extension_mapping[op_name].get(any_sigkey) # Otherwise, if the types don't match, cast up if function_extension is None: @@ -1463,15 +1471,55 @@ def _upcast_string_op(op: string_op) -> string_op: return type(op)(*casted_args) +# Ibis has (usually good) opinions about what the dtypes of certain ops should be +# Substrait disagrees sometimes +class SubstraitRound(ops.Value): + """Round a value.""" + + arg: ops.Value[dt.Numeric] + digits: Optional[ops.Value[dt.Integer]] = None + + shape = rlz.shape_like("arg") + + @property + def dtype(self) -> dt.DataType: + return self.arg.dtype + + +class SubstraitDivide(ops.NumericBinary): + """Divide that always returns the same dtype as the inputs.""" + + @property + def dtype(self) -> dt.DataType: + return self.left.dtype + + @_upcast.register(ops.Round) -def _upcast_round_digits(op: ops.Round) -> ops.Round: +def _upcast_round_digits(op: ops.Round) -> SubstraitRound: # Substrait wants Int32 for decimal place argument to round if op.digits is None: raise ValueError( "Substrait requires that a rounding operation specify the number of digits to round to" ) elif not isinstance(op.digits.dtype, dt.Int32): - return ops.Round( + return SubstraitRound( op.arg, op.digits.copy(dtype=dt.Int32(nullable=op.digits.dtype.nullable)) ) + return SubstraitRound(op.arg, op.digits) + + +@_upcast.register(ops.Mean) +def _upcast_mean(op: ops.Mean) -> ops.Mean: + # Substrait wants the input types and output types of reductions to match + # We cast the _input_ type to match the output type + # So mean(some_int) -> float will go to mean(cast(some_int as float)) -> float + if op.arg.dtype != op.dtype: + return ops.Mean(arg=ops.Cast(op.arg, to=op.dtype), where=op.where) + return op + + +@_upcast.register(ops.Divide) +def _matchy_matchy_divide(op: ops.Divide) -> SubstraitDivide: + new_op = SubstraitDivide(op.left, op.right) + return _upcast_bin_op(new_op) diff --git a/ibis_substrait/extensions/__init__.py b/ibis_substrait/extensions/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/ibis_substrait/extensions/extension_types.yaml b/ibis_substrait/extensions/extension_types.yaml deleted file mode 100644 index e03073c5..00000000 --- a/ibis_substrait/extensions/extension_types.yaml +++ /dev/null @@ -1,10 +0,0 @@ ---- -types: - - name: point - structure: - latitude: i32 - longitude: i32 - - name: line - structure: - start: point - end: point diff --git a/ibis_substrait/extensions/functions_aggregate_approx.yaml b/ibis_substrait/extensions/functions_aggregate_approx.yaml deleted file mode 100644 index c77caecc..00000000 --- a/ibis_substrait/extensions/functions_aggregate_approx.yaml +++ /dev/null @@ -1,18 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "approx_count_distinct" - description: >- - Calculates the approximate number of rows that contain distinct values of the expression argument using - HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which - returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT - processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact - result. - impls: - - args: - - name: x - value: any - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: binary - return: i64 diff --git a/ibis_substrait/extensions/functions_aggregate_generic.yaml b/ibis_substrait/extensions/functions_aggregate_generic.yaml deleted file mode 100644 index 4d891e9c..00000000 --- a/ibis_substrait/extensions/functions_aggregate_generic.yaml +++ /dev/null @@ -1,37 +0,0 @@ -%YAML 1.2 ---- -aggregate_functions: - - name: "count" - description: Count a set of values - impls: - - args: - - name: x - value: any - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - name: "count" - description: "Count a set of records (not field referenced)" - impls: - - options: - overflow: - values: [SILENT, SATURATE, ERROR] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64 - return: i64 - - name: "any_value" - description: > - Selects an arbitrary value from a group of values. - - If the input is empty, the function returns null. - impls: - - args: - - name: x - value: any - nullability: DECLARED_OUTPUT - return: any? diff --git a/ibis_substrait/extensions/functions_arithmetic.yaml b/ibis_substrait/extensions/functions_arithmetic.yaml deleted file mode 100644 index eaa66ea2..00000000 --- a/ibis_substrait/extensions/functions_arithmetic.yaml +++ /dev/null @@ -1,1522 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two values." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - value: i64 - - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "subtract" - description: "Subtract one value from another." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "multiply" - description: "Multiply two values." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "divide" - description: > - Divide x by y. In the case of integer division, partial values are truncated (i.e. rounded towards 0). - The `on_division_by_zero` option governs behavior in cases where y is 0 and x is not 0. - `LIMIT` means positive or negative infinity (depending on the sign of x and y). - If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`. - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_division_by_zero: - values: [ LIMIT, NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_division_by_zero: - values: [ LIMIT, NAN, ERROR ] - return: fp64 - - - name: "negate" - description: "Negation of the value" - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "modulus" - description: "Get the remainder when dividing one value by another." - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "power" - description: "Take the power with x as the base and y as exponent." - impls: - - args: - - name: x - value: i64 - - name: y - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - - name: y - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - return: fp64 - - - name: "sqrt" - description: "Square root of the value" - impls: - - args: - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "exp" - description: "The mathematical constant e, raised to the power of the value." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "cos" - description: "Get the cosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "sin" - description: "Get the sine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "tan" - description: "Get the tangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "cosh" - description: "Get the hyperbolic cosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "sinh" - description: "Get the hyperbolic sine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "tanh" - description: "Get the hyperbolic tangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "acos" - description: "Get the arccosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "asin" - description: "Get the arcsine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "atan" - description: "Get the arctangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "acosh" - description: "Get the hyperbolic arccosine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "asinh" - description: "Get the hyperbolic arcsine of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - return: fp64 - - - name: "atanh" - description: "Get the hyperbolic arctangent of a value in radians." - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "atan2" - description: "Get the arctangent of values given as x/y pairs." - impls: - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - return: fp64 - - - name: "abs" - description: > - Calculate the absolute value of the argument. - - Integer values allow the specification of overflow behavior to handle the - unevenness of the twos complement, e.g. Int8 range [-128 : 127]. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "sign" - description: > - Return the signedness of the argument. - - Integer values return signedness with the same type as the input. - Possible return values are [-1, 0, 1] - - Floating point values return signedness with the same type as the input. - Possible return values are [-1.0, -0.0, 0.0, 1.0, NaN] - impls: - - args: - - name: x - value: i8 - return: i8 - - args: - - name: x - value: i16 - return: i16 - - args: - - name: x - value: i32 - return: i32 - - args: - - name: x - value: i64 - return: i64 - - args: - - name: x - value: fp32 - return: fp32 - - args: - - name: x - value: fp64 - return: fp64 - - - name: "factorial" - description: > - Return the factorial of a given integer input. - - The factorial of 0! is 1 by convention. - - Negative inputs will raise an error. - impls: - - args: - - value: i32 - name: "n" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i32 - - args: - - value: i64 - name: "n" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: i64 - - - name: "bitwise_not" - description: > - Return the bitwise NOT result for one integer input. - - impls: - - args: - - name: x - value: i8 - return: i8 - - args: - - name: x - value: i16 - return: i16 - - args: - - name: x - value: i32 - return: i32 - - args: - - name: x - value: i64 - return: i64 - - - name: "bitwise_and" - description: > - Return the bitwise AND result for two integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "bitwise_or" - description: > - Return the bitwise OR result for two given integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - - - name: "bitwise_xor" - description: > - Return the bitwise XOR result for two integer inputs. - - impls: - - args: - - name: x - value: i8 - - name: y - value: i8 - return: i8 - - args: - - name: x - value: i16 - - name: y - value: i16 - return: i16 - - args: - - name: x - value: i32 - - name: y - value: i32 - return: i32 - - args: - - name: x - value: i64 - - name: y - value: i64 - return: i64 - -aggregate_functions: - - name: "sum" - description: Sum a set of values. The sum of zero elements yields null. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - args: - - name: x - value: fp64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "avg" - description: Average a set of values. For integral types, this truncates partial values. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i8? - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i16? - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i32? - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: i64? - - args: - - name: x - value: fp32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp32? - - args: - - name: x - value: fp64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT" - return: fp64? - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i8? - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i16? - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i32? - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: i64? - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp32? - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: fp64? - return: fp64? - - name: "product" - description: Product of a set of values. Returns 1 for empty input. - impls: - - args: - - name: x - value: i8 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i8 - - args: - - name: x - value: i16 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i16 - - args: - - name: x - value: i32 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i32 - - args: - - name: x - value: i64 - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: MIRROR - decomposable: MANY - intermediate: i64 - return: i64 - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: MIRROR - decomposable: MANY - intermediate: fp64 - return: fp64 - - name: "std_dev" - description: Calculates standard-deviation for a set of values. - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "variance" - description: Calculates variance for a set of values. - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - distribution: - values: [ SAMPLE, POPULATION] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "corr" - description: > - Calculates the value of Pearson's correlation coefficient between `x` and `y`. - If there is no input, null is returned. - impls: - - args: - - name: x - value: fp32 - - name: y - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - - name: y - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "mode" - description: > - Calculates mode for a set of values. - If there is no input, null is returned. - impls: - - args: - - name: x - value: i8 - nullability: DECLARED_OUTPUT - return: i8? - - args: - - name: x - value: i16 - nullability: DECLARED_OUTPUT - return: i16? - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - return: i32? - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - return: i64? - - args: - - name: x - value: fp32 - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: x - value: fp64 - nullability: DECLARED_OUTPUT - return: fp64? - - name: "median" - description: > - Calculate the median for a set of values. - - Returns null if applied to zero records. For the integer implementations, - the rounding option determines how the median should be rounded if it ends - up midway between two values. For the floating point implementations, - they specify the usual floating point rounding mode. - impls: - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i8 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i8? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i16 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i16? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: i64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: i64? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - return: fp64? - - name: "quantile" - description: > - Calculates quantiles for a set of values. - - This function will divide the aggregated values (passed via the - distribution argument) over N equally-sized bins, where N is passed - via a constant argument. It will then return the values at the - boundaries of these bins in list form. If the input is appropriately - sorted, this computes the quantiles of the distribution. - - The function can optionally return the first and/or last element of - the input, as specified by the `boundaries` argument. If the input is - appropriately sorted, this will thus be the minimum and/or maximum - values of the distribution. - - When the boundaries do not lie exactly on elements of the incoming - distribution, the function will interpolate between the two nearby - elements. If the interpolated value cannot be represented exactly, - the `rounding` option controls how the value should be selected or - computed. - - The function fails and returns null in the following cases: - - `n` is null or less than one; - - any value in `distribution` is null. - - The function returns an empty list if `n` equals 1 and `boundaries` is - set to `NEITHER`. - - impls: - - args: - - name: boundaries - description: > - Which boundaries to include. For NEITHER, the output will have - n-1 elements, for MINIMUM and MAXIMUM it will have n elements, - and for BOTH it will have n+1 elements. - options: [ NEITHER, MINIMUM, MAXIMUM, BOTH ] - - name: precision - description: > - Based on required operator performance and configured optimizations - on saving memory bandwidth, the precision of the end result can be - the highest possible accuracy or an approximation. - - - EXACT: provides the exact result, rounded if needed according - to the rounding option. - - APPROXIMATE: provides only an estimate; the result must lie - between the minimum and maximum values in the input - (inclusive), but otherwise the accuracy is left up to the - consumer. - options: [ EXACT, APPROXIMATE ] - - value: i64 - constant: true - name: n - description: > - A positive integer which defines the number of quantile - partitions. - - value: any - name: distribution - description: > - The data for which the quantiles should be computed. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. For floating point numbers, it specifies the IEEE - 754 rounding mode (as it does for all other floating point - operations). For integer types: - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - For non-numeric types, the behavior is the same as for integer - types, but applied to the index of the value in distribution. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - nullability: DECLARED_OUTPUT - ordered: true - return: LIST? - -window_functions: - - name: "row_number" - description: "the number of the current row within its partition." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "rank" - description: "the rank of the current row, with gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "dense_rank" - description: "the rank of the current row, without gaps." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION - - name: "percent_rank" - description: "the relative rank of the current row." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "cume_dist" - description: "the cumulative distribution." - impls: - - args: [] - nullability: DECLARED_OUTPUT - decomposable: NONE - return: fp64? - window_type: PARTITION - - name: "ntile" - description: "Return an integer ranging from 1 to the argument value,dividing the partition as equally as possible." - impls: - - args: - - name: x - value: i32 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i32? - window_type: PARTITION - - args: - - name: x - value: i64 - nullability: DECLARED_OUTPUT - decomposable: NONE - return: i64? - window_type: PARTITION diff --git a/ibis_substrait/extensions/functions_arithmetic_decimal.yaml b/ibis_substrait/extensions/functions_arithmetic_decimal.yaml deleted file mode 100644 index 0fc4caae..00000000 --- a/ibis_substrait/extensions/functions_arithmetic_decimal.yaml +++ /dev/null @@ -1,151 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "add" - description: "Add two decimal values." - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "subtract" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = init_scale + max(P1 - S1, P2 - S2) + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "multiply" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = S1 + S2 - init_prec = P1 + P2 + 1 - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "divide" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(6, S1 + P2 + 1) - init_prec = P1 - S1 + P2 + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL - - - name: "modulus" - impls: - - args: - - name: x - value: decimal - - name: y - value: decimal - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - return: |- - init_scale = max(S1,S2) - init_prec = min(P1 - S1, P2 - S2) + init_scale - min_scale = min(init_scale, 6) - delta = init_prec - 38 - prec = min(init_prec, 38) - scale_after_borrow = max(init_scale - delta, min_scale) - scale = init_prec > 38 ? scale_after_borrow : init_scale - DECIMAL -aggregate_functions: - - name: "sum" - description: Sum a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?<38,S>" - return: "DECIMAL?<38,S>" - - name: "avg" - description: Average a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - options: - overflow: - values: [ SILENT, SATURATE, ERROR ] - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "STRUCT,i64>" - return: "DECIMAL<38,S>" - - name: "min" - description: Min a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" - - name: "max" - description: Max a set of values. - impls: - - args: - - name: x - value: "DECIMAL" - nullability: DECLARED_OUTPUT - decomposable: MANY - intermediate: "DECIMAL?" - return: "DECIMAL?" diff --git a/ibis_substrait/extensions/functions_boolean.yaml b/ibis_substrait/extensions/functions_boolean.yaml deleted file mode 100644 index 22ae296d..00000000 --- a/ibis_substrait/extensions/functions_boolean.yaml +++ /dev/null @@ -1,140 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: or - description: > - The boolean `or` using Kleene logic. - - This function behaves as follows with nulls: - - true or null = true - - null or true = true - - false or null = null - - null or false = null - - null or null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `or` true is always true. - - Behavior for 0 or 1 inputs is as follows: - or() -> false - or(x) -> x - impls: - - args: - - value: boolean? - name: a - variadic: - min: 0 - return: boolean? - - - name: and - description: > - The boolean `and` using Kleene logic. - - This function behaves as follows with nulls: - - true and null = null - - null and true = null - - false and null = false - - null and false = false - - null and null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and` false is always false. - - Behavior for 0 or 1 inputs is as follows: - and() -> true - and(x) -> x - impls: - - args: - - value: boolean? - name: a - variadic: - min: 0 - return: boolean? - - - name: and_not - description: > - The boolean `and` of one value and the negation of the other using Kleene logic. - - This function behaves as follows with nulls: - - true and not null = null - - null and not false = null - - false and not null = false - - null and not true = false - - null and not null = null - - In other words, in this context a null value really means "unknown", and - an unknown value `and not` true is always false, as is false `and not` an - unknown value. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: xor - description: > - The boolean `xor` of two values using Kleene logic. - - When a null is encountered in either input, a null is output. - impls: - - args: - - value: boolean? - name: a - - value: boolean? - name: b - return: boolean? - - - name: not - description: > - The `not` of a boolean value. - - When a null is input, a null is output. - impls: - - args: - - value: boolean? - name: a - return: boolean? - -aggregate_functions: - - - name: "bool_and" - description: > - If any value in the input is false, false is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, true is - returned. - impls: - - args: - - value: boolean - name: a - nullability: DECLARED_OUTPUT - return: boolean? - - - name: "bool_or" - description: > - If any value in the input is true, true is returned. If the input is - empty or only contains nulls, null is returned. Otherwise, false is - returned. - impls: - - args: - - value: boolean - name: a - nullability: DECLARED_OUTPUT - return: boolean? diff --git a/ibis_substrait/extensions/functions_comparison.yaml b/ibis_substrait/extensions/functions_comparison.yaml deleted file mode 100644 index 7d11f3c7..00000000 --- a/ibis_substrait/extensions/functions_comparison.yaml +++ /dev/null @@ -1,216 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "not_equal" - description: > - Whether two values are not_equal. - - `not_equal(x, y) := (x != y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "equal" - description: > - Whether two values are equal. - - `equal(x, y) := (x == y)` - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "is_not_distinct_from" - description: > - Whether two values are equal. - - This function treats `null` values as comparable, so - - `is_not_distinct_from(null, null) == True` - - This is in contrast to `equal`, in which `null` values do not compare. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "lt" - description: > - Less than. - - lt(x, y) := (x < y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "gt" - description: > - Greater than. - - gt(x, y) := (x > y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "lte" - description: > - Less than or equal to. - - lte(x, y) := (x <= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "gte" - description: > - Greater than or equal to. - - gte(x, y) := (x >= y) - - If either/both of `x` and `y` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: BOOLEAN - - - name: "between" - description: >- - Whether the `expression` is greater than or equal to `low` and less than or equal to `high`. - - `expression` BETWEEN `low` AND `high` - - If `low`, `high`, or `expression` are `null`, `null` is returned. - impls: - - args: - - value: any1 - name: expression - description: The expression to test for in the range defined by `low` and `high`. - - value: any1 - name: low - description: The value to check if greater than or equal to. - - value: any1 - name: high - description: The value to check if less than or equal to. - return: BOOLEAN - - - name: "is_null" - description: Whether a value is null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: BOOLEAN - nullability: DECLARED_OUTPUT - - - name: "is_not_null" - description: Whether a value is not null. NaN is not null. - impls: - - args: - - value: any1 - name: x - return: BOOLEAN - nullability: DECLARED_OUTPUT - - - name: "is_nan" - description: > - Whether a value is not a number. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "is_finite" - description: > - Whether a value is finite (neither infinite nor NaN). - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "is_infinite" - description: > - Whether a value is infinite. - - If `x` is `null`, `null` is returned. - impls: - - args: - - value: fp32 - name: x - return: BOOLEAN - - args: - - value: fp64 - name: x - return: BOOLEAN - - - name: "nullif" - description: If two values are equal, return null. Otherwise, return the first value. - impls: - - args: - - value: any1 - name: x - - value: any1 - name: y - return: any1 - - - name: "coalesce" - description: >- - Evaluate arguments from left to right and return the first argument that is not null. Once - a non-null argument is found, the remaining arguments are not evaluated. - - If all arguments are null, return null. - impls: - - args: - - value: any1 - variadic: - min: 2 - return: any1 diff --git a/ibis_substrait/extensions/functions_datetime.yaml b/ibis_substrait/extensions/functions_datetime.yaml deleted file mode 100644 index 51407e4c..00000000 --- a/ibis_substrait/extensions/functions_datetime.yaml +++ /dev/null @@ -1,267 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: extract - description: Extract portion of a date/time value. - impls: - - args: - - name: component - options: [ YEAR, MONTH, DAY, SECOND ] - description: The part of the value to extract. - - name: x - value: timestamp - return: i64 - - args: - - name: component - options: [ YEAR, MONTH, DAY, SECOND ] - description: The part of the value to extract. - - name: x - value: timestamp_tz - return: i64 - - args: - - name: component - options: [ YEAR, MONTH, DAY ] - description: The part of the value to extract. - - name: x - value: date - return: i64 - - args: - - name: component - options: [ SECOND ] - description: The part of the value to extract. - - name: x - value: time - return: i64 - - - name: "add" - description: Add an interval to a date/time type. - impls: - - args: - - name: x - value: timestamp - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: timestamp_tz - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: date - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: timestamp - - name: y - value: interval_day - return: timestamp - - args: - - name: x - value: timestamp_tz - - name: y - value: interval_day - return: timestamp - - args: - - name: x - value: date - - name: y - value: interval_day - return: timestamp - - - name: "add_intervals" - description: Add two intervals together. - impls: - - args: - - name: x - value: interval_day - - name: y - value: interval_day - return: interval_day - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: interval_year - - - name: "subtract" - description: Subtract an interval from a date/time type. - impls: - - args: - - name: x - value: timestamp - - name: y - value: interval_year - return: timestamp - - args: - - name: x - value: timestamp_tz - - name: y - value: interval_year - return: timestamp_tz - - args: - - name: x - value: date - - name: y - value: interval_year - return: date - - args: - - name: x - value: timestamp - - name: y - value: interval_day - return: timestamp - - args: - - name: x - value: timestamp_tz - - name: y - value: interval_day - return: timestamp_tz - - args: - - name: x - value: date - - name: y - value: interval_day - return: date - - - name: "lte" - description: less than or equal to - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day - - name: y - value: interval_day - return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "lt" - description: less than - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day - - name: y - value: interval_day - return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "gte" - description: greater than or equal to - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day - - name: y - value: interval_day - return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean - - - name: "gt" - description: greater than - impls: - - args: - - name: x - value: timestamp - - name: y - value: timestamp - return: boolean - - args: - - name: x - value: timestamp_tz - - name: y - value: timestamp_tz - return: boolean - - args: - - name: x - value: date - - name: y - value: date - return: boolean - - args: - - name: x - value: interval_day - - name: y - value: interval_day - return: boolean - - args: - - name: x - value: interval_year - - name: y - value: interval_year - return: boolean diff --git a/ibis_substrait/extensions/functions_logarithmic.yaml b/ibis_substrait/extensions/functions_logarithmic.yaml deleted file mode 100644 index f4b8acc1..00000000 --- a/ibis_substrait/extensions/functions_logarithmic.yaml +++ /dev/null @@ -1,147 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ln" - description: "Natural logarithm of the value" - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - - name: "log10" - description: "Logarithm to base 10 of the value" - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - - name: "log2" - description: "Logarithm to base 2 of the value" - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - - name: "logb" - description: > - Logarithm of the value with the given base - - logb(x, b) => log_{b} (x) - impls: - - args: - - value: fp32 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp32 - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - value: fp64 - name: "x" - description: "The number `x` to compute the logarithm of" - - value: fp64 - name: "base" - description: "The logarithm base `b` to use" - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 - - - name: "log1p" - description: > - Natural logarithm (base e) of 1 + x - - log1p(x) => log(1+x) - impls: - - args: - - name: x - value: fp32 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp32 - - args: - - name: x - value: fp64 - options: - rounding: - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ] - on_domain_error: - values: [ NAN, ERROR ] - on_log_zero: - values: [NAN, ERROR, MINUS_INFINITY] - return: fp64 diff --git a/ibis_substrait/extensions/functions_rounding.yaml b/ibis_substrait/extensions/functions_rounding.yaml deleted file mode 100644 index 09309f2c..00000000 --- a/ibis_substrait/extensions/functions_rounding.yaml +++ /dev/null @@ -1,270 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "ceil" - description: > - Rounding to the ceiling of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 - - - name: "floor" - description: > - Rounding to the floor of the value `x`. - impls: - - args: - - value: fp32 - name: "x" - return: fp32 - - args: - - value: fp64 - name: "x" - return: fp64 - - - name: "round" - description: > - Rounding the value `x` to `s` decimal places. - impls: - - args: - - value: i8 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i8? - - args: - - value: i16 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i16? - - args: - - value: i32 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i32? - - args: - - value: i64 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, nothing will happen - since `x` is an integer value. - - When `s` is a negative number, the rounding is - performed to the nearest multiple of `10^(-s)`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: i64? - - args: - - value: fp32 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, the rounding - is performed to a `s` number of decimal places. - - When `s` is a negative number, the rounding is - performed to the left side of the decimal point - as specified by `s`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: fp32? - - args: - - value: fp64 - name: "x" - description: > - Numerical expression to be rounded. - - value: i32 - name: "s" - description: > - Number of decimal places to be rounded to. - - When `s` is a positive number, the rounding - is performed to a `s` number of decimal places. - - When `s` is a negative number, the rounding is - performed to the left side of the decimal point - as specified by `s`. - options: - rounding: - description: > - When a boundary is computed to lie somewhere between two values, - and this value cannot be exactly represented, this specifies how - to round it. - - - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie - to the even option. - - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly - halfway, tie away from zero. - - TRUNCATE: always round toward zero. - - CEILING: always round toward positive infinity. - - FLOOR: always round toward negative infinity. - - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule - - TIE_DOWN: round ties with FLOOR rule - - TIE_UP: round ties with CEILING rule - - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule - - TIE_TO_ODD: round to nearest value; if exactly halfway, tie - to the odd option. - values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR, - AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ] - nullability: DECLARED_OUTPUT - return: fp64? diff --git a/ibis_substrait/extensions/functions_set.yaml b/ibis_substrait/extensions/functions_set.yaml deleted file mode 100644 index ce02bf32..00000000 --- a/ibis_substrait/extensions/functions_set.yaml +++ /dev/null @@ -1,27 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: "index_in" - description: > - Checks the membership of a value in a list of values - - Returns the first 0-based index value of some input `T` if `T` is equal to - any element in `List`. Returns `NULL` if not found. - - If `T` is `NULL`, returns `NULL`. - - If `T` is `NaN`: - - Returns 0-based index of `NaN` in `List` (default) - - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified) - impls: - - args: - - name: x - value: T - - name: y - value: List - options: - nan_equality: - values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ] - nullability: DECLARED_OUTPUT - return: int64? diff --git a/ibis_substrait/extensions/functions_string.yaml b/ibis_substrait/extensions/functions_string.yaml deleted file mode 100644 index 19d594fd..00000000 --- a/ibis_substrait/extensions/functions_string.yaml +++ /dev/null @@ -1,1330 +0,0 @@ -%YAML 1.2 ---- -scalar_functions: - - - name: concat - description: Concatenate strings. - impls: - - args: - - value: "varchar" - name: "input" - variadic: - min: 1 - return: "varchar" - - args: - - value: "string" - name: "input" - variadic: - min: 1 - return: "string" - - - name: like - description: >- - Are two strings like each other. - - The `case_sensitivity` option applies to the `match` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "match" - description: The string to match against the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "match" - description: The string to match against the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - - name: substring - description: >- - Extract a substring of a specified `length` starting from position `start`. - A `start` value of 1 refers to the first characters of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "string" - - args: - - value: "fixedchar" - name: "input" - - value: i32 - name: "start" - - value: i32 - name: "length" - return: "string" - - - name: regexp_match_substring - description: >- - Extract a substring that matches the given regular expression pattern. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the - pattern to be extracted is specified using the `occurrence` argument. Specifying `1` means - the first occurrence will be extracted, `2` means the second occurrence, and so on. - The `occurrence` argument should be a positive non-zero integer. The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or - the position value is out of range. - impls: - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "varchar" - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "string" - - - name: starts_with - description: >- - Whether the `input` string starts with the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - - name: ends_with - description: >- - Whether `input` string ends with the substring. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - - name: contains - description: >- - Whether the `input` string contains the `substring`. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "BOOLEAN" - - - name: strpos - description: >- - Return the position of the first occurrence of a string in another string. The first - character of the string is at position 1. If no occurrence is found, 0 is returned. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to search for. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - - name: regexp_strpos - description: >- - Return the position of an occurrence of the given regular expression pattern in a - string. The first character of the string is at position 1. The regular expression pattern - should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters - from the beginning of the string to begin starting to search for pattern matches can be - specified using the `position` argument. Specifying `1` means to search for matches - starting at the first character of the input string, `2` means the second character, and so - on. The `position` argument should be a positive non-zero integer. Which occurrence to - return the position of is specified using the `occurrence` argument. Specifying `1` means - the position first occurrence will be returned, `2` means the position of the second - occurrence, and so on. The `occurrence` argument should be a positive non-zero integer. If - no occurrence is found, 0 is returned. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or - the position value is out of range. - impls: - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - - value: i64 - name: "occurrence" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - - name: count_substring - description: >- - Return the number of non-overlapping occurrences of a substring in an input string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - description: The input string. - - value: "fixedchar" - name: "substring" - description: The substring to count. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: i64 - - - name: regexp_count_substring - description: >- - Return the number of non-overlapping occurrences of a regular expression pattern in an input - string. The regular expression pattern should follow the International Components for - Unicode implementation (https://unicode-org.github.io/icu/userguide/strings/regexp.html). - The number of characters from the beginning of the string to begin starting to search for - pattern matches can be specified using the `position` argument. Specifying `1` means to - search for matches starting at the first character of the input string, `2` means the - second character, and so on. The `position` argument should be a positive non-zero integer. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile or the position value is out of range. - impls: - - args: - - value: "string" - name: "input" - - value: "string" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "varchar" - name: "input" - - value: "varchar" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - args: - - value: "fixedchar" - name: "input" - - value: "fixedchar" - name: "pattern" - - value: i64 - name: "position" - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: i64 - - - name: replace - description: >- - Replace all occurrences of the substring with the replacement string. - - The `case_sensitivity` option applies to the `substring` argument. - impls: - - args: - - value: "string" - name: "input" - description: Input string. - - value: "string" - name: "substring" - description: The substring to replace. - - value: "string" - name: "replacement" - description: The replacement string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "string" - - args: - - value: "varchar" - name: "input" - description: Input string. - - value: "varchar" - name: "substring" - description: The substring to replace. - - value: "varchar" - name: "replacement" - description: The replacement string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - return: "varchar" - - - name: concat_ws - description: Concatenate strings together separated by a separator. - impls: - - args: - - value: "string" - name: "separator" - description: Character to separate strings by. - - value: "string" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "string" - - args: - - value: "varchar" - name: "separator" - description: Character to separate strings by. - - value: "varchar" - name: "string_arguments" - description: Strings to be concatenated. - variadic: - min: 1 - return: "varchar" - - - name: repeat - description: Repeat a string `count` number of times. - impls: - - args: - - value: "string" - name: "input" - - value: i64 - name: "count" - return: "string" - - args: - - value: "varchar" - - value: i64 - name: "input" - - value: i64 - name: "count" - return: "varchar" - - - name: reverse - description: Returns the string in reverse order. - impls: - - args: - - value: "string" - name: "input" - return: "string" - - args: - - value: "varchar" - name: "input" - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - return: "fixedchar" - - - name: replace_slice - description: >- - Replace a slice of the input string. A specified 'length' of characters will be deleted from - the input string beginning at the 'start' position and will be replaced by a new string. A - start value of 1 indicates the first character of the input string. If start is negative - or zero, or greater than the length of the input string, a null string is returned. If 'length' - is negative, a null string is returned. If 'length' is zero, inserting of the new string - occurs at the specified 'start' position and no characters are deleted. If 'length' is - greater than the input string, deletion will occur up to the last character of the input string. - impls: - - args: - - value: "string" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "string" - name: "replacement" - description: The new string to insert at the start position. - return: "string" - - args: - - value: "varchar" - name: "input" - description: Input string. - - value: i64 - name: "start" - description: The position in the string to start deleting/inserting characters. - - value: i64 - name: "length" - description: The number of characters to delete from the input string. - - value: "varchar" - name: "replacement" - description: The new string to insert at the start position. - return: "varchar" - - - name: lower - description: >- - Transform the string to lower case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: upper - description: >- - Transform the string to upper case characters. Implementation should follow the utf8_unicode_ci - collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: swapcase - description: >- - Transform the string's lowercase characters to uppercase and uppercase characters to - lowercase. Implementation should follow the utf8_unicode_ci collations according to the - Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: capitalize - description: >- - Capitalize the first character of the input string. Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: title - description: >- - Converts the input string into titlecase. Capitalize the first character of each word in the - input string except for articles (a, an, the). Implementation should follow the - utf8_unicode_ci collations according to the Unicode Collation Algorithm described at - http://www.unicode.org/reports/tr10/. - impls: - - args: - - value: "string" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "string" - - args: - - value: "varchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "varchar" - - args: - - value: "fixedchar" - name: "input" - options: - char_set: - values: [ UTF8, ASCII_ONLY ] - return: "fixedchar" - - - name: char_length - description: >- - Return the number of characters in the input string. The length includes trailing spaces. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: bit_length - description: Return the number of bits in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: octet_length - description: Return the number of bytes in the input string. - impls: - - args: - - value: "string" - name: "input" - return: i64 - - args: - - value: "varchar" - name: "input" - return: i64 - - args: - - value: "fixedchar" - name: "input" - return: i64 - - - name: regexp_replace - description: >- - Search a string for a substring that matches a given regular expression pattern and replace - it with a replacement string. The regular expression pattern should follow the - International Components for Unicode implementation (https://unicode-org.github - .io/icu/userguide/strings/regexp.html). The occurrence of the pattern to be replaced is - specified using the `occurrence` argument. Specifying `1` means only the first occurrence - will be replaced, `2` means the second occurrence, and so on. Specifying `0` means all - occurrences will be replaced. The number of characters from the beginning of the string to - begin starting to search for pattern matches can be specified using the `position` argument. - Specifying `1` means to search for matches starting at the first character of the input - string, `2` means the second character, and so on. The `position` argument should be a - positive non-zero integer. The replacement string can capture groups using numbered - backreferences. - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - - Behavior is undefined if the regex fails to compile, the replacement contains an illegal - back-reference, the occurrence value is out of range, or the position value is out of range. - impls: - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "string" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "string" - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "pattern" - description: The regular expression to search for within the input string. - - value: "varchar" - name: "replacement" - description: The replacement string. - - value: i64 - name: "position" - description: The position to start the search. - - value: i64 - name: "occurrence" - description: Which occurrence of the match to replace. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "varchar" - - - name: ltrim - description: >- - Remove any occurrence of the characters from the left side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: rtrim - description: >- - Remove any occurrence of the characters from the right side of the string. - If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: trim - description: >- - Remove any occurrence of the characters from the left and right sides of - the string. If no characters are specified, spaces are removed. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to remove characters from." - - value: "varchar" - name: "characters" - description: "The set of characters to remove." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to remove characters from." - - value: "string" - name: "characters" - description: "The set of characters to remove." - return: "string" - - - name: lpad - description: >- - Left-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the right-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: rpad - description: >- - Right-pad the input string with the string of 'characters' until the specified length of the - string has been reached. If the input string is longer than 'length', remove characters from - the left-side to shorten it to 'length' characters. If the string of 'characters' is longer - than the remaining 'length' needed to be filled, only pad until 'length' has been reached. - If 'characters' is not specified, the default value is a single space. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar" - name: "characters" - description: "The string of characters to use for padding." - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "characters" - description: "The string of characters to use for padding." - return: "string" - - - name: center - description: >- - Center the input string by padding the sides with a single `character` until the specified - `length` of the string has been reached. By default, if the `length` will be reached with - an uneven number of padding, the extra padding will be applied to the right side. - The side with extra padding can be controlled with the `padding` option. - - Behavior is undefined if the number of characters passed to the `character` argument is not 1. - impls: - - args: - - value: "varchar" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "varchar<1>" - name: "character" - description: "The character to use for padding." - options: - padding: - values: [ RIGHT, LEFT ] - return: "varchar" - - args: - - value: "string" - name: "input" - description: "The string to pad." - - value: i32 - name: "length" - description: "The length of the output string." - - value: "string" - name: "character" - description: "The character to use for padding." - options: - padding: - values: [ RIGHT, LEFT ] - return: "string" - - - name: left - description: Extract `count` characters starting from the left of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - - - name: right - description: Extract `count` characters starting from the right of the string. - impls: - - args: - - value: "varchar" - name: "input" - - value: i32 - name: "count" - return: "varchar" - - args: - - value: "string" - name: "input" - - value: i32 - name: "count" - return: "string" - - - name: string_split - description: >- - Split a string into a list of strings, based on a specified `separator` character. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "separator" - description: A character used for splitting the string. - return: "List>" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "separator" - description: A character used for splitting the string. - return: "List" - - - name: regexp_string_split - description: >- - Split a string into a list of strings, based on a regular expression pattern. The - substrings matched by the pattern will be used as the separators to split the input - string and will not be included in the resulting list. The regular expression - pattern should follow the International Components for Unicode implementation - (https://unicode-org.github.io/icu/userguide/strings/regexp.html). - - The `case_sensitivity` option specifies case-sensitive or case-insensitive matching. - Enabling the `multiline` option will treat the input string as multiple lines. This makes - the `^` and `$` characters match at the beginning and end of any line, instead of just the - beginning and end of the input string. Enabling the `dotall` option makes the `.` character - match line terminator characters in a string. - impls: - - args: - - value: "varchar" - name: "input" - description: The input string. - - value: "varchar" - name: "pattern" - description: The regular expression to search for within the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List>" - - args: - - value: "string" - name: "input" - description: The input string. - - value: "string" - name: "pattern" - description: The regular expression to search for within the input string. - options: - case_sensitivity: - values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ] - multiline: - values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ] - dotall: - values: [ DOTALL_DISABLED, DOTALL_ENABLED ] - return: "List" - -aggregate_functions: - - - - name: string_agg - description: Concatenates a column of string values with a separator. - impls: - - args: - - value: "string" - name: "input" - description: "Column of string values." - - value: "string" - name: "separator" - constant: true - description: "Separator for concatenated strings" - ordered: true - return: "string" diff --git a/ibis_substrait/extensions/type_variations.yaml b/ibis_substrait/extensions/type_variations.yaml deleted file mode 100644 index f6f96d50..00000000 --- a/ibis_substrait/extensions/type_variations.yaml +++ /dev/null @@ -1,25 +0,0 @@ -%YAML 1.2 ---- -type_variations: - - parent: string - name: dict4 - description: a four-byte dictionary encoded string - functions: INHERITS - - parent: string - name: bigoffset - description: >- - The arrow large string representation of strings, still restricted to the default string size defined in - Substrait. - functions: SEPARATE - - parent: struct - name: avro - description: an avro encoded struct - functions: SEPARATE - - parent: struct - name: cstruct - description: a cstruct representation of the struct - functions: SEPARATE - - parent: struct - name: dict2 - description: a 2-byte dictionary encoded string. - functions: INHERITS diff --git a/ibis_substrait/extensions/unknown.yaml b/ibis_substrait/extensions/unknown.yaml deleted file mode 100644 index 3b0e6c1e..00000000 --- a/ibis_substrait/extensions/unknown.yaml +++ /dev/null @@ -1,66 +0,0 @@ -%YAML 1.2 ---- -types: - - name: unknown -scalar_functions: - - name: "add" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "subtract" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "multiply" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "divide" - impls: - - args: - - value: unknown - - value: unknown - return: unknown - - name: "modulus" - impls: - - args: - - value: unknown - - value: unknown - return: unknown -aggregate_functions: - - name: "sum" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "avg" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "min" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "max" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown - - name: "count" - impls: - - args: - - value: unknown - intermediate: unknown - return: unknown diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json index 8b91be74..4c9436e0 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json @@ -1471,7 +1471,9 @@ "scalarFunction": { "functionReference": 8, "outputType": { - "fp64": { + "decimal": { + "scale": 2, + "precision": 38, "nullability": "NULLABILITY_NULLABLE" } }, diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json index b1da788f..a02b5b89 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json @@ -971,7 +971,9 @@ "scalarFunction": { "functionReference": 9, "outputType": { - "fp64": { + "decimal": { + "scale": 2, + "precision": 38, "nullability": "NULLABILITY_NULLABLE" } }, diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json index a49d826b..fa574348 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json @@ -1011,7 +1011,9 @@ "scalarFunction": { "functionReference": 7, "outputType": { - "fp64": { + "decimal": { + "scale": 2, + "precision": 38, "nullability": "NULLABILITY_NULLABLE" } }, diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json index 2393c8d1..893ff9e1 100644 --- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json +++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json @@ -556,13 +556,25 @@ "arguments": [ { "value": { - "selection": { - "directReference": { - "structField": { - "field": 5 + "cast": { + "type": { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": {} } }, - "rootReference": {} + "failureBehavior": "FAILURE_BEHAVIOR_THROW_EXCEPTION" } } } diff --git a/ibis_substrait/tests/compiler/test_extensions.py b/ibis_substrait/tests/compiler/test_extensions.py index 2aceb4e8..af9b25ee 100644 --- a/ibis_substrait/tests/compiler/test_extensions.py +++ b/ibis_substrait/tests/compiler/test_extensions.py @@ -332,11 +332,11 @@ def test_extension_register_uri_override(tmp_path): register_extension_yaml(yaml_file, uri="orkbork") assert _extension_mapping["anotheradd"] - assert _extension_mapping["anotheradd"][("a", "b")].uri == "orkbork" + assert _extension_mapping["anotheradd"][(("a", "b"), "c")].uri == "orkbork" register_extension_yaml(yaml_file, prefix="orkbork") assert _extension_mapping["anotheradd"] - assert _extension_mapping["anotheradd"][("a", "b")].uri == "orkbork/foo.yaml" + assert _extension_mapping["anotheradd"][(("a", "b"), "c")].uri == "orkbork/foo.yaml" def test_extension_arithmetic_multiple_signatures(compiler): @@ -388,13 +388,14 @@ def test_extension_round_upcast(compiler, col_dtype, digits_dtype): def test_ops_mapping_validity(): + from ibis_substrait.compiler import translate from ibis_substrait.compiler.mapping import ( IBIS_SUBSTRAIT_OP_MAPPING, _extension_mapping, ) for op in IBIS_SUBSTRAIT_OP_MAPPING.keys(): - assert hasattr(ops, op) + assert hasattr(ops, op) or hasattr(translate, op) # `any` isn't a valid mapping for target in IBIS_SUBSTRAIT_OP_MAPPING.values(): diff --git a/pyproject.toml b/pyproject.toml index fba946bc..67aba211 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ ignore = [ "SIM117", # nested withs "SIM118", # remove .keys() calls from dictionaries "UP006", # use collections.deque instead of Deque for type annotation + "UP007", # Optional[str] -> str | None ] [tool.ruff.lint.per-file-ignores]