diff --git a/ibis_substrait/compiler/core.py b/ibis_substrait/compiler/core.py
index d5241b12..61e52370 100644
--- a/ibis_substrait/compiler/core.py
+++ b/ibis_substrait/compiler/core.py
@@ -81,7 +81,12 @@ def function_id(
op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__]
sig_key = self.get_signature(op)
- extension_signature = f"{op_name}:{'_'.join(sig_key)}"
+ # the keys for looking up scalar functions consist of
+ # tuple(tuple(input dtypes), output dtype)
+ # but the signature we generate in the substrait plan only needs the input types
+ input_key = sig_key[0]
+
+ extension_signature = f"{op_name}:{'_'.join(input_key)}"
try:
function_extension = self.function_extensions[extension_signature]
@@ -91,7 +96,7 @@ def function_id(
)
return function_extension.function_anchor
- def get_signature(self, op: ops.Node) -> tuple[str, ...]:
+ def get_signature(self, op: ops.Node) -> tuple[tuple[str, ...], str]:
"""Validate and upcast (if necessary) scalar function extension signature."""
op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__]
@@ -102,25 +107,31 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]:
)
anykey = ("any",) * len([arg for arg in op.args if arg is not None])
- sigkey = anykey
+ input_type_key = anykey
+ output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name]
+ sigkey = (input_type_key, output_type_key)
+
+ any_sigkey = (anykey, output_type_key)
# First check if `any` is an option
# This function will take arguments of any type
# although we still want to check if the number of args is correct
- function_extension = _extension_mapping[op_name].get(anykey)
+ function_extension = _extension_mapping[op_name].get(any_sigkey)
# Then try to look up extension based on input datatypes
# Each substrait function defines the types of the inputs and at this
# stage we should have performed the appropriate casts to ensure that
# argument types match.
if function_extension is None:
- sigkey = tuple(
+ input_type_key = tuple(
[
IBIS_SUBSTRAIT_TYPE_MAPPING[arg.dtype.name] # type: ignore
for arg in op.args
- if arg is not None and isinstance(arg, ops.Node)
+ if arg is not None and isinstance(arg, ops.Value)
]
)
+ output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name]
+ sigkey = (input_type_key, output_type_key)
function_extension = _extension_mapping[op_name].get(sigkey)
# Then check if extension is variadic
@@ -130,7 +141,10 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]:
# type is only repeated once, so we try to perform a lookup that way, then
# assert, if we find anything, that the function is, indeed, variadic.
if function_extension is None:
- function_extension = _extension_mapping[op_name].get((sigkey[0],))
+ # variadic signature would be in the form of
+ # ((oneof_input_arg_dtype,), output_dtype)
+ variadic_sig = ((sigkey[0][0],), sigkey[1])
+ function_extension = _extension_mapping[op_name].get(variadic_sig)
if function_extension is not None:
assert function_extension.variadic
# Function signature for a variadic should contain the type of
@@ -138,7 +152,34 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]:
# types == the minimum number of variadic args allowed (but keep
# it nonzero)
arg_count_min = max(function_extension.variadic.get("min", 0), 1)
- sigkey = (sigkey[0],) * arg_count_min
+ input_type_key = (sigkey[0][0],) * arg_count_min
+ output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name]
+ sigkey = (input_type_key, output_type_key)
+
+ # Then check if we have an op that has a `date` somewhere in the input
+ # args and the output listed as `i32`.
+ # Ibis assumes i32 for the output of all time extraction functions
+ # because no one is going to be around in i64 years, but Substrait
+ # expects i64 as the output
+ if function_extension is None:
+ if "date" in sigkey[0] and sigkey[1] == "i32":
+ sigkey = (sigkey[0], "i64")
+ function_extension = _extension_mapping[op_name].get(sigkey)
+
+ # Ibis doesn't always handle decimal promotion correctly (I think?)
+ # And all decimal inputs are expected to be decimal outputs, so we have
+ # to massage the signature key
+ if function_extension is None:
+ if set(sigkey[0]) == {"dec"} and sigkey[1] != "dec":
+ sigkey = (sigkey[0], "dec")
+ function_extension = _extension_mapping[op_name].get(sigkey)
+
+ # How many special cases do you want? We've got lots.
+ # Some string functions can only have i64 outputs
+ if function_extension is None:
+ if isinstance(op, ops.StringLength):
+ sigkey = (sigkey[0], "i64")
+ function_extension = _extension_mapping[op_name].get(sigkey)
# If it's still None then we're borked.
if function_extension is None:
@@ -151,15 +192,17 @@ def get_signature(self, op: ops.Node) -> tuple[str, ...]:
def create_extension(
self,
op_name: str,
- sigkey: tuple[str, ...],
+ sigkey: tuple[tuple[str, ...], str],
) -> ste.SimpleExtensionDeclaration.ExtensionFunction:
"""Register extension uri and create extension function."""
function_extension = _extension_mapping[op_name][sigkey]
extension_uri = self.register_extension_uri(function_extension.uri)
+ input_key = sigkey[0]
+
extension_function = self.create_extension_function(
- extension_uri, f"{op_name}:{'_'.join(sigkey)}"
+ extension_uri, f"{op_name}:{'_'.join(input_key)}"
)
return extension_function
diff --git a/ibis_substrait/compiler/mapping.py b/ibis_substrait/compiler/mapping.py
index d6eabdda..11d9395e 100644
--- a/ibis_substrait/compiler/mapping.py
+++ b/ibis_substrait/compiler/mapping.py
@@ -32,6 +32,7 @@
"CountStar": "count",
"CountDistinct": "count",
"Divide": "divide",
+ "SubstraitDivide": "divide",
"EndsWith": "ends_with",
"Equals": "equal",
"Exp": "exp",
@@ -69,6 +70,7 @@
"RegexReplace": "regexp_replace",
"Repeat": "repeat",
"Reverse": "reverse",
+ "SubstraitRound": "round",
"Round": "round",
"RPad": "rpad",
"RStrip": "rtrim",
@@ -119,26 +121,52 @@
}
_normalized_key_names = {
- # decimal precision and scale aren't part of the
- # extension signature they're passed in separately
- "decimal
": "dec",
- "decimal
": "dec",
- "decimal": "dec",
- "decimal": "dec",
- # we don't care about string length
- "fixedchar": "str",
- "fixedchar": "str",
- "varchar": "str",
- "varchar": "str",
- "varchar": "str",
- # for now ignore nullability marker
- "boolean?": "bool",
- # why is there a 1?
- "any1": "any",
- "Date": "date",
+ "binary": "vbin",
+ "interval_compound": "icompound",
+ "interval_day": "iday",
+ "interval_year": "iyear",
+ "string": "str",
+ "timestamp": "ts",
+ "timestamp_tz": "tstz",
}
+def normalize_substrait_type_names(typ: str) -> str:
+    """Reduce a Substrait type string to the short name used in signature keys.
+
+    The string is stripped of ``?`` markers and lowercased, then collapsed to
+    an abbreviation if it starts with one of the known prefixes below, and
+    finally passed through ``_normalized_key_names`` (unknown names are
+    returned unchanged).
+
+    Parameters
+    ----------
+    typ
+        Type string as written in an extension definition, e.g. ``"i64?"``.
+
+    Returns
+    -------
+    str
+        The normalized, lowercase short type name.
+    """
+    # Strip the `?` marker and normalize case once, up front.
+    typ = typ.strip("?").lower()
+
+    # Prefixes whose trailing information does not matter to an extension
+    # function signature.  The first matching prefix wins, so a prefix that
+    # extends another one MUST be listed before it: with
+    # "precision_timestamp" first, every "precision_timestamp_tz..." string
+    # would collapse to "pts" and "ptstz" could never be produced.
+    prefix_abbreviations = [
+        ("fixedchar", "fchar"),
+        ("varchar", "vchar"),
+        ("fixedbinary", "fbin"),
+        ("decimal", "dec"),
+        ("precision_timestamp_tz", "ptstz"),
+        ("precision_timestamp", "pts"),
+        ("struct", "struct"),
+        ("list", "list"),
+        ("map", "map"),
+        ("any", "any"),
+        ("boolean", "bool"),
+        # Non-type fragments that are all treated as decimal.
+        # NOTE(review): presumably pieces of computed decimal return-type
+        # expressions in the extension YAML -- confirm against the YAML files.
+        ("delta", "dec"),
+        ("prec", "dec"),
+        ("scale", "dec"),
+        ("init_", "dec"),
+        ("min_", "dec"),
+        ("max_", "dec"),
+    ]
+    for complex_type, abbr in prefix_abbreviations:
+        if typ.startswith(complex_type):
+            typ = abbr
+            break
+
+    # Then pass through the dictionary of mappings, defaulting to just the
+    # existing string.
+    return _normalized_key_names.get(typ, typ)
+
+
_extension_mapping: Mapping[str, Any] = defaultdict(dict)
@@ -151,13 +179,13 @@ def __init__(self, name: str) -> None:
self.uri: str = ""
def parse(self, impl: Mapping[str, Any]) -> None:
- self.rtn = impl["return"]
+ self.rtn = normalize_substrait_type_names(impl["return"])
self.nullability = impl.get("nullability", False)
self.variadic = impl.get("variadic", False)
if input_args := impl.get("args", []):
for val in input_args:
- if typ := val.get("value", None):
- typ = _normalized_key_names.get(typ.lower(), typ.lower())
+ if typ := val.get("value"):
+ typ = normalize_substrait_type_names(typ)
self.inputs.append(typ)
elif arg_name := val.get("name", None):
self.arg_names.append(arg_name)
@@ -212,7 +240,9 @@ def register_extension_yaml(
for function in named_functions:
for func in _parse_func(function):
func.uri = uri or f"{prefix}/{fname.name}"
- _extension_mapping[function["name"]][tuple(func.inputs)] = func
+ _extension_mapping[function["name"]][(tuple(func.inputs), func.rtn)] = (
+ func
+ )
def _populate_default_extensions() -> None:
diff --git a/ibis_substrait/compiler/translate.py b/ibis_substrait/compiler/translate.py
index cd5fc69b..ac620d54 100644
--- a/ibis_substrait/compiler/translate.py
+++ b/ibis_substrait/compiler/translate.py
@@ -15,11 +15,12 @@
import operator
import uuid
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
-from typing import Any, TypeVar, Union
+from typing import Any, Optional, TypeVar, Union
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
+import ibis.expr.rules as rlz
import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis import util
@@ -29,6 +30,7 @@
from ibis_substrait.compiler.core import SubstraitCompiler, _get_fields
from ibis_substrait.compiler.mapping import (
IBIS_SUBSTRAIT_OP_MAPPING,
+ IBIS_SUBSTRAIT_TYPE_MAPPING,
_extension_mapping,
)
@@ -505,17 +507,17 @@ def value_op(
) -> stalg.Expression:
# Check if scalar function is valid for input dtype(s) and insert casts as needed to
# make sure inputs are correct.
- op = _check_and_upcast(op)
+ newop = _check_and_upcast(op)
# given the details of `op` -> function id
return stalg.Expression(
scalar_function=stalg.Expression.ScalarFunction(
- function_reference=compiler.function_id(op),
- output_type=translate(op.dtype),
+ function_reference=compiler.function_id(newop),
+ output_type=translate(newop.dtype),
arguments=[
stalg.FunctionArgument(
value=translate(arg, compiler=compiler, **kwargs)
)
- for arg in op.args
+ for arg in newop.args
if isinstance(arg, ops.Value)
],
)
@@ -538,6 +540,8 @@ def window_op(
lower_bound, upper_bound = _translate_window_bounds(start, end)
+ func = _check_and_upcast(func)
+
return stalg.Expression(
window_function=stalg.Expression.WindowFunction(
function_reference=compiler.function_id(func),
@@ -565,6 +569,7 @@ def _reduction(
compiler: SubstraitCompiler,
**kwargs: Any,
) -> stalg.AggregateFunction:
+ op = _check_and_upcast(op)
return stalg.AggregateFunction(
function_reference=compiler.function_id(op),
arguments=[
@@ -1408,8 +1413,11 @@ def _check_and_upcast(op: ops.Node) -> ops.Node:
op_name = IBIS_SUBSTRAIT_OP_MAPPING[type(op).__name__]
anykey = ("any",) * len([arg for arg in op.args if arg is not None])
+ output_type_key = IBIS_SUBSTRAIT_TYPE_MAPPING[op.dtype.name]
+ any_sigkey = (anykey, output_type_key)
+
# First check if `any` is an option
- function_extension = _extension_mapping[op_name].get(anykey)
+ function_extension = _extension_mapping[op_name].get(any_sigkey)
# Otherwise, if the types don't match, cast up
if function_extension is None:
@@ -1463,15 +1471,55 @@ def _upcast_string_op(op: string_op) -> string_op:
return type(op)(*casted_args)
+# Ibis has (usually good) opinions about what the dtypes of certain ops should be
+# Substrait disagrees sometimes
+class SubstraitRound(ops.Value):
+    """Rounding operation whose result dtype is the dtype of ``arg``."""
+
+    # Value being rounded.
+    arg: ops.Value[dt.Numeric]
+    # Number of digits to round to; defaults to None when not supplied.
+    digits: Optional[ops.Value[dt.Integer]] = None
+
+    shape = rlz.shape_like("arg")
+
+    @property
+    def dtype(self) -> dt.DataType:
+        """Return the dtype of the value being rounded, unchanged."""
+        rounded_value = self.arg
+        return rounded_value.dtype
+
+
+class SubstraitDivide(ops.NumericBinary):
+    """Division whose result dtype is taken from the left operand."""
+
+    @property
+    def dtype(self) -> dt.DataType:
+        """Return the dtype of ``left``."""
+        left_operand = self.left
+        return left_operand.dtype
+
+
@_upcast.register(ops.Round)
-def _upcast_round_digits(op: ops.Round) -> ops.Round:
+def _upcast_round_digits(op: ops.Round) -> SubstraitRound:
# Substrait wants Int32 for decimal place argument to round
if op.digits is None:
raise ValueError(
"Substrait requires that a rounding operation specify the number of digits to round to"
)
elif not isinstance(op.digits.dtype, dt.Int32):
- return ops.Round(
+ return SubstraitRound(
op.arg, op.digits.copy(dtype=dt.Int32(nullable=op.digits.dtype.nullable))
)
+ return SubstraitRound(op.arg, op.digits)
+
+
+@_upcast.register(ops.Mean)
+def _upcast_mean(op: ops.Mean) -> ops.Mean:
+ # Substrait wants the input types and output types of reductions to match
+ # We cast the _input_ type to match the output type
+ # So mean(some_int) -> float will go to mean(cast(some_int as float)) -> float
+ if op.arg.dtype != op.dtype:
+ return ops.Mean(arg=ops.Cast(op.arg, to=op.dtype), where=op.where)
+
return op
+
+
+@_upcast.register(ops.Divide)
+def _matchy_matchy_divide(op: ops.Divide) -> SubstraitDivide:
+    """Rebuild ``op`` as a ``SubstraitDivide`` and run it through ``_upcast_bin_op``."""
+    return _upcast_bin_op(SubstraitDivide(op.left, op.right))
diff --git a/ibis_substrait/extensions/__init__.py b/ibis_substrait/extensions/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/ibis_substrait/extensions/extension_types.yaml b/ibis_substrait/extensions/extension_types.yaml
deleted file mode 100644
index e03073c5..00000000
--- a/ibis_substrait/extensions/extension_types.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
----
-types:
- - name: point
- structure:
- latitude: i32
- longitude: i32
- - name: line
- structure:
- start: point
- end: point
diff --git a/ibis_substrait/extensions/functions_aggregate_approx.yaml b/ibis_substrait/extensions/functions_aggregate_approx.yaml
deleted file mode 100644
index c77caecc..00000000
--- a/ibis_substrait/extensions/functions_aggregate_approx.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-%YAML 1.2
----
-aggregate_functions:
- - name: "approx_count_distinct"
- description: >-
- Calculates the approximate number of rows that contain distinct values of the expression argument using
- HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which
- returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT
- processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact
- result.
- impls:
- - args:
- - name: x
- value: any
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: binary
- return: i64
diff --git a/ibis_substrait/extensions/functions_aggregate_generic.yaml b/ibis_substrait/extensions/functions_aggregate_generic.yaml
deleted file mode 100644
index 4d891e9c..00000000
--- a/ibis_substrait/extensions/functions_aggregate_generic.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-%YAML 1.2
----
-aggregate_functions:
- - name: "count"
- description: Count a set of values
- impls:
- - args:
- - name: x
- value: any
- options:
- overflow:
- values: [SILENT, SATURATE, ERROR]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64
- return: i64
- - name: "count"
- description: "Count a set of records (not field referenced)"
- impls:
- - options:
- overflow:
- values: [SILENT, SATURATE, ERROR]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64
- return: i64
- - name: "any_value"
- description: >
- Selects an arbitrary value from a group of values.
-
- If the input is empty, the function returns null.
- impls:
- - args:
- - name: x
- value: any
- nullability: DECLARED_OUTPUT
- return: any?
diff --git a/ibis_substrait/extensions/functions_arithmetic.yaml b/ibis_substrait/extensions/functions_arithmetic.yaml
deleted file mode 100644
index eaa66ea2..00000000
--- a/ibis_substrait/extensions/functions_arithmetic.yaml
+++ /dev/null
@@ -1,1522 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "add"
- description: "Add two values."
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - value: i64
- - value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "subtract"
- description: "Subtract one value from another."
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "multiply"
- description: "Multiply two values."
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "divide"
- description: >
- Divide x by y. In the case of integer division, partial values are truncated (i.e. rounded towards 0).
- The `on_division_by_zero` option governs behavior in cases where y is 0 and x is not 0.
- `LIMIT` means positive or negative infinity (depending on the sign of x and y).
- If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`.
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_division_by_zero:
- values: [ LIMIT, NAN, ERROR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_division_by_zero:
- values: [ LIMIT, NAN, ERROR ]
- return: fp64
- -
- name: "negate"
- description: "Negation of the value"
- impls:
- - args:
- - name: x
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - name: x
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- return: fp32
- - args:
- - name: x
- value: fp64
- return: fp64
- -
- name: "modulus"
- description: "Get the remainder when dividing one value by another."
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- return: i64
- -
- name: "power"
- description: "Take the power with x as the base and y as exponent."
- impls:
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- return: fp32
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- return: fp64
- -
- name: "sqrt"
- description: "Square root of the value"
- impls:
- - args:
- - name: x
- value: i64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "exp"
- description: "The mathematical constant e, raised to the power of the value."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "cos"
- description: "Get the cosine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "sin"
- description: "Get the sine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "tan"
- description: "Get the tangent of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "cosh"
- description: "Get the hyperbolic cosine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "sinh"
- description: "Get the hyperbolic sine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "tanh"
- description: "Get the hyperbolic tangent of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "acos"
- description: "Get the arccosine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "asin"
- description: "Get the arcsine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "atan"
- description: "Get the arctangent of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "acosh"
- description: "Get the hyperbolic arccosine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "asinh"
- description: "Get the hyperbolic arcsine of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- return: fp64
- -
- name: "atanh"
- description: "Get the hyperbolic arctangent of a value in radians."
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "atan2"
- description: "Get the arctangent of values given as x/y pairs."
- impls:
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- return: fp64
- -
- name: "abs"
- description: >
- Calculate the absolute value of the argument.
-
- Integer values allow the specification of overflow behavior to handle the
- unevenness of the twos complement, e.g. Int8 range [-128 : 127].
- impls:
- - args:
- - name: x
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i8
- - args:
- - name: x
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i16
- - args:
- - name: x
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - name: x
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- - args:
- - name: x
- value: fp32
- return: fp32
- - args:
- - name: x
- value: fp64
- return: fp64
- -
- name: "sign"
- description: >
- Return the signedness of the argument.
-
- Integer values return signedness with the same type as the input.
- Possible return values are [-1, 0, 1]
-
- Floating point values return signedness with the same type as the input.
- Possible return values are [-1.0, -0.0, 0.0, 1.0, NaN]
- impls:
- - args:
- - name: x
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- return: i64
- - args:
- - name: x
- value: fp32
- return: fp32
- - args:
- - name: x
- value: fp64
- return: fp64
- -
- name: "factorial"
- description: >
- Return the factorial of a given integer input.
-
- The factorial of 0! is 1 by convention.
-
- Negative inputs will raise an error.
- impls:
- - args:
- - value: i32
- name: "n"
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i32
- - args:
- - value: i64
- name: "n"
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: i64
- -
- name: "bitwise_not"
- description: >
- Return the bitwise NOT result for one integer input.
-
- impls:
- - args:
- - name: x
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- return: i64
- -
- name: "bitwise_and"
- description: >
- Return the bitwise AND result for two integer inputs.
-
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- return: i64
- -
- name: "bitwise_or"
- description: >
- Return the bitwise OR result for two given integer inputs.
-
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- return: i64
- -
- name: "bitwise_xor"
- description: >
- Return the bitwise XOR result for two integer inputs.
-
- impls:
- - args:
- - name: x
- value: i8
- - name: y
- value: i8
- return: i8
- - args:
- - name: x
- value: i16
- - name: y
- value: i16
- return: i16
- - args:
- - name: x
- value: i32
- - name: y
- value: i32
- return: i32
- - args:
- - name: x
- value: i64
- - name: y
- value: i64
- return: i64
-
-aggregate_functions:
- - name: "sum"
- description: Sum a set of values. The sum of zero elements yields null.
- impls:
- - args:
- - name: x
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: fp32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp64?
- return: fp64?
- - args:
- - name: x
- value: fp64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp64?
- return: fp64?
- - name: "avg"
- description: Average a set of values. For integral types, this truncates partial values.
- impls:
- - args:
- - name: x
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: i8?
- - args:
- - name: x
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: i16?
- - args:
- - name: x
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: i32?
- - args:
- - name: x
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: i64?
- - args:
- - name: x
- value: fp32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: fp32?
- - args:
- - name: x
- value: fp64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT"
- return: fp64?
- - name: "min"
- description: Min a set of values.
- impls:
- - args:
- - name: x
- value: i8
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i8?
- return: i8?
- - args:
- - name: x
- value: i16
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i16?
- return: i16?
- - args:
- - name: x
- value: i32
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i32?
- return: i32?
- - args:
- - name: x
- value: i64
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: fp32
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp32?
- return: fp32?
- - args:
- - name: x
- value: fp64
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp64?
- return: fp64?
- - name: "max"
- description: Max a set of values.
- impls:
- - args:
- - name: x
- value: i8
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i8?
- return: i8?
- - args:
- - name: x
- value: i16
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i16?
- return: i16?
- - args:
- - name: x
- value: i32
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i32?
- return: i32?
- - args:
- - name: x
- value: i64
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: i64?
- return: i64?
- - args:
- - name: x
- value: fp32
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp32?
- return: fp32?
- - args:
- - name: x
- value: fp64
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: fp64?
- return: fp64?
- - name: "product"
- description: Product of a set of values. Returns 1 for empty input.
- impls:
- - args:
- - name: x
- value: i8
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: i64
- return: i8
- - args:
- - name: x
- value: i16
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: i64
- return: i16
- - args:
- - name: x
- value: i32
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: i64
- return: i32
- - args:
- - name: x
- value: i64
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: i64
- return: i64
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: fp64
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: MIRROR
- decomposable: MANY
- intermediate: fp64
- return: fp64
- - name: "std_dev"
- description: Calculates standard-deviation for a set of values.
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- distribution:
- values: [ SAMPLE, POPULATION]
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- distribution:
- values: [ SAMPLE, POPULATION]
- nullability: DECLARED_OUTPUT
- return: fp64?
- - name: "variance"
- description: Calculates variance for a set of values.
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- distribution:
- values: [ SAMPLE, POPULATION]
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- distribution:
- values: [ SAMPLE, POPULATION]
- nullability: DECLARED_OUTPUT
- return: fp64?
- - name: "corr"
- description: >
- Calculates the value of Pearson's correlation coefficient between `x` and `y`.
- If there is no input, null is returned.
- impls:
- - args:
- - name: x
- value: fp32
- - name: y
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - name: x
- value: fp64
- - name: y
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: fp64?
- - name: "mode"
- description: >
- Calculates mode for a set of values.
- If there is no input, null is returned.
- impls:
- - args:
- - name: x
- value: i8
- nullability: DECLARED_OUTPUT
- return: i8?
- - args:
- - name: x
- value: i16
- nullability: DECLARED_OUTPUT
- return: i16?
- - args:
- - name: x
- value: i32
- nullability: DECLARED_OUTPUT
- return: i32?
- - args:
- - name: x
- value: i64
- nullability: DECLARED_OUTPUT
- return: i64?
- - args:
- - name: x
- value: fp32
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - name: x
- value: fp64
- nullability: DECLARED_OUTPUT
- return: fp64?
- - name: "median"
- description: >
- Calculate the median for a set of values.
-
- Returns null if applied to zero records. For the integer implementations,
- the rounding option determines how the median should be rounded if it ends
- up midway between two values. For the floating point implementations,
- they specify the usual floating point rounding mode.
- impls:
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: i8
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: i8?
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: i16
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: i16?
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: i32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: i32?
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: i64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: i64?
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- return: fp64?
- - name: "quantile"
- description: >
- Calculates quantiles for a set of values.
-
- This function will divide the aggregated values (passed via the
- distribution argument) over N equally-sized bins, where N is passed
- via a constant argument. It will then return the values at the
- boundaries of these bins in list form. If the input is appropriately
- sorted, this computes the quantiles of the distribution.
-
- The function can optionally return the first and/or last element of
- the input, as specified by the `boundaries` argument. If the input is
- appropriately sorted, this will thus be the minimum and/or maximum
- values of the distribution.
-
- When the boundaries do not lie exactly on elements of the incoming
- distribution, the function will interpolate between the two nearby
- elements. If the interpolated value cannot be represented exactly,
- the `rounding` option controls how the value should be selected or
- computed.
-
- The function fails and returns null in the following cases:
- - `n` is null or less than one;
- - any value in `distribution` is null.
-
- The function returns an empty list if `n` equals 1 and `boundaries` is
- set to `NEITHER`.
-
- impls:
- - args:
- - name: boundaries
- description: >
- Which boundaries to include. For NEITHER, the output will have
- n-1 elements, for MINIMUM and MAXIMUM it will have n elements,
- and for BOTH it will have n+1 elements.
- options: [ NEITHER, MINIMUM, MAXIMUM, BOTH ]
- - name: precision
- description: >
- Based on required operator performance and configured optimizations
- on saving memory bandwidth, the precision of the end result can be
- the highest possible accuracy or an approximation.
-
- - EXACT: provides the exact result, rounded if needed according
- to the rounding option.
- - APPROXIMATE: provides only an estimate; the result must lie
- between the minimum and maximum values in the input
- (inclusive), but otherwise the accuracy is left up to the
- consumer.
- options: [ EXACT, APPROXIMATE ]
- - value: i64
- constant: true
- name: n
- description: >
- A positive integer which defines the number of quantile
- partitions.
- - value: any
- name: distribution
- description: >
- The data for which the quantiles should be computed.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it. For floating point numbers, it specifies the IEEE
- 754 rounding mode (as it does for all other floating point
- operations). For integer types:
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
-
- For non-numeric types, the behavior is the same as for integer
- types, but applied to the index of the value in distribution.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- nullability: DECLARED_OUTPUT
- ordered: true
- return: LIST?
-
-window_functions:
- - name: "row_number"
- description: "the number of the current row within its partition."
- impls:
- - args: []
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: i64?
- window_type: PARTITION
- - name: "rank"
- description: "the rank of the current row, with gaps."
- impls:
- - args: []
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: i64?
- window_type: PARTITION
- - name: "dense_rank"
- description: "the rank of the current row, without gaps."
- impls:
- - args: []
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: i64?
- window_type: PARTITION
- - name: "percent_rank"
- description: "the relative rank of the current row."
- impls:
- - args: []
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: fp64?
- window_type: PARTITION
- - name: "cume_dist"
- description: "the cumulative distribution."
- impls:
- - args: []
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: fp64?
- window_type: PARTITION
- - name: "ntile"
- description: "Return an integer ranging from 1 to the argument value,dividing the partition as equally as possible."
- impls:
- - args:
- - name: x
- value: i32
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: i32?
- window_type: PARTITION
- - args:
- - name: x
- value: i64
- nullability: DECLARED_OUTPUT
- decomposable: NONE
- return: i64?
- window_type: PARTITION
diff --git a/ibis_substrait/extensions/functions_arithmetic_decimal.yaml b/ibis_substrait/extensions/functions_arithmetic_decimal.yaml
deleted file mode 100644
index 0fc4caae..00000000
--- a/ibis_substrait/extensions/functions_arithmetic_decimal.yaml
+++ /dev/null
@@ -1,151 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "add"
- description: "Add two decimal values."
- impls:
- - args:
- - name: x
- value: decimal
- - name: y
- value: decimal
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: |-
- init_scale = max(S1,S2)
- init_prec = init_scale + max(P1 - S1, P2 - S2) + 1
- min_scale = min(init_scale, 6)
- delta = init_prec - 38
- prec = min(init_prec, 38)
- scale_after_borrow = max(init_scale - delta, min_scale)
- scale = init_prec > 38 ? scale_after_borrow : init_scale
- DECIMAL
- -
- name: "subtract"
- impls:
- - args:
- - name: x
- value: decimal
- - name: y
- value: decimal
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: |-
- init_scale = max(S1,S2)
- init_prec = init_scale + max(P1 - S1, P2 - S2) + 1
- min_scale = min(init_scale, 6)
- delta = init_prec - 38
- prec = min(init_prec, 38)
- scale_after_borrow = max(init_scale - delta, min_scale)
- scale = init_prec > 38 ? scale_after_borrow : init_scale
- DECIMAL
- -
- name: "multiply"
- impls:
- - args:
- - name: x
- value: decimal
- - name: y
- value: decimal
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: |-
- init_scale = S1 + S2
- init_prec = P1 + P2 + 1
- min_scale = min(init_scale, 6)
- delta = init_prec - 38
- prec = min(init_prec, 38)
- scale_after_borrow = max(init_scale - delta, min_scale)
- scale = init_prec > 38 ? scale_after_borrow : init_scale
- DECIMAL
- -
- name: "divide"
- impls:
- - args:
- - name: x
- value: decimal
- - name: y
- value: decimal
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: |-
- init_scale = max(6, S1 + P2 + 1)
- init_prec = P1 - S1 + P2 + init_scale
- min_scale = min(init_scale, 6)
- delta = init_prec - 38
- prec = min(init_prec, 38)
- scale_after_borrow = max(init_scale - delta, min_scale)
- scale = init_prec > 38 ? scale_after_borrow : init_scale
- DECIMAL
- -
- name: "modulus"
- impls:
- - args:
- - name: x
- value: decimal
- - name: y
- value: decimal
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- return: |-
- init_scale = max(S1,S2)
- init_prec = min(P1 - S1, P2 - S2) + init_scale
- min_scale = min(init_scale, 6)
- delta = init_prec - 38
- prec = min(init_prec, 38)
- scale_after_borrow = max(init_scale - delta, min_scale)
- scale = init_prec > 38 ? scale_after_borrow : init_scale
- DECIMAL
-aggregate_functions:
- - name: "sum"
- description: Sum a set of values.
- impls:
- - args:
- - name: x
- value: "DECIMAL"
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "DECIMAL?<38,S>"
- return: "DECIMAL?<38,S>"
- - name: "avg"
- description: Average a set of values.
- impls:
- - args:
- - name: x
- value: "DECIMAL<P,S>"
- options:
- overflow:
- values: [ SILENT, SATURATE, ERROR ]
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "STRUCT<DECIMAL<38,S>,i64>"
- return: "DECIMAL<38,S>"
- - name: "min"
- description: Min a set of values.
- impls:
- - args:
- - name: x
- value: "DECIMAL"
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "DECIMAL?<P,S>"
- return: "DECIMAL?<P,S>"
- - name: "max"
- description: Max a set of values.
- impls:
- - args:
- - name: x
- value: "DECIMAL<P,S>"
- nullability: DECLARED_OUTPUT
- decomposable: MANY
- intermediate: "DECIMAL?<P,S>"
- return: "DECIMAL?<P,S>"
diff --git a/ibis_substrait/extensions/functions_boolean.yaml b/ibis_substrait/extensions/functions_boolean.yaml
deleted file mode 100644
index 22ae296d..00000000
--- a/ibis_substrait/extensions/functions_boolean.yaml
+++ /dev/null
@@ -1,140 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: or
- description: >
- The boolean `or` using Kleene logic.
-
- This function behaves as follows with nulls:
-
- true or null = true
-
- null or true = true
-
- false or null = null
-
- null or false = null
-
- null or null = null
-
- In other words, in this context a null value really means "unknown", and
- an unknown value `or` true is always true.
-
- Behavior for 0 or 1 inputs is as follows:
- or() -> false
- or(x) -> x
- impls:
- - args:
- - value: boolean?
- name: a
- variadic:
- min: 0
- return: boolean?
- -
- name: and
- description: >
- The boolean `and` using Kleene logic.
-
- This function behaves as follows with nulls:
-
- true and null = null
-
- null and true = null
-
- false and null = false
-
- null and false = false
-
- null and null = null
-
- In other words, in this context a null value really means "unknown", and
- an unknown value `and` false is always false.
-
- Behavior for 0 or 1 inputs is as follows:
- and() -> true
- and(x) -> x
- impls:
- - args:
- - value: boolean?
- name: a
- variadic:
- min: 0
- return: boolean?
- -
- name: and_not
- description: >
- The boolean `and` of one value and the negation of the other using Kleene logic.
-
- This function behaves as follows with nulls:
-
- true and not null = null
-
- null and not false = null
-
- false and not null = false
-
- null and not true = false
-
- null and not null = null
-
- In other words, in this context a null value really means "unknown", and
- an unknown value `and not` true is always false, as is false `and not` an
- unknown value.
- impls:
- - args:
- - value: boolean?
- name: a
- - value: boolean?
- name: b
- return: boolean?
- -
- name: xor
- description: >
- The boolean `xor` of two values using Kleene logic.
-
- When a null is encountered in either input, a null is output.
- impls:
- - args:
- - value: boolean?
- name: a
- - value: boolean?
- name: b
- return: boolean?
- -
- name: not
- description: >
- The `not` of a boolean value.
-
- When a null is input, a null is output.
- impls:
- - args:
- - value: boolean?
- name: a
- return: boolean?
-
-aggregate_functions:
- -
- name: "bool_and"
- description: >
- If any value in the input is false, false is returned. If the input is
- empty or only contains nulls, null is returned. Otherwise, true is
- returned.
- impls:
- - args:
- - value: boolean
- name: a
- nullability: DECLARED_OUTPUT
- return: boolean?
- -
- name: "bool_or"
- description: >
- If any value in the input is true, true is returned. If the input is
- empty or only contains nulls, null is returned. Otherwise, false is
- returned.
- impls:
- - args:
- - value: boolean
- name: a
- nullability: DECLARED_OUTPUT
- return: boolean?
diff --git a/ibis_substrait/extensions/functions_comparison.yaml b/ibis_substrait/extensions/functions_comparison.yaml
deleted file mode 100644
index 7d11f3c7..00000000
--- a/ibis_substrait/extensions/functions_comparison.yaml
+++ /dev/null
@@ -1,216 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "not_equal"
- description: >
- Whether two values are not_equal.
-
- `not_equal(x, y) := (x != y)`
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "equal"
- description: >
- Whether two values are equal.
-
- `equal(x, y) := (x == y)`
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "is_not_distinct_from"
- description: >
- Whether two values are equal.
-
- This function treats `null` values as comparable, so
-
- `is_not_distinct_from(null, null) == True`
-
- This is in contrast to `equal`, in which `null` values do not compare.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "lt"
- description: >
- Less than.
-
- lt(x, y) := (x < y)
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "gt"
- description: >
- Greater than.
-
- gt(x, y) := (x > y)
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "lte"
- description: >
- Less than or equal to.
-
- lte(x, y) := (x <= y)
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "gte"
- description: >
- Greater than or equal to.
-
- gte(x, y) := (x >= y)
-
- If either/both of `x` and `y` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: BOOLEAN
- -
- name: "between"
- description: >-
- Whether the `expression` is greater than or equal to `low` and less than or equal to `high`.
-
- `expression` BETWEEN `low` AND `high`
-
- If `low`, `high`, or `expression` are `null`, `null` is returned.
- impls:
- - args:
- - value: any1
- name: expression
- description: The expression to test for in the range defined by `low` and `high`.
- - value: any1
- name: low
- description: The value to check if greater than or equal to.
- - value: any1
- name: high
- description: The value to check if less than or equal to.
- return: BOOLEAN
- -
- name: "is_null"
- description: Whether a value is null. NaN is not null.
- impls:
- - args:
- - value: any1
- name: x
- return: BOOLEAN
- nullability: DECLARED_OUTPUT
- -
- name: "is_not_null"
- description: Whether a value is not null. NaN is not null.
- impls:
- - args:
- - value: any1
- name: x
- return: BOOLEAN
- nullability: DECLARED_OUTPUT
- -
- name: "is_nan"
- description: >
- Whether a value is not a number.
-
- If `x` is `null`, `null` is returned.
- impls:
- - args:
- - value: fp32
- name: x
- return: BOOLEAN
- - args:
- - value: fp64
- name: x
- return: BOOLEAN
- -
- name: "is_finite"
- description: >
- Whether a value is finite (neither infinite nor NaN).
-
- If `x` is `null`, `null` is returned.
- impls:
- - args:
- - value: fp32
- name: x
- return: BOOLEAN
- - args:
- - value: fp64
- name: x
- return: BOOLEAN
- -
- name: "is_infinite"
- description: >
- Whether a value is infinite.
-
- If `x` is `null`, `null` is returned.
- impls:
- - args:
- - value: fp32
- name: x
- return: BOOLEAN
- - args:
- - value: fp64
- name: x
- return: BOOLEAN
- -
- name: "nullif"
- description: If two values are equal, return null. Otherwise, return the first value.
- impls:
- - args:
- - value: any1
- name: x
- - value: any1
- name: y
- return: any1
- -
- name: "coalesce"
- description: >-
- Evaluate arguments from left to right and return the first argument that is not null. Once
- a non-null argument is found, the remaining arguments are not evaluated.
-
- If all arguments are null, return null.
- impls:
- - args:
- - value: any1
- variadic:
- min: 2
- return: any1
diff --git a/ibis_substrait/extensions/functions_datetime.yaml b/ibis_substrait/extensions/functions_datetime.yaml
deleted file mode 100644
index 51407e4c..00000000
--- a/ibis_substrait/extensions/functions_datetime.yaml
+++ /dev/null
@@ -1,267 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: extract
- description: Extract portion of a date/time value.
- impls:
- - args:
- - name: component
- options: [ YEAR, MONTH, DAY, SECOND ]
- description: The part of the value to extract.
- - name: x
- value: timestamp
- return: i64
- - args:
- - name: component
- options: [ YEAR, MONTH, DAY, SECOND ]
- description: The part of the value to extract.
- - name: x
- value: timestamp_tz
- return: i64
- - args:
- - name: component
- options: [ YEAR, MONTH, DAY ]
- description: The part of the value to extract.
- - name: x
- value: date
- return: i64
- - args:
- - name: component
- options: [ SECOND ]
- description: The part of the value to extract.
- - name: x
- value: time
- return: i64
- -
- name: "add"
- description: Add an interval to a date/time type.
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: interval_year
- return: timestamp
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: interval_year
- return: timestamp
- - args:
- - name: x
- value: date
- - name: y
- value: interval_year
- return: timestamp
- - args:
- - name: x
- value: timestamp
- - name: y
- value: interval_day
- return: timestamp
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: interval_day
- return: timestamp
- - args:
- - name: x
- value: date
- - name: y
- value: interval_day
- return: timestamp
- -
- name: "add_intervals"
- description: Add two intervals together.
- impls:
- - args:
- - name: x
- value: interval_day
- - name: y
- value: interval_day
- return: interval_day
- - args:
- - name: x
- value: interval_year
- - name: y
- value: interval_year
- return: interval_year
- -
- name: "subtract"
- description: Subtract an interval from a date/time type.
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: interval_year
- return: timestamp
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: interval_year
- return: timestamp_tz
- - args:
- - name: x
- value: date
- - name: y
- value: interval_year
- return: date
- - args:
- - name: x
- value: timestamp
- - name: y
- value: interval_day
- return: timestamp
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: interval_day
- return: timestamp_tz
- - args:
- - name: x
- value: date
- - name: y
- value: interval_day
- return: date
- -
- name: "lte"
- description: less than or equal to
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: timestamp
- return: boolean
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: timestamp_tz
- return: boolean
- - args:
- - name: x
- value: date
- - name: y
- value: date
- return: boolean
- - args:
- - name: x
- value: interval_day
- - name: y
- value: interval_day
- return: boolean
- - args:
- - name: x
- value: interval_year
- - name: y
- value: interval_year
- return: boolean
- -
- name: "lt"
- description: less than
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: timestamp
- return: boolean
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: timestamp_tz
- return: boolean
- - args:
- - name: x
- value: date
- - name: y
- value: date
- return: boolean
- - args:
- - name: x
- value: interval_day
- - name: y
- value: interval_day
- return: boolean
- - args:
- - name: x
- value: interval_year
- - name: y
- value: interval_year
- return: boolean
- -
- name: "gte"
- description: greater than or equal to
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: timestamp
- return: boolean
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: timestamp_tz
- return: boolean
- - args:
- - name: x
- value: date
- - name: y
- value: date
- return: boolean
- - args:
- - name: x
- value: interval_day
- - name: y
- value: interval_day
- return: boolean
- - args:
- - name: x
- value: interval_year
- - name: y
- value: interval_year
- return: boolean
- -
- name: "gt"
- description: greater than
- impls:
- - args:
- - name: x
- value: timestamp
- - name: y
- value: timestamp
- return: boolean
- - args:
- - name: x
- value: timestamp_tz
- - name: y
- value: timestamp_tz
- return: boolean
- - args:
- - name: x
- value: date
- - name: y
- value: date
- return: boolean
- - args:
- - name: x
- value: interval_day
- - name: y
- value: interval_day
- return: boolean
- - args:
- - name: x
- value: interval_year
- - name: y
- value: interval_year
- return: boolean
diff --git a/ibis_substrait/extensions/functions_logarithmic.yaml b/ibis_substrait/extensions/functions_logarithmic.yaml
deleted file mode 100644
index f4b8acc1..00000000
--- a/ibis_substrait/extensions/functions_logarithmic.yaml
+++ /dev/null
@@ -1,147 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "ln"
- description: "Natural logarithm of the value"
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp64
- -
- name: "log10"
- description: "Logarithm to base 10 of the value"
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp64
- -
- name: "log2"
- description: "Logarithm to base 2 of the value"
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp64
- -
- name: "logb"
- description: >
- Logarithm of the value with the given base
-
- logb(x, b) => log_{b} (x)
- impls:
- - args:
- - value: fp32
- name: "x"
- description: "The number `x` to compute the logarithm of"
- - value: fp32
- name: "base"
- description: "The logarithm base `b` to use"
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp32
- - args:
- - value: fp64
- name: "x"
- description: "The number `x` to compute the logarithm of"
- - value: fp64
- name: "base"
- description: "The logarithm base `b` to use"
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp64
- -
- name: "log1p"
- description: >
- Natural logarithm (base e) of 1 + x
-
- log1p(x) => log(1+x)
- impls:
- - args:
- - name: x
- value: fp32
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp32
- - args:
- - name: x
- value: fp64
- options:
- rounding:
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
- on_domain_error:
- values: [ NAN, ERROR ]
- on_log_zero:
- values: [NAN, ERROR, MINUS_INFINITY]
- return: fp64
diff --git a/ibis_substrait/extensions/functions_rounding.yaml b/ibis_substrait/extensions/functions_rounding.yaml
deleted file mode 100644
index 09309f2c..00000000
--- a/ibis_substrait/extensions/functions_rounding.yaml
+++ /dev/null
@@ -1,270 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "ceil"
- description: >
- Rounding to the ceiling of the value `x`.
- impls:
- - args:
- - value: fp32
- name: "x"
- return: fp32
- - args:
- - value: fp64
- name: "x"
- return: fp64
- -
- name: "floor"
- description: >
- Rounding to the floor of the value `x`.
- impls:
- - args:
- - value: fp32
- name: "x"
- return: fp32
- - args:
- - value: fp64
- name: "x"
- return: fp64
- -
- name: "round"
- description: >
- Rounding the value `x` to `s` decimal places.
- impls:
- - args:
- - value: i8
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, nothing will happen
- since `x` is an integer value.
-
- When `s` is a negative number, the rounding is
- performed to the nearest multiple of `10^(-s)`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: i8?
- - args:
- - value: i16
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, nothing will happen
- since `x` is an integer value.
-
- When `s` is a negative number, the rounding is
- performed to the nearest multiple of `10^(-s)`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: i16?
- - args:
- - value: i32
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, nothing will happen
- since `x` is an integer value.
-
- When `s` is a negative number, the rounding is
- performed to the nearest multiple of `10^(-s)`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: i32?
- - args:
- - value: i64
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, nothing will happen
- since `x` is an integer value.
-
- When `s` is a negative number, the rounding is
- performed to the nearest multiple of `10^(-s)`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: i64?
- - args:
- - value: fp32
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, the rounding
- is performed to a `s` number of decimal places.
-
- When `s` is a negative number, the rounding is
- performed to the left side of the decimal point
- as specified by `s`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: fp32?
- - args:
- - value: fp64
- name: "x"
- description: >
- Numerical expression to be rounded.
- - value: i32
- name: "s"
- description: >
- Number of decimal places to be rounded to.
-
- When `s` is a positive number, the rounding
- is performed to a `s` number of decimal places.
-
- When `s` is a negative number, the rounding is
- performed to the left side of the decimal point
- as specified by `s`.
- options:
- rounding:
- description: >
- When a boundary is computed to lie somewhere between two values,
- and this value cannot be exactly represented, this specifies how
- to round it.
-
- - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
- to the even option.
- - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
- halfway, tie away from zero.
- - TRUNCATE: always round toward zero.
- - CEILING: always round toward positive infinity.
- - FLOOR: always round toward negative infinity.
- - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
- - TIE_DOWN: round ties with FLOOR rule
- - TIE_UP: round ties with CEILING rule
- - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
- - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
- to the odd option.
- values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
- AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD ]
- nullability: DECLARED_OUTPUT
- return: fp64?
diff --git a/ibis_substrait/extensions/functions_set.yaml b/ibis_substrait/extensions/functions_set.yaml
deleted file mode 100644
index ce02bf32..00000000
--- a/ibis_substrait/extensions/functions_set.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: "index_in"
- description: >
- Checks the membership of a value in a list of values
-
- Returns the first 0-based index value of some input `T` if `T` is equal to
- any element in `List`. Returns `NULL` if not found.
-
- If `T` is `NULL`, returns `NULL`.
-
- If `T` is `NaN`:
- - Returns 0-based index of `NaN` in `List` (default)
- - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified)
- impls:
- - args:
- - name: x
- value: T
- - name: y
- value: List
- options:
- nan_equality:
- values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ]
- nullability: DECLARED_OUTPUT
- return: int64?
diff --git a/ibis_substrait/extensions/functions_string.yaml b/ibis_substrait/extensions/functions_string.yaml
deleted file mode 100644
index 19d594fd..00000000
--- a/ibis_substrait/extensions/functions_string.yaml
+++ /dev/null
@@ -1,1330 +0,0 @@
-%YAML 1.2
----
-scalar_functions:
- -
- name: concat
- description: Concatenate strings.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- variadic:
- min: 1
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- variadic:
- min: 1
- return: "string"
- -
- name: like
- description: >-
- Are two strings like each other.
-
- The `case_sensitivity` option applies to the `match` argument.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "match"
- description: The string to match against the input string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "match"
- description: The string to match against the input string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- -
- name: substring
- description: >-
- Extract a substring of a specified `length` starting from position `start`.
- A `start` value of 1 refers to the first characters of the string.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- - value: i32
- name: "start"
- - value: i32
- name: "length"
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- - value: i32
- name: "start"
- - value: i32
- name: "length"
- return: "string"
- - args:
- - value: "fixedchar"
- name: "input"
- - value: i32
- name: "start"
- - value: i32
- name: "length"
- return: "string"
- -
- name: regexp_match_substring
- description: >-
- Extract a substring that matches the given regular expression pattern. The regular expression
- pattern should follow the International Components for Unicode implementation
- (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the
- pattern to be extracted is specified using the `occurrence` argument. Specifying `1` means
- the first occurrence will be extracted, `2` means the second occurrence, and so on.
- The `occurrence` argument should be a positive non-zero integer. The number of characters
- from the beginning of the string to begin starting to search for pattern matches can be
- specified using the `position` argument. Specifying `1` means to search for matches
- starting at the first character of the input string, `2` means the second character, and so
- on. The `position` argument should be a positive non-zero integer.
-
- The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
- Enabling the `multiline` option will treat the input string as multiple lines. This makes
- the `^` and `$` characters match at the beginning and end of any line, instead of just the
- beginning and end of the input string. Enabling the `dotall` option makes the `.` character
- match line terminator characters in a string.
-
- Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or
- the position value is out of range.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- - value: "varchar"
- name: "pattern"
- - value: i64
- name: "position"
- - value: i64
- name: "occurrence"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- - value: "string"
- name: "pattern"
- - value: i64
- name: "position"
- - value: i64
- name: "occurrence"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "string"
- -
- name: starts_with
- description: >-
- Whether the `input` string starts with the `substring`.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- -
- name: ends_with
- description: >-
- Whether `input` string ends with the substring.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- -
- name: contains
- description: >-
- Whether the `input` string contains the `substring`.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "BOOLEAN"
- -
- name: strpos
- description: >-
- Return the position of the first occurrence of a string in another string. The first
- character of the string is at position 1. If no occurrence is found, 0 is returned.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to search for.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- -
- name: regexp_strpos
- description: >-
- Return the position of an occurrence of the given regular expression pattern in a
- string. The first character of the string is at position 1. The regular expression pattern
- should follow the International Components for Unicode implementation
- (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters
- from the beginning of the string to begin starting to search for pattern matches can be
- specified using the `position` argument. Specifying `1` means to search for matches
- starting at the first character of the input string, `2` means the second character, and so
- on. The `position` argument should be a positive non-zero integer. Which occurrence to
- return the position of is specified using the `occurrence` argument. Specifying `1` means
- the position first occurrence will be returned, `2` means the position of the second
- occurrence, and so on. The `occurrence` argument should be a positive non-zero integer. If
- no occurrence is found, 0 is returned.
-
- The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
- Enabling the `multiline` option will treat the input string as multiple lines. This makes
- the `^` and `$` characters match at the beginning and end of any line, instead of just the
- beginning and end of the input string. Enabling the `dotall` option makes the `.` character
- match line terminator characters in a string.
-
- Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or
- the position value is out of range.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- - value: "varchar"
- name: "pattern"
- - value: i64
- name: "position"
- - value: i64
- name: "occurrence"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: i64
- - args:
- - value: "string"
- name: "input"
- - value: "string"
- name: "pattern"
- - value: i64
- name: "position"
- - value: i64
- name: "occurrence"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: i64
- -
- name: count_substring
- description: >-
- Return the number of non-overlapping occurrences of a substring in an input string.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "substring"
- description: The substring to count.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "substring"
- description: The substring to count.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- description: The input string.
- - value: "fixedchar"
- name: "substring"
- description: The substring to count.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: i64
- -
- name: regexp_count_substring
- description: >-
- Return the number of non-overlapping occurrences of a regular expression pattern in an input
- string. The regular expression pattern should follow the International Components for
- Unicode implementation (https://unicode-org.github.io/icu/userguide/strings/regexp.html).
- The number of characters from the beginning of the string to begin starting to search for
- pattern matches can be specified using the `position` argument. Specifying `1` means to
- search for matches starting at the first character of the input string, `2` means the
- second character, and so on. The `position` argument should be a positive non-zero integer.
-
- The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
- Enabling the `multiline` option will treat the input string as multiple lines. This makes
- the `^` and `$` characters match at the beginning and end of any line, instead of just the
- beginning and end of the input string. Enabling the `dotall` option makes the `.` character
- match line terminator characters in a string.
-
- Behavior is undefined if the regex fails to compile or the position value is out of range.
- impls:
- - args:
- - value: "string"
- name: "input"
- - value: "string"
- name: "pattern"
- - value: i64
- name: "position"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- - value: "varchar"
- name: "pattern"
- - value: i64
- name: "position"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- - value: "fixedchar"
- name: "pattern"
- - value: i64
- name: "position"
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: i64
- -
- name: replace
- description: >-
- Replace all occurrences of the substring with the replacement string.
-
- The `case_sensitivity` option applies to the `substring` argument.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: Input string.
- - value: "string"
- name: "substring"
- description: The substring to replace.
- - value: "string"
- name: "replacement"
- description: The replacement string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- description: Input string.
- - value: "varchar"
- name: "substring"
- description: The substring to replace.
- - value: "varchar"
- name: "replacement"
- description: The replacement string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- return: "varchar"
- -
- name: concat_ws
- description: Concatenate strings together separated by a separator.
- impls:
- - args:
- - value: "string"
- name: "separator"
- description: Character to separate strings by.
- - value: "string"
- name: "string_arguments"
- description: Strings to be concatenated.
- variadic:
- min: 1
- return: "string"
- - args:
- - value: "varchar"
- name: "separator"
- description: Character to separate strings by.
- - value: "varchar"
- name: "string_arguments"
- description: Strings to be concatenated.
- variadic:
- min: 1
- return: "varchar"
- -
- name: repeat
- description: Repeat a string `count` number of times.
- impls:
- - args:
- - value: "string"
- name: "input"
- - value: i64
- name: "count"
- return: "string"
- - args:
- - value: "varchar"
- - value: i64
- name: "input"
- - value: i64
- name: "count"
- return: "varchar"
- -
- name: reverse
- description: Returns the string in reverse order.
- impls:
- - args:
- - value: "string"
- name: "input"
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- return: "fixedchar"
- -
- name: replace_slice
- description: >-
- Replace a slice of the input string. A specified 'length' of characters will be deleted from
- the input string beginning at the 'start' position and will be replaced by a new string. A
- start value of 1 indicates the first character of the input string. If start is negative
- or zero, or greater than the length of the input string, a null string is returned. If 'length'
- is negative, a null string is returned. If 'length' is zero, inserting of the new string
- occurs at the specified 'start' position and no characters are deleted. If 'length' is
- greater than the input string, deletion will occur up to the last character of the input string.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: Input string.
- - value: i64
- name: "start"
- description: The position in the string to start deleting/inserting characters.
- - value: i64
- name: "length"
- description: The number of characters to delete from the input string.
- - value: "string"
- name: "replacement"
- description: The new string to insert at the start position.
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- description: Input string.
- - value: i64
- name: "start"
- description: The position in the string to start deleting/inserting characters.
- - value: i64
- name: "length"
- description: The number of characters to delete from the input string.
- - value: "varchar"
- name: "replacement"
- description: The new string to insert at the start position.
- return: "varchar"
- -
- name: lower
- description: >-
- Transform the string to lower case characters. Implementation should follow the utf8_unicode_ci
- collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
- impls:
- - args:
- - value: "string"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "fixedchar"
- -
- name: upper
- description: >-
- Transform the string to upper case characters. Implementation should follow the utf8_unicode_ci
- collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
- impls:
- - args:
- - value: "string"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "fixedchar"
- -
- name: swapcase
- description: >-
- Transform the string's lowercase characters to uppercase and uppercase characters to
- lowercase. Implementation should follow the utf8_unicode_ci collations according to the
- Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
- impls:
- - args:
- - value: "string"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "fixedchar"
- -
- name: capitalize
- description: >-
- Capitalize the first character of the input string. Implementation should follow the
- utf8_unicode_ci collations according to the Unicode Collation Algorithm described at
- http://www.unicode.org/reports/tr10/.
- impls:
- - args:
- - value: "string"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "fixedchar"
- -
- name: title
- description: >-
- Converts the input string into titlecase. Capitalize the first character of each word in the
- input string except for articles (a, an, the). Implementation should follow the
- utf8_unicode_ci collations according to the Unicode Collation Algorithm described at
- http://www.unicode.org/reports/tr10/.
- impls:
- - args:
- - value: "string"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "varchar"
- - args:
- - value: "fixedchar"
- name: "input"
- options:
- char_set:
- values: [ UTF8, ASCII_ONLY ]
- return: "fixedchar"
- -
- name: char_length
- description: >-
- Return the number of characters in the input string. The length includes trailing spaces.
- impls:
- - args:
- - value: "string"
- name: "input"
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- return: i64
- -
- name: bit_length
- description: Return the number of bits in the input string.
- impls:
- - args:
- - value: "string"
- name: "input"
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- return: i64
- -
- name: octet_length
- description: Return the number of bytes in the input string.
- impls:
- - args:
- - value: "string"
- name: "input"
- return: i64
- - args:
- - value: "varchar"
- name: "input"
- return: i64
- - args:
- - value: "fixedchar"
- name: "input"
- return: i64
- -
- name: regexp_replace
- description: >-
- Search a string for a substring that matches a given regular expression pattern and replace
- it with a replacement string. The regular expression pattern should follow the
- International Components for Unicode implementation (https://unicode-org.github
- .io/icu/userguide/strings/regexp.html). The occurrence of the pattern to be replaced is
- specified using the `occurrence` argument. Specifying `1` means only the first occurrence
- will be replaced, `2` means the second occurrence, and so on. Specifying `0` means all
- occurrences will be replaced. The number of characters from the beginning of the string to
- begin starting to search for pattern matches can be specified using the `position` argument.
- Specifying `1` means to search for matches starting at the first character of the input
- string, `2` means the second character, and so on. The `position` argument should be a
- positive non-zero integer. The replacement string can capture groups using numbered
- backreferences.
-
- The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
- Enabling the `multiline` option will treat the input string as multiple lines. This makes
- the `^` and `$` characters match at the beginning and end of any line, instead of just the
- beginning and end of the input string. Enabling the `dotall` option makes the `.` character
- match line terminator characters in a string.
-
- Behavior is undefined if the regex fails to compile, the replacement contains an illegal
- back-reference, the occurrence value is out of range, or the position value is out of range.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "pattern"
- description: The regular expression to search for within the input string.
- - value: "string"
- name: "replacement"
- description: The replacement string.
- - value: i64
- name: "position"
- description: The position to start the search.
- - value: i64
- name: "occurrence"
- description: Which occurrence of the match to replace.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "string"
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "pattern"
- description: The regular expression to search for within the input string.
- - value: "varchar"
- name: "replacement"
- description: The replacement string.
- - value: i64
- name: "position"
- description: The position to start the search.
- - value: i64
- name: "occurrence"
- description: Which occurrence of the match to replace.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "varchar"
- -
- name: ltrim
- description: >-
- Remove any occurrence of the characters from the left side of the string.
- If no characters are specified, spaces are removed.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to remove characters from."
- - value: "varchar"
- name: "characters"
- description: "The set of characters to remove."
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to remove characters from."
- - value: "string"
- name: "characters"
- description: "The set of characters to remove."
- return: "string"
- -
- name: rtrim
- description: >-
- Remove any occurrence of the characters from the right side of the string.
- If no characters are specified, spaces are removed.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to remove characters from."
- - value: "varchar"
- name: "characters"
- description: "The set of characters to remove."
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to remove characters from."
- - value: "string"
- name: "characters"
- description: "The set of characters to remove."
- return: "string"
- -
- name: trim
- description: >-
- Remove any occurrence of the characters from the left and right sides of
- the string. If no characters are specified, spaces are removed.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to remove characters from."
- - value: "varchar"
- name: "characters"
- description: "The set of characters to remove."
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to remove characters from."
- - value: "string"
- name: "characters"
- description: "The set of characters to remove."
- return: "string"
- -
- name: lpad
- description: >-
- Left-pad the input string with the string of 'characters' until the specified length of the
- string has been reached. If the input string is longer than 'length', remove characters from
- the right-side to shorten it to 'length' characters. If the string of 'characters' is longer
- than the remaining 'length' needed to be filled, only pad until 'length' has been reached.
- If 'characters' is not specified, the default value is a single space.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "varchar"
- name: "characters"
- description: "The string of characters to use for padding."
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "string"
- name: "characters"
- description: "The string of characters to use for padding."
- return: "string"
- -
- name: rpad
- description: >-
- Right-pad the input string with the string of 'characters' until the specified length of the
- string has been reached. If the input string is longer than 'length', remove characters from
- the left-side to shorten it to 'length' characters. If the string of 'characters' is longer
- than the remaining 'length' needed to be filled, only pad until 'length' has been reached.
- If 'characters' is not specified, the default value is a single space.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "varchar"
- name: "characters"
- description: "The string of characters to use for padding."
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "string"
- name: "characters"
- description: "The string of characters to use for padding."
- return: "string"
- -
- name: center
- description: >-
- Center the input string by padding the sides with a single `character` until the specified
- `length` of the string has been reached. By default, if the `length` will be reached with
- an uneven number of padding, the extra padding will be applied to the right side.
- The side with extra padding can be controlled with the `padding` option.
-
- Behavior is undefined if the number of characters passed to the `character` argument is not 1.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "varchar<1>"
- name: "character"
- description: "The character to use for padding."
- options:
- padding:
- values: [ RIGHT, LEFT ]
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- description: "The string to pad."
- - value: i32
- name: "length"
- description: "The length of the output string."
- - value: "string"
- name: "character"
- description: "The character to use for padding."
- options:
- padding:
- values: [ RIGHT, LEFT ]
- return: "string"
- -
- name: left
- description: Extract `count` characters starting from the left of the string.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- - value: i32
- name: "count"
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- - value: i32
- name: "count"
- return: "string"
- -
- name: right
- description: Extract `count` characters starting from the right of the string.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- - value: i32
- name: "count"
- return: "varchar"
- - args:
- - value: "string"
- name: "input"
- - value: i32
- name: "count"
- return: "string"
- -
- name: string_split
- description: >-
- Split a string into a list of strings, based on a specified `separator` character.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "separator"
- description: A character used for splitting the string.
- return: "List>"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "separator"
- description: A character used for splitting the string.
- return: "List"
- -
- name: regexp_string_split
- description: >-
- Split a string into a list of strings, based on a regular expression pattern. The
- substrings matched by the pattern will be used as the separators to split the input
- string and will not be included in the resulting list. The regular expression
- pattern should follow the International Components for Unicode implementation
- (https://unicode-org.github.io/icu/userguide/strings/regexp.html).
-
- The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
- Enabling the `multiline` option will treat the input string as multiple lines. This makes
- the `^` and `$` characters match at the beginning and end of any line, instead of just the
- beginning and end of the input string. Enabling the `dotall` option makes the `.` character
- match line terminator characters in a string.
- impls:
- - args:
- - value: "varchar"
- name: "input"
- description: The input string.
- - value: "varchar"
- name: "pattern"
- description: The regular expression to search for within the input string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "List>"
- - args:
- - value: "string"
- name: "input"
- description: The input string.
- - value: "string"
- name: "pattern"
- description: The regular expression to search for within the input string.
- options:
- case_sensitivity:
- values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
- multiline:
- values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
- dotall:
- values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
- return: "List"
-
-aggregate_functions:
-
- -
- name: string_agg
- description: Concatenates a column of string values with a separator.
- impls:
- - args:
- - value: "string"
- name: "input"
- description: "Column of string values."
- - value: "string"
- name: "separator"
- constant: true
- description: "Separator for concatenated strings"
- ordered: true
- return: "string"
diff --git a/ibis_substrait/extensions/type_variations.yaml b/ibis_substrait/extensions/type_variations.yaml
deleted file mode 100644
index f6f96d50..00000000
--- a/ibis_substrait/extensions/type_variations.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-%YAML 1.2
----
-type_variations:
- - parent: string
- name: dict4
- description: a four-byte dictionary encoded string
- functions: INHERITS
- - parent: string
- name: bigoffset
- description: >-
- The arrow large string representation of strings, still restricted to the default string size defined in
- Substrait.
- functions: SEPARATE
- - parent: struct
- name: avro
- description: an avro encoded struct
- functions: SEPARATE
- - parent: struct
- name: cstruct
- description: a cstruct representation of the struct
- functions: SEPARATE
- - parent: struct
- name: dict2
- description: a 2-byte dictionary encoded string.
- functions: INHERITS
diff --git a/ibis_substrait/extensions/unknown.yaml b/ibis_substrait/extensions/unknown.yaml
deleted file mode 100644
index 3b0e6c1e..00000000
--- a/ibis_substrait/extensions/unknown.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-%YAML 1.2
----
-types:
- - name: unknown
-scalar_functions:
- - name: "add"
- impls:
- - args:
- - value: unknown
- - value: unknown
- return: unknown
- - name: "subtract"
- impls:
- - args:
- - value: unknown
- - value: unknown
- return: unknown
- - name: "multiply"
- impls:
- - args:
- - value: unknown
- - value: unknown
- return: unknown
- - name: "divide"
- impls:
- - args:
- - value: unknown
- - value: unknown
- return: unknown
- - name: "modulus"
- impls:
- - args:
- - value: unknown
- - value: unknown
- return: unknown
-aggregate_functions:
- - name: "sum"
- impls:
- - args:
- - value: unknown
- intermediate: unknown
- return: unknown
- - name: "avg"
- impls:
- - args:
- - value: unknown
- intermediate: unknown
- return: unknown
- - name: "min"
- impls:
- - args:
- - value: unknown
- intermediate: unknown
- return: unknown
- - name: "max"
- impls:
- - args:
- - value: unknown
- intermediate: unknown
- return: unknown
- - name: "count"
- impls:
- - args:
- - value: unknown
- intermediate: unknown
- return: unknown
diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json
index 8b91be74..4c9436e0 100644
--- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json
+++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h08/tpc_h08.json
@@ -1471,7 +1471,9 @@
"scalarFunction": {
"functionReference": 8,
"outputType": {
- "fp64": {
+ "decimal": {
+ "scale": 2,
+ "precision": 38,
"nullability": "NULLABILITY_NULLABLE"
}
},
diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json
index b1da788f..a02b5b89 100644
--- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json
+++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h14/tpc_h14.json
@@ -971,7 +971,9 @@
"scalarFunction": {
"functionReference": 9,
"outputType": {
- "fp64": {
+ "decimal": {
+ "scale": 2,
+ "precision": 38,
"nullability": "NULLABILITY_NULLABLE"
}
},
diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json
index a49d826b..fa574348 100644
--- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json
+++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h17/tpc_h17.json
@@ -1011,7 +1011,9 @@
"scalarFunction": {
"functionReference": 7,
"outputType": {
- "fp64": {
+ "decimal": {
+ "scale": 2,
+ "precision": 38,
"nullability": "NULLABILITY_NULLABLE"
}
},
diff --git a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json
index 2393c8d1..893ff9e1 100644
--- a/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json
+++ b/ibis_substrait/tests/compiler/snapshots/test_tpch/test_compile/tpc_h22/tpc_h22.json
@@ -556,13 +556,25 @@
"arguments": [
{
"value": {
- "selection": {
- "directReference": {
- "structField": {
- "field": 5
+ "cast": {
+ "type": {
+ "decimal": {
+ "scale": 2,
+ "precision": 15,
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ "input": {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 5
+ }
+ },
+ "rootReference": {}
}
},
- "rootReference": {}
+ "failureBehavior": "FAILURE_BEHAVIOR_THROW_EXCEPTION"
}
}
}
diff --git a/ibis_substrait/tests/compiler/test_extensions.py b/ibis_substrait/tests/compiler/test_extensions.py
index 2aceb4e8..af9b25ee 100644
--- a/ibis_substrait/tests/compiler/test_extensions.py
+++ b/ibis_substrait/tests/compiler/test_extensions.py
@@ -332,11 +332,11 @@ def test_extension_register_uri_override(tmp_path):
register_extension_yaml(yaml_file, uri="orkbork")
assert _extension_mapping["anotheradd"]
- assert _extension_mapping["anotheradd"][("a", "b")].uri == "orkbork"
+ assert _extension_mapping["anotheradd"][(("a", "b"), "c")].uri == "orkbork"
register_extension_yaml(yaml_file, prefix="orkbork")
assert _extension_mapping["anotheradd"]
- assert _extension_mapping["anotheradd"][("a", "b")].uri == "orkbork/foo.yaml"
+ assert _extension_mapping["anotheradd"][(("a", "b"), "c")].uri == "orkbork/foo.yaml"
def test_extension_arithmetic_multiple_signatures(compiler):
@@ -388,13 +388,14 @@ def test_extension_round_upcast(compiler, col_dtype, digits_dtype):
def test_ops_mapping_validity():
+ from ibis_substrait.compiler import translate
from ibis_substrait.compiler.mapping import (
IBIS_SUBSTRAIT_OP_MAPPING,
_extension_mapping,
)
for op in IBIS_SUBSTRAIT_OP_MAPPING.keys():
- assert hasattr(ops, op)
+ assert hasattr(ops, op) or hasattr(translate, op)
# `any` isn't a valid mapping
for target in IBIS_SUBSTRAIT_OP_MAPPING.values():
diff --git a/pyproject.toml b/pyproject.toml
index fba946bc..67aba211 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,6 +111,7 @@ ignore = [
"SIM117", # nested withs
"SIM118", # remove .keys() calls from dictionaries
"UP006", # use collections.deque instead of Deque for type annotation
+ "UP007", # Optional[str] -> str | None
]
[tool.ruff.lint.per-file-ignores]