diff --git a/docs/spec.md b/docs/spec.md
index e07dcbc0d8..6cfa71a019 100644
--- a/docs/spec.md
+++ b/docs/spec.md
@@ -6691,13 +6691,18 @@ of the expressed type into corresponding integer values of the storage type using
 the zero point and scale associated with the quantized element type.
 
 ```python
-def quantize(x: Value, type: Type) -> Value:
-  assert is_float(x) and is_quantized(type)
-  x_expressed_rounded = round_nearest_even(x / compute_scales(type, type(x)))
-  x_storage_rounded = convert(x_expressed_rounded, storage_type(type))
-  x_storage_add = x_storage_rounded + compute_zero_points(type, type(x_storage_rounded))
-  x_storage = clamp(storage_min(type), x_storage_add, storage_max(type))
-  return bitcast_convert(x_storage, type)
+def quantize(x: Value, result_type: Type) -> Value:
+  assert is_float(x) and is_quantized(result_type)
+  zero_points = compute_zero_points(result_type, TensorType(shape(x), storage_type(result_type)))
+  converted_zero_points = convert(zero_points, expressed_type(result_type))
+  converted_min = convert(storage_min(result_type), expressed_type(result_type))
+  converted_max = convert(storage_max(result_type), expressed_type(result_type))
+
+  x_scaled = x / compute_scales(result_type, type(x))
+  x_scaled_add_zp = x_scaled + converted_zero_points
+  x_clamped = clamp(converted_min, x_scaled_add_zp, converted_max)
+  x_rounded = round_nearest_even(x_clamped)
+  return convert(x_rounded, result_type)
 ```
 
 * `dequantize_op_quantize` is used to specify element-wise computations on