
Commit

cleanup
Signed-off-by: Yuan Yao <[email protected]>
yuanyao-nv committed Aug 7, 2024
1 parent 0192fe7 commit f4ca510
Showing 12 changed files with 100 additions and 96 deletions.
66 changes: 37 additions & 29 deletions docs/Operators.md
@@ -3747,7 +3747,9 @@ for from_type, to_type in test_cases:
raise ValueError(
f"Conversion from {from_type} to {to_type} is not tested."
)
expected = vect_evaluate_float4e2m1_from_bits(subbyte.float32_to_float4e2m1_unpacked(np_fp32))
expected = vect_evaluate_float4e2m1_from_bits(
subbyte.float32_to_float4e2m1_unpacked(np_fp32)
)
output = make_tensor(
"y", getattr(TensorProto, to_type), input_shape, expected.tolist()
)
@@ -20767,45 +20769,33 @@ expect(


<details>
<summary>e2m1</summary>
<summary>e4m3fn</summary>

```python
node = onnx.helper.make_node(
"QuantizeLinear",
inputs=["x", "y_scale", "y_zero_point"],
outputs=["y"],
axis=0,
)

x = np.array(
[
[0.0, 2.5, 4.8, 8.6],
[-30, -20, 6, 9],
[-0.0, -2.5, -4.8, -8.6],
]
).astype(np.float32)

y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
y_zero_point = make_tensor(
"y_zero_point", TensorProto.FLOAT4E2M1, y_scale.shape, np.zeros_like(y_scale)
)
y = make_tensor(
"y", TensorProto.FLOAT4E2M1, x.shape, [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2]
)
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_float4e2m1",
name="test_quantizelinear_e4m3fn",
)
```

</details>


<details>
<summary>e4m3fn</summary>
<summary>e5m2</summary>

```python
node = onnx.helper.make_node(
@@ -20816,40 +20806,58 @@

x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_e4m3fn",
name="test_quantizelinear_e5m2",
)
```

</details>


<details>
<summary>e5m2</summary>
<summary>float4e2m1</summary>

```python
node = onnx.helper.make_node(
"QuantizeLinear",
inputs=["x", "y_scale", "y_zero_point"],
outputs=["y"],
axis=0,
)

x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
x = np.array(
[
[0.0, 2.5, 4.8, 8.6],
[-30, -20, 6, 9],
[-0.0, -2.5, -4.8, -8.6],
]
).astype(np.float32)

y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
y_zero_point = make_tensor(
"y_zero_point",
TensorProto.FLOAT4E2M1,
y_scale.shape,
np.zeros_like(y_scale),
)
y = make_tensor(
"y",
TensorProto.FLOAT4E2M1,
x.shape,
[0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
)

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_e5m2",
name="test_quantizelinear_float4e2m1",
)
```

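For readers checking the float4e2m1 example's expected values: E2M1 can only represent {0, ±0.5, ±1, ±1.5, ±2, ±3, ±4, ±6}, so quantization divides by the per-axis scale and snaps to the nearest representable value, saturating out-of-range inputs to ±6. A rough numpy sketch (not the ONNX reference implementation; `quantize_e2m1` is a made-up helper):

```python
import numpy as np

# Every value representable in float4e2m1: sign, 2-bit exponent, 1-bit mantissa.
E2M1_VALUES = np.array(
    [-6, -4, -3, -2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2, 3, 4, 6], dtype=np.float32
)

def quantize_e2m1(x: np.ndarray, scale: np.ndarray) -> np.ndarray:
    """Divide by the axis-0 scale, then snap to the nearest E2M1 value."""
    scaled = x / scale[:, None]
    nearest = np.abs(scaled[..., None] - E2M1_VALUES).argmin(axis=-1)
    return E2M1_VALUES[nearest]

x = np.array(
    [[0.0, 2.5, 4.8, 8.6], [-30, -20, 6, 9], [-0.0, -2.5, -4.8, -8.6]],
    dtype=np.float32,
)
print(quantize_e2m1(x, np.array([2.0, 3.0, 4.0], dtype=np.float32)))
# [[ 0.   1.   2.   4. ]
#  [-6.  -6.   2.   3. ]
#  [ 0.  -0.5 -1.  -2. ]]
```

The first-match tie-breaking of `argmin` happens to reproduce round-half-to-even for the 2.5 / 2.0 = 1.25 tie here; a faithful implementation rounds ties to even explicitly and preserves the sign of zero.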
66 changes: 37 additions & 29 deletions docs/TestCoverage.md
@@ -2616,7 +2616,9 @@ for from_type, to_type in test_cases:
raise ValueError(
f"Conversion from {from_type} to {to_type} is not tested."
)
expected = vect_evaluate_float4e2m1_from_bits(subbyte.float32_to_float4e2m1_unpacked(np_fp32))
expected = vect_evaluate_float4e2m1_from_bits(
subbyte.float32_to_float4e2m1_unpacked(np_fp32)
)
output = make_tensor(
"y", getattr(TensorProto, to_type), input_shape, expected.tolist()
)
@@ -14184,43 +14186,31 @@ expect(

</details>
<details>
<summary>e2m1</summary>
<summary>e4m3fn</summary>

```python
node = onnx.helper.make_node(
"QuantizeLinear",
inputs=["x", "y_scale", "y_zero_point"],
outputs=["y"],
axis=0,
)

x = np.array(
[
[0.0, 2.5, 4.8, 8.6],
[-30, -20, 6, 9],
[-0.0, -2.5, -4.8, -8.6],
]
).astype(np.float32)

y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
y_zero_point = make_tensor(
"y_zero_point", TensorProto.FLOAT4E2M1, y_scale.shape, np.zeros_like(y_scale)
)
y = make_tensor(
"y", TensorProto.FLOAT4E2M1, x.shape, [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2]
)
x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_float4e2m1",
name="test_quantizelinear_e4m3fn",
)
```

</details>
<details>
<summary>e4m3fn</summary>
<summary>e5m2</summary>

```python
node = onnx.helper.make_node(
@@ -14231,38 +14221,56 @@

x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E4M3FN, [1], [0])
y = make_tensor("y", TensorProto.FLOAT8E4M3FN, [5], [0, 0.5, 1, 448, 96])
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_e4m3fn",
name="test_quantizelinear_e5m2",
)
```

</details>
<details>
<summary>e5m2</summary>
<summary>float4e2m1</summary>

```python
node = onnx.helper.make_node(
"QuantizeLinear",
inputs=["x", "y_scale", "y_zero_point"],
outputs=["y"],
axis=0,
)

x = np.array([0.0, 1.0, 2.0, 100000.0, 200.0]).astype(np.float32)
y_scale = np.float32(2)
y_zero_point = make_tensor("y_zero_point", TensorProto.FLOAT8E5M2, [1], [0.0])
y = make_tensor("y", TensorProto.FLOAT8E5M2, [5], [0, 0.5, 1, 49152, 96])
x = np.array(
[
[0.0, 2.5, 4.8, 8.6],
[-30, -20, 6, 9],
[-0.0, -2.5, -4.8, -8.6],
]
).astype(np.float32)

y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
y_zero_point = make_tensor(
"y_zero_point",
TensorProto.FLOAT4E2M1,
y_scale.shape,
np.zeros_like(y_scale),
)
y = make_tensor(
"y",
TensorProto.FLOAT4E2M1,
x.shape,
[0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
)

expect(
node,
inputs=[x, y_scale, y_zero_point],
outputs=[y],
name="test_quantizelinear_e5m2",
name="test_quantizelinear_float4e2m1",
)
```

6 changes: 3 additions & 3 deletions docs/docsgen/source/technical/float4.md
@@ -57,7 +57,7 @@ The float value is defined by the following expressions:
:header-rows: 1
* -
- E4M3FN
- E2M1
* - exponent :math:`\neq` 0
- :math:`(-1)^S 2^{\sum_{i=1}^2 b_i 2^{i-1} - 1} \left( 1 + b_0 2^{-1} \right)`
* - exponent :math:`=` 0
@@ -108,8 +108,8 @@ The behavior for downcasting to float 4 is summarized below
## Packing and Unpacking

Float4 is stored as 2x4bit in a single byte.
The first element is stored in the 4 LSB and the second element is stored in the 4 MSB.
i.e. for elements x, y, that are consecutive elements in the array:
The first element is stored in the 4 LSB and the second element is stored in the 4 MSB,
i.e. for elements `x` and `y` that are consecutive elements in the array:
```
pack(x,y): y << 4 | x & 0x0F
unpack(z): x = z & 0x0F, y = z >> 4
```
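
The same scheme in vectorized numpy, as a minimal sketch (assuming an even element count; an odd count would need one padding nibble at the end):

```python
import numpy as np

def pack(codes: np.ndarray) -> np.ndarray:
    """Pack pairs of 4-bit codes (one per uint8) into single bytes: y << 4 | x."""
    lo, hi = codes[0::2], codes[1::2]
    return ((hi << 4) | (lo & 0x0F)).astype(np.uint8)

def unpack(packed: np.ndarray) -> np.ndarray:
    """Split each byte back into its low nibble (x) and high nibble (y)."""
    out = np.empty(2 * packed.size, dtype=np.uint8)
    out[0::2] = packed & 0x0F
    out[1::2] = packed >> 4
    return out

codes = np.array([0x1, 0x7, 0x8, 0xF], dtype=np.uint8)
assert np.array_equal(unpack(pack(codes)), codes)
```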
3 changes: 3 additions & 0 deletions onnx/_custom_element_types.py
@@ -52,6 +52,9 @@
#: than its onnx size.
int4 = np.dtype((np.int8, {"int4": (np.int8, 0)}))

#: Defines the float4e2m1 type, see :ref:`onnx-detail-float4` for technical details.
#: Note that each element is stored in a byte and is therefore twice as big
#: as its onnx size.
float4e2m1 = np.dtype((np.uint8, {"float4e2m1": (np.uint8, 0)}))

mapping_name_to_data_type = {
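
For illustration (a sketch, not part of the diff): `np.dtype((np.uint8, {...}))` produces a dtype that stores and computes like uint8 while carrying a named field, which is how onnx code tells float4e2m1 buffers apart from plain bytes.

```python
import numpy as np
from onnx._custom_element_types import float4e2m1

a = np.zeros(4, dtype=float4e2m1)
print(a.dtype.itemsize)  # 1 -- each 4-bit element occupies a full byte
print(a.dtype.fields)    # carries the "float4e2m1" tag alongside uint8 storage
```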
2 changes: 1 addition & 1 deletion onnx/backend/test/case/node/quantizelinear.py
@@ -277,7 +277,7 @@ def export_int4() -> None:
)

@staticmethod
def export_e2m1() -> None:
def export_float4e2m1() -> None:
node = onnx.helper.make_node(
"QuantizeLinear",
inputs=["x", "y_scale", "y_zero_point"],
8 changes: 3 additions & 5 deletions onnx/helper.py
@@ -673,15 +673,14 @@ def pack_float32_to_4bit(array: np.ndarray | Sequence, signed: bool) -> np.ndarray:


def pack_float32_to_float4e2m1(array: np.ndarray | Sequence) -> np.ndarray:
"""Convert an array of float32 value to a 4bit data-type and pack every two concecutive elements in a byte.
See :ref:`onnx-detail-int4` for technical details.
"""Convert an array of float32 value to float4e2m1 and pack every two concecutive elements in a byte.
See :ref:`onnx-detail-float4` for technical details.
Args:
array: array of float to convert and pack
signed: Whether the 4 bit variant is signed or unsigned
Returns:
Packed array with size `ceil(farray.size/2)` (single dimension).
Packed array of float4e2m1 (as uint8) with size `ceil(array.size/2)` (single dimension).
"""
if not isinstance(array, np.ndarray):
array = np.asarray(array, dtype=np.float32)
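
A hypothetical usage sketch of the helper above (assuming it is importable from `onnx.helper`, where this diff defines it; the packed dtype is uint8 per the scheme in float4.md):

```python
import numpy as np
from onnx.helper import pack_float32_to_float4e2m1

# Six float32 values pack into ceil(6 / 2) == 3 bytes, two e2m1 nibbles each.
values = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 6.0], dtype=np.float32)
packed = pack_float32_to_float4e2m1(values)
print(packed.shape)  # (3,)
```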
@@ -757,7 +756,6 @@ def make_tensor(
data_type in (TensorProto.UINT4, TensorProto.INT4, TensorProto.FLOAT4E2M1)
and len(vals) == expected_size + 0.5
):
print("$$$$$$", data_type, vals, len(vals), expected_size, dims)
raise ValueError(
f"Number of values does not match tensor's size. Expected {expected_size}, but it is {len(vals)}. "
)
7 changes: 3 additions & 4 deletions onnx/numpy_helper.py
@@ -221,7 +221,7 @@ def unpack_int4(
return res


def evaluate_float4e2m1_from_bits(x):
def evaluate_float4e2m1_from_bits(x: np.uint8) -> np.float32:
"""Evaluate the numerical value of a single float4e2m1 element represented as uint8
See :ref:`onnx-detail-float4` for technical details.
@@ -232,7 +232,6 @@
The float32 value of the float4e2m1 element.
"""
# x is stored in 4 LSB of int
# assert(isinstance(x, np.uint8))
S = -1 if bool(x & 0x08) else 1
M = x & 0x01
E = (x & 0x06) >> 1
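
For reference, a self-contained sketch of the rule this function implements — bit layout `S E E M` in the 4 LSB, matching the expressions in float4.md:

```python
def decode_e2m1(bits: int) -> float:
    """Decode the 4 LSB of an e2m1 code (bit layout S E E M) to its float value."""
    s = -1.0 if bits & 0x08 else 1.0
    e = (bits & 0x06) >> 1
    m = bits & 0x01
    # Subnormal when the exponent field is zero; normal otherwise.
    return s * 0.5 * m if e == 0 else s * 2.0 ** (e - 1) * (1.0 + 0.5 * m)

# The eight non-negative codes enumerate every magnitude e2m1 can represent.
assert [decode_e2m1(b) for b in range(8)] == [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0]
```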
@@ -509,8 +508,8 @@ def to_array(tensor: TensorProto, base_dir: str = "") -> np.ndarray:
data = tensor.int32_data
shape = tuple(tensor.dims)

# 2 packed fp4e2m1 elements must be represented as a single uint8 value.
# Therefore, y is np.uint8 (not the dtype to which the int4 maps)
# 2 packed float4e2m1 elements must be represented as a single uint8 value.
# Therefore, y is np.uint8.
y = np.empty(len(data), dtype=custom_np_types.float4e2m1).ravel() # type: ignore[assignment]
for i, d in enumerate(data):
y[i] = d
8 changes: 2 additions & 6 deletions onnx/reference/ops/op_cast.py
@@ -139,12 +139,8 @@ def cast_to(x, to, saturate): # noqa: PLR0911
return res.astype(np.float16)

if to == TensorProto.FLOAT4E2M1:
xf = x.astype(np.float32).ravel()
y = np.empty(xf.shape, dtype=float4e2m1).ravel()
for i in range(y.shape[0]):
el = subbyte.float32_to_float4e2m1_unpacked(xf[i])
y[i] = el
# This operator preduces a tensor with the same shape for INT4.
xf = x.astype(np.float32)
y = subbyte.float32_to_float4e2m1_unpacked(xf)
return y.reshape(x.shape)

if to == TensorProto.STRING:
8 changes: 2 additions & 6 deletions onnx/reference/ops/op_dequantize_linear.py
@@ -93,12 +93,8 @@ def _run(
elif x_type == TensorProto.FLOAT8E5M2FNUZ:
dx = float8e5m2_to_float32(x, fn=True, uz=True)
elif x_type == TensorProto.FLOAT4E2M1:
x_shape = x.shape
dx = np.empty(x.shape, dtype=np.float32).ravel()
xr = x.ravel()
for i in range(x.flatten().size):
dx[i] = evaluate_float4e2m1_from_bits(xr[i])
dx.reshape(x_shape)
evaluate_func = np.vectorize(evaluate_float4e2m1_from_bits)
dx = evaluate_func(x)
else:
dx = x.astype(np.float32)
y = dx * reshape_input(x_scale, x.shape, axis, block_size)
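
The cleanup above swaps a hand-written ravel/loop/reshape for `np.vectorize`, which maps a scalar function over an array and preserves its shape. A toy sketch with a stand-in scalar function (not the real decoder):

```python
import numpy as np

def low_nibble(b: int) -> float:
    """Stand-in scalar function; the real code uses evaluate_float4e2m1_from_bits."""
    return float(b & 0x0F)

vec = np.vectorize(low_nibble)
print(vec(np.array([[0x12, 0x07], [0x0F, 0x30]], dtype=np.uint8)))
# [[ 2.  7.]
#  [15.  0.]]
```

Note that `np.vectorize` is a readability tool, not a performance one: it still loops in Python internally.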
7 changes: 2 additions & 5 deletions onnx/reference/ops/op_quantize_linear.py
@@ -207,11 +207,8 @@ def _run(
return (i4,) # type: ignore[attr-defined]

if tensor_type == TensorProto.FLOAT4E2M1:
# x += zero_point
def single_func(x):
return subbyte.float32_to_float4e2m1_unpacked(x)

func = np.vectorize(single_func)
x += zero_point
func = np.vectorize(subbyte.float32_to_float4e2m1_unpacked)
f4 = func(x)
return (f4,) # type: ignore[attr-defined]
