# torch.quantize_per_tensor
# See https://pytorch.org/docs/stable/generated/torch.quantize_per_tensor.html
def test_quantize_per_tensor():
    """Quantize a float32 tensor to qint8 and verify the affine round trip.

    Affine quantization stores each float x as round(x / scale) + zero_point
    in the integer domain, and dequantizes as (int_value - zero_point) * scale.
    """
    scale = 0.1
    zero_point = 1
    a = torch.tensor([10.0, 2.0], dtype=torch.float32)
    q = torch.quantize_per_tensor(
        input=a,
        scale=scale,
        zero_point=zero_point,
        dtype=torch.qint8,
    )
    assert isinstance(q, torch.Tensor)
    assert q.is_quantized is True
    assert q.q_scale() == scale
    assert q.q_zero_point() == zero_point
    assert str(q.qscheme()) == "torch.per_tensor_affine"
    assert q.dtype == torch.qint8
    # int_repr() exposes the raw int8 storage: x / scale + zero_point
    # (10.0 / 0.1 + 1 == 101, 2.0 / 0.1 + 1 == 21 — both representable exactly)
    c = q.int_repr()
    assert c[0] == a[0] / scale + zero_point
    assert c[1] == a[1] / scale + zero_point
    assert torch.all(torch.eq(c, torch.tensor([101, 21], dtype=torch.int8)))
    # dequantize() maps the stored integers back to float32
    d = q.dequantize()
    assert d.dtype == torch.float32
    assert torch.all(torch.eq(d, a))
    # torch.dequantize(q) is the functional form of q.dequantize()
    f = torch.dequantize(q)
    assert torch.all(torch.eq(f, a))
    # print(q)
    """
    tensor([10., 2.], size=(2,), dtype=torch.qint8,
    quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=1)
    """
    assert q[0].item() == 10  # q[0].item() will dequantize() to a float
    assert q[1].item() == 2
    print(type(q[0].item()))
    q[0] = 2.5  # Note: it will quantize 2.5 and store it in q (2.5/0.1 + 1 == 26)
    print(q.int_repr())
    """
    tensor([26, 21], dtype=torch.int8)
    """
# Compression ratio
def test_size():
    """Compare on-disk size of a float32 tensor vs its qint8-quantized copy.

    Saves both tensors with torch.save and prints the sizes and their ratio;
    qint8 storage is roughly 4x smaller than float32 (plus container overhead).
    """
    import os
    import tempfile

    r = torch.rand(100, 100, dtype=torch.float32)
    q = torch.quantize_per_tensor(r, scale=0.1, zero_point=0, dtype=torch.qint8)
    # Write into a temporary directory so we never clobber files in the CWD,
    # and cleanup happens even if torch.save or getsize raises.
    with tempfile.TemporaryDirectory() as tmp:
        float_path = os.path.join(tmp, "float32.pt")
        int8_path = os.path.join(tmp, "int8.pt")
        torch.save(r, float_path)
        torch.save(q, int8_path)
        float_size = os.path.getsize(float_path)
        int8_size = os.path.getsize(int8_path)
    print("float_size:", float_size)
    print("int8_size:", int8_size)
    print(f"ratio: {float_size}/{int8_size}: {float_size/int8_size:.3f}")
    # Example output:
    #   float_size: 40747
    #   int8_size: 10795
    #   ratio: 40747/10795: 3.775