torch.quantize_per_tensor

See https://pytorch.org/docs/stable/generated/torch.quantize_per_tensor.html

def test_quantize_per_tensor():
    """Exercise torch.quantize_per_tensor and the quantized-tensor accessors."""
    s = 0.1
    zp = 1
    x = torch.tensor([10.0, 2.0], dtype=torch.float32)

    q = torch.quantize_per_tensor(input=x, scale=s, zero_point=zp, dtype=torch.qint8)

    # The result is still a torch.Tensor, but flagged as quantized and
    # carrying its quantization parameters.
    assert isinstance(q, torch.Tensor)
    assert q.is_quantized is True
    assert q.dtype == torch.qint8
    assert q.q_scale() == s
    assert q.q_zero_point() == zp
    assert str(q.qscheme()) == "torch.per_tensor_affine"

    # int_repr() exposes the raw int8 storage: round(value / scale) + zero_point.
    raw = q.int_repr()
    for i in range(2):
        assert raw[i] == x[i] / s + zp
    assert torch.all(torch.eq(raw, torch.tensor([101, 21], dtype=torch.int8)))

    # dequantize() maps the int8 storage back to float32; here the inputs
    # are exactly representable, so we recover the original values.
    d = q.dequantize()
    assert d.dtype == torch.float32
    assert torch.all(torch.eq(d, x))

    # torch.dequantize(q) is the functional equivalent of q.dequantize().
    f = torch.dequantize(q)
    assert torch.all(torch.eq(f, x))
    #  print(q)
    """
    tensor([10.,  2.], size=(2,), dtype=torch.qint8,
           quantization_scheme=torch.per_tensor_affine, scale=0.1, zero_point=1)
    """
    # Indexing a quantized tensor and calling .item() dequantizes to a float.
    assert q[0].item() == 10  # q[0].item() will dequantize() to a float
    assert q[1].item() == 2
    print(type(q[0].item()))

    # Assigning a float quantizes it on the fly: round(2.5 / 0.1) + 1 = 26.
    q[0] = 2.5  # Note: it will quantize 2.5 and store it in q
    print(q.int_repr())
    """
    tensor([26, 21], dtype=torch.int8)
    """

Compression ratio

def test_size():
    """Compare the on-disk size of a float32 tensor against its qint8
    quantized counterpart and print the compression ratio.

    The qint8 file stores 1 byte per element instead of 4, so it should be
    roughly 4x smaller (minus fixed serialization overhead).
    """
    # Local import: tempfile is only needed by this test.
    import tempfile

    r = torch.rand(100, 100, dtype=torch.float32)
    q = torch.quantize_per_tensor(r, scale=0.1, zero_point=0, dtype=torch.qint8)

    # Save into a temporary directory rather than the CWD: this avoids
    # clobbering any pre-existing float32.pt/int8.pt and guarantees cleanup
    # even if torch.save or an assertion raises (the original version leaked
    # both files on error).
    with tempfile.TemporaryDirectory() as tmp:
        float_path = os.path.join(tmp, "float32.pt")
        int8_path = os.path.join(tmp, "int8.pt")
        torch.save(r, float_path)
        torch.save(q, int8_path)
        float_size = os.path.getsize(float_path)
        int8_size = os.path.getsize(int8_path)

    # The quantized file must actually be smaller — that is the whole point.
    assert int8_size < float_size

    print("float_size:", float_size)
    print("int8_size:", int8_size)
    print(f"ratio: {float_size}/{int8_size}: {float_size/int8_size:.3f}")
    """
    float_size: 40747
    int8_size: 10795
    ratio: 40747/10795: 3.775
    """