quantize_per_tensor_dynamic
./code/quantize_per_tensor_dynamic/main.cc
1// #include "ATen/native/quantized/cpu/QuantUtils.h" // for the latest pytorch
2
3#include "ATen/native/quantized/cpu/quant_utils.h" // for torch 1.10
4#include "ATen/ops/quantize_per_tensor_dynamic.h" // needs torch>=1.11
5#include "torch/script.h"
6
7// See
8// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/quantized/cpu/QuantUtils.h#L59
9static void TestChooseQuantizationParams() {
10 quant_utils::TensorQuantizationParams p;
11 p = quant_utils::ChooseQuantizationParams(-1 /*min*/, 2 /*max*/,
12 -128 /*qmin*/, 127 /*qmax*/);
13 std::cout << "zero_point: " << p.zero_point << "\n";
14 std::cout << "scale: " << p.scale << "\n";
15 /**
16 * scale = (max - min) / (qmax - qmin) = 3 / 255 = 0.0117647
17 * zero_point_min = qmin - min/scale = -128 - (-1)/scale = -43
18 * zero_point_max = qmax - max/scale = 127 - 2/scale = -43
19 *
20 * min_error = abs(qmin) -abs(min/scale) = 128 - 1/scale = 43
21 * max_error = abs(qmax) - abs(max/scale) = 127 - 2/scale = -43
22 *
23 * zero_point = (min_error < max_error) ? zero_point_min : zero_point_max
24 */
25}
26
// See
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/quantized/QTensor.cpp#L14
30static void TestQuantizePerTensorDynamic() {
31 torch::Tensor r = torch::tensor({-1, 0, 2}, torch::kFloat32);
32 torch::Tensor q = torch::quantize_per_tensor_dynamic(r, torch::kQInt8, false);
33 std::cout << "q: " << q << "\n";
34#if 0
35q: -1
36 0
37 2
38[ QuantizedCPUQInt8Type{3}, qscheme: per_tensor_affine, scale: 0.0117647, zero_point: -43 ]
39#endif
40 std::cout << "q.int_repr(): " << q.int_repr() << "\n";
41#if 0
42q.int_repr(): -128
43 -43
44 127
45[ CPUCharType{3} ]
46#endif
47}
48
49int main() {
50 TestChooseQuantizationParams();
51 TestQuantizePerTensorDynamic();
52 return 0;
53}