#include #include #include #include #include #include #include #include #include // For quantize_val #include #include #include #include using namespace at; #ifndef ATEN_CPU_STATIC_DISPATCH TEST(TestQTensor, QuantDequantAPIs) { auto num_elements = 10; Tensor r = at::ones({num_elements}); const double scale = 1.0; const int64_t zero_point = 2; const Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8); ASSERT_EQ(qr.q_scale(), scale); ASSERT_EQ(qr.q_zero_point(), zero_point); ASSERT_TRUE(qr.is_quantized()); ASSERT_FALSE(r.is_quantized()); // int_repr Tensor int_repr = qr.int_repr(); auto* int_repr_data = int_repr.data_ptr(); for (const auto i : c10::irange(num_elements)) { ASSERT_EQ(int_repr_data[i], 3); } // Check for correct quantization auto r_data = r.data_ptr(); auto qr_data = qr.data_ptr(); for (const auto i : c10::irange(num_elements)) { ASSERT_EQ( native::quantize_val(scale, zero_point, r_data[i]).val_, qr_data[i].val_); } // Check for correct dequantization Tensor rqr = qr.dequantize(); auto rqr_data = rqr.data_ptr(); for (const auto i : c10::irange(num_elements)) { ASSERT_EQ(r_data[i], rqr_data[i]); } for (const auto i : c10::irange(num_elements)) { ASSERT_EQ( r_data[i], native::dequantize_val(qr.q_scale(), qr.q_zero_point(), qr_data[i])); } // Check for correct requantization double new_scale = 2.0; int64_t new_zero_point = 1; Tensor reqr = at::quantize_per_tensor(r, new_scale, new_zero_point, kQInt8); auto reqr_data = reqr.data_ptr(); for (const auto i : c10::irange(num_elements)) { reqr_data[i].val_ = native::requantize_val( scale, zero_point, new_scale, new_zero_point, qr_data[i]) .val_; const qint8 expected = native::quantize_val(new_scale, new_zero_point, rqr_data[i]); ASSERT_EQ(expected.val_, reqr_data[i].val_); } } TEST(TestQTensor, RoundingMode) { // We assume that quantization is defined as: // qx = clamp(zero_point + round(x / scale)) // If the zero_point is added before rounding, the result will be wrong. int32_t zero_point = 5; std::vector x_values{ -5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5}; std::vector qx_expect{ 0, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11}; // scale = 1.0 Tensor x = from_blob(x_values.data(), x_values.size()); Tensor qx = at::quantize_per_tensor(x, /*scale=*/1.0, zero_point, kQUInt8); auto qx_data = qx.data_ptr(); for (const auto idx : c10::irange(x_values.size())) { ASSERT_EQ(qx_expect[idx], qx_data[idx].val_) << "Tie breaking during rounding element " << idx << " failed!"; } } TEST(TestQTensor, Item) { Tensor r = at::ones({1}); const float scale = 1; const int32_t zero_point = 2; Tensor qr = at::quantize_per_tensor(r, scale, zero_point, kQUInt8); ASSERT_EQ(r.item().to(), qr.item().to()); } TEST(TestQTensor, EmptyQuantized) { float scale = 0.5; int zero_point = 10; int val = 100; int numel = 10; Tensor q = at::_empty_affine_quantized( {numel}, at::device(at::kCPU).dtype(kQUInt8), scale, zero_point); // Assigning to QTensor auto* q_data = q.data_ptr(); for (const auto i : c10::irange(numel)) { q_data[i].val_ = val; } // dequantize auto r = q.dequantize(); auto* r_data = r.data_ptr(); for (const auto i : c10::irange(numel)) { ASSERT_EQ(r_data[i], (val - zero_point) * scale); } } TEST(TestQTensor, EmptyPerchannelQuantized) { int numel = 10; auto scales = rand({numel}).toType(kDouble); auto zero_points = randint(10, {10}).toType(kLong); int val = 100; int ch_axis = 0; Tensor q = at::_empty_per_channel_affine_quantized( {numel}, scales, zero_points, ch_axis, at::device(at::kCPU).dtype(kQUInt8)); // Assigning to QTensor auto* q_data = q.data_ptr(); for (const auto i : c10::irange(numel)) { q_data[i].val_ = val; } // dequantize auto r = q.dequantize(); auto* r_data = r.data_ptr(); for (const auto i : c10::irange(numel)) { ASSERT_EQ( r_data[i], (val - zero_points[i].item().to()) * scales[i].item().to()); } } TEST(TestQTensor, QuantizePerChannel4d) { int C = 64, H = 10, W = 10; auto scales = rand({C}).toType(kDouble); auto zero_points = randint(10, {C}).toType(kLong); int ch_axis = 1; // create 4d tensor where each H x W image is a range(0, H*W) Tensor tensor = at::empty({1, C, H, W}, at::device(at::kCPU).dtype(kFloat)); auto* tensor_data = tensor.mutable_data_ptr(); for (int c = 0, i = 0; c < C; ++c) { for (int e = 0; e < H * W; ++e, ++i) { tensor_data[i] = e; } } // quantize and check values Tensor q = at::native::quantize_per_channel( tensor, scales, zero_points, ch_axis, kQUInt8); auto* q_data = (uint8_t*)q.data_ptr(); for (int c = 0, i = 0; c < C; ++c) { float inv_scale = 1.0f / static_cast(scales[c].item()); int64_t zero_point = zero_points[c].item(); for (int e = 0; e < H * W; ++e, ++i) { // downsize qval to 255 if val is greater than max uint8_t value // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions) int qval = std::min(zero_point + std::nearbyint(e * inv_scale), 255); ASSERT_EQ((int)q_data[i], qval); } } } TEST(TestQTensor, QuantizePerChannel4dChannelsLast) { int C = 64, H = 10, W = 10; auto scales = rand({C}).toType(kDouble); auto zero_points = randint(10, {C}).toType(kLong); int ch_axis = 1; // create 4d tensor where each H x W image is a range(0, H*W) Tensor tensor = at::empty( {1, C, H, W}, at::device(at::kCPU).dtype(kFloat).memory_format( at::MemoryFormat::ChannelsLast)); auto* tensor_data = tensor.data_ptr(); for (int e = 0, i = 0; e < H * W; ++e) { for (int c = 0; c < C; ++c, ++i) { tensor_data[i] = e; } } // quantize and check values Tensor q = at::native::quantize_per_channel( tensor, scales, zero_points, ch_axis, kQUInt8); auto* q_data = (uint8_t*)q.data_ptr(); for (int e = 0, i = 0; e < H * W; ++e) { for (int c = 0; c < C; ++c, ++i) { float inv_scale = 1.0f / static_cast(scales[c].item()); int64_t zero_point = zero_points[c].item(); // downsize qval to 255 if val is greater than max uint8_t value // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers,bugprone-narrowing-conversions) int qval = std::min(zero_point + std::nearbyint(e * inv_scale), 255); ASSERT_EQ((int)q_data[i], qval); } } } TEST(TestQTensor, FromBlobQuantizedPerTensor) { const float scale = 0.1; const int64_t zero_point = 10; std::vector shape = {5, 10}; auto numel = c10::multiply_integers(shape); TensorOptions options(at::kQUInt8); auto custom_vec = std::make_unique>(); custom_vec->resize(numel); uint8_t* custom_data = custom_vec->data(); for (const auto i : c10::irange(numel)) { custom_data[i] = i; } bool customDataDeleted{false}; auto deleteWhenDone = custom_vec.release(); auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) { ASSERT_EQ((void*)inp, (void*)custom_data); delete deleteWhenDone; customDataDeleted = true; }; { Tensor qtensor = at::from_blob_quantized_per_tensor_affine(custom_data, shape, deleter, scale, zero_point, options); uint8_t* q_data = (uint8_t*)qtensor.data_ptr(); for (const auto i : c10::irange(numel)) { ASSERT_EQ((int)custom_data[i], (int)q_data[i]); } for (int h = 0, i = 0; h < shape[0]; ++h) { for (int w = 0; w < shape[1]; ++w, ++i) { ASSERT_EQ( qtensor[h][w].item(), (custom_data[i] - zero_point) * scale); } } ASSERT_EQ((float)qtensor.q_scale(), (float)scale); ASSERT_EQ(qtensor.q_zero_point(), zero_point); } TORCH_CHECK(customDataDeleted); } TEST(TestQTensor, FromBlobQuantizedPerChannel) { int C = 64, H = 10, W = 5; std::vector shape = {1, C, H, W}; auto scales = rand({C}).toType(kDouble); auto zero_points = randint(10, {C}).toType(kLong); auto numel = c10::multiply_integers(shape); int ch_axis = 1; TensorOptions options(at::kQUInt8); auto custom_vec = std::make_unique>(); custom_vec->resize(numel); uint8_t* custom_data = custom_vec->data(); for (const auto i : c10::irange(numel)) { custom_data[i] = i; } bool customDataDeleted{false}; auto deleteWhenDone = custom_vec.release(); auto deleter = [deleteWhenDone, custom_data, &customDataDeleted](void* inp) { ASSERT_EQ((void*)inp, (void*)custom_data); delete deleteWhenDone; customDataDeleted = true; }; { Tensor qtensor = at::from_blob_quantized_per_channel_affine(custom_data, shape, deleter, scales, zero_points, ch_axis, options); uint8_t* q_data = (uint8_t*)qtensor.data_ptr(); for (const auto i : c10::irange(numel)) { ASSERT_EQ((int)custom_data[i], (int)q_data[i]); } ASSERT_TRUE(at::allclose(qtensor.q_per_channel_scales(), scales)); ASSERT_TRUE(at::allclose(qtensor.q_per_channel_zero_points(), zero_points)); ASSERT_TRUE(qtensor.is_quantized()); } TORCH_CHECK(customDataDeleted); } #if defined(__ARM_NEON__) || defined(__aarch64__) TEST(TestQTensor, TestArmVectorizedQuantizeDequantize) { const float scale = 7; const int numel = 132; std::vector x_values; for (const auto i : c10::irange(numel)) { x_values.push_back(9 * i); } const Tensor x = from_blob(x_values.data(), x_values.size()); auto test_for_datatype = [&]( const ScalarType scalar_type, const auto get_data_ptr, const auto quantize_val_with_datatype, const int zero_point_min, const int zero_point_max) { for (int zero_point : {zero_point_min, 10, zero_point_max}) { const Tensor q = at::quantize_per_tensor(x, scale, zero_point, scalar_type); auto* q_data = get_data_ptr(q); for (const auto i : c10::irange(numel)) { ASSERT_EQ( q_data[i].val_, quantize_val_with_datatype(scale, zero_point, x_values[i]).val_); } const Tensor r = q.dequantize(); const float* r_data = r.const_data_ptr(); for (const auto i : c10::irange(numel)) { ASSERT_FLOAT_EQ( r_data[i], native::dequantize_val(scale, zero_point, q_data[i])); } } }; // Unsigned Int 8 test_for_datatype( kQUInt8, [](Tensor q) { return q.data_ptr(); }, native::quantize_val, std::numeric_limits::min(), std::numeric_limits::max()); // Signed Int 8 test_for_datatype( kQInt8, [](Tensor q) { return q.data_ptr(); }, native::quantize_val, std::numeric_limits::min(), std::numeric_limits::max()); // Signed Int 32 (not optimized with vectorization) test_for_datatype( kQInt32, [](Tensor q) { return q.data_ptr(); }, native::quantize_val, std::numeric_limits::min(), std::numeric_limits::max()); } #endif // (__ARM_NEON__) || defined(__aarch64__) #endif // ATEN_CPU_STATIC_DISPATCH