/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include // Declares the operator #include #include #include #include #include #include using namespace ::testing; using exec_aten::ArrayRef; using exec_aten::optional; using exec_aten::ScalarType; using exec_aten::Tensor; using torch::executor::testing::TensorFactory; // To further emphasize the accuracy of our op_to, we TEST_F the conversion // from floating-point types to signed int types directly by the TEST_F cases // generated by core Pytorch directly. Such data is random generated in [-5, 5]. // clang-format off typedef std::map< std::type_index, std::variant< std::vector, std::vector>> FloatingTypeToDataMap; typedef std::map< std::type_index, std::variant< std::vector, std::vector, std::vector, std::vector, std::vector>> IntTypeToDataMap; // clang-format on class OpToDimOrderCopyTest : public OperatorTest { protected: Tensor& op__to_dim_order_copy_out( const Tensor& self, bool non_blocking, exec_aten::optional> dim_order, Tensor& out) { return torch::executor::dim_order_ops::_to_dim_order_copy_outf( context_, self, non_blocking, dim_order, out); } // Cast float vector to OUTPUT_CTYPE vector template std::vector vector_type_cast(std::vector input) { std::vector output(input.size()); std::transform( input.begin(), input.end(), output.begin(), [](INPUT_CTYPE x) { return static_cast(x); }); return output; } template struct ToTestCase { const std::vector sizes; const std::vector data_in; const std::vector data_out; }; // Each TEST_F has different combination of input and output types. Therefore // it is a little bit mess if create template TEST_F case and custom data // types for both input data and output data. We choose another way: for all // TEST_F cases, their data are all in double. And we are gonna cast them into // desired type when delievering them into tf.make function. Based on our // experiments, type cast of core PyTorch is same as static_cast in c++ in the // representable scope, so here we believe using static_cast to generate // ground truth is reasonable. template < typename INPUT_CTYPE, ScalarType INPUT_DTYPE, typename OUTPUT_CTYPE, ScalarType OUTPUT_DTYPE> void test_runner_static_cast( std::vector> test_cases) { TensorFactory tf_in; TensorFactory tf_out; for (const auto& test_case : test_cases) { auto data_in = vector_type_cast(test_case.data_in); auto data_out = vector_type_cast(data_in); Tensor input = tf_in.make(test_case.sizes, data_in); Tensor output = tf_out.zeros_like(input); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, output); Tensor expected = tf_out.make(test_case.sizes, data_out); // The original tensor a should share same value with the out variable and // return variable of to function EXPECT_TENSOR_EQ(ret, output); EXPECT_TENSOR_EQ(ret, expected); } } template void test_runner_to_bool( std::vector test_case, std::vector data_out) { TensorFactory tf_in; TensorFactory tf_out; auto data_in = vector_type_cast(test_case); Tensor input = tf_in.make({(int)test_case.size()}, data_in); Tensor output = tf_out.zeros_like(input); std::vector dim_order_vec; for (int i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, output); Tensor expected = tf_out.make({(int)data_out.size()}, data_out); // The return value of op__to_dim_order_copy_out and the values written to // output should be the same. EXPECT_TENSOR_EQ(ret, output); // The return value of op__to_dim_order_copy_out and the values in expected // which are the reference values should be the same. EXPECT_TENSOR_EQ(ret, expected); } template void test_runner_from_bool( std::vector test_case, std::vector out) { TensorFactory tf_in; TensorFactory tf_out; auto data_out = vector_type_cast(out); Tensor input = tf_in.make({(int)test_case.size()}, test_case); Tensor output = tf_out.zeros_like(input); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, output); Tensor expected = tf_out.make({(int)data_out.size()}, data_out); // The return value of op__to_dim_order_copy_out and the values written to // output should be the same. EXPECT_TENSOR_EQ(ret, output); // The return value of op__to_dim_order_copy_out and the values in expected // which are the reference values should be the same. EXPECT_TENSOR_EQ(ret, expected); } /* %python import torch torch.manual_seed(0) x = torch.rand(2, 3) res = x.to(non_blocking = False, memory_format = torch.preserve_format) op = "op__to_dim_order_copy_out" opt_setup_params = """ bool non_blocking = false; optional memory_format; """ opt_extra_params = "non_blocking, memory_format," out_args = "out_shape, dynamism" dtype = "ScalarType::Float" check = "EXPECT_TENSOR_EQ" */ void test_dynamic_shape( const std::vector& out_shape, enum torch::executor::TensorShapeDynamism dynamism) { /* %python %rewrite(unary_op) */ TensorFactory tf; Tensor x = tf.make( {2, 3}, {0.49625658988952637, 0.7682217955589294, 0.08847743272781372, 0.13203048706054688, 0.30742281675338745, 0.6340786814689636}); Tensor expected = tf.make( {2, 3}, {0.49625658988952637, 0.7682217955589294, 0.08847743272781372, 0.13203048706054688, 0.30742281675338745, 0.6340786814689636}); bool non_blocking = false; Tensor out = tf.zeros(out_shape, dynamism); std::vector dim_order_vec; for (int64_t i = 0; i < x.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self=*/x, non_blocking, dim_order, out); EXPECT_TENSOR_EQ(out, expected); EXPECT_TENSOR_EQ(ret, expected); } template < typename INPUT_CTYPE, ScalarType INPUT_DTYPE, typename OUTPUT_CTYPE, ScalarType OUTPUT_DTYPE> void test_runner_hardcode_data( FloatingTypeToDataMap floating_point_data, IntTypeToDataMap int_data) { TensorFactory tf_in; TensorFactory tf_out; if (typeid(OUTPUT_CTYPE) == typeid(uint8_t)) { // Would cause underflow when testing uint8_t. return; } ToTestCase test_case = { /*sizes=*/{3, 5}, /*data_in=*/ std::get>( floating_point_data[typeid(INPUT_CTYPE)]), /*data_out=*/ std::get>(int_data[typeid(OUTPUT_CTYPE)])}; Tensor input = tf_in.make(test_case.sizes, test_case.data_in); Tensor output = tf_out.zeros_like(input); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, output); Tensor expected = tf_out.make(test_case.sizes, test_case.data_out); // The original tensor a should share same value with the out variable and // return variable of to function EXPECT_TENSOR_EQ(ret, output); EXPECT_TENSOR_EQ(ret, expected); } }; /* Here we temporary not try to implement or TEST_F the behavior about casting a * number can not be represented in some type to this type (e.g. inf to * int32_t nan to int64_t or 2147483648 to int32_t), because * - a. The result of such kind of cast is undefined according to c++ * standard; * - b. No explicit rules can be found in core pytorch for such transaction * (not same as static_cast or any other casting function in c++); * - c. If user tries to cast a unrepresentable value to certain type, they * should take the risk; * - d. Even though we can always use if/switch to cover these boundry cases, * the code will be lengthy and jumbled. I believe using these disordered * code to meet some undefine behavior is meaningless, and we can not * cover all such cases. */ // Regular TEST_F for to_copy.out // TEST_F if to_copy.out works well under all kinds of data pairs TEST_F(OpToDimOrderCopyTest, AllDtypesSupported) { std::vector> test_cases = { { /*sizes=*/{2, 4}, /*data_in=*/ {2.11, 3.2, 2.3, 4.0, 1.1, 5.2, 1.1, 6.3}, /*data_out=*/ {}, // data_out shouldn't be used in test_runner_static_cast }, { /*sizes=*/{3, 4, 0, 5}, /*data_in=*/{}, /*data_out=*/{}, }, { /*sizes=*/{}, /*data_in=*/{10.0}, /*data_out=*/{}, // data_out shouldn't be used in // test_runner_static_cast }, }; #define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ test_runner_static_cast< \ INPUT_CTYPE, \ ScalarType::INPUT_DTYPE, \ OUTPUT_CTYPE, \ ScalarType::OUTPUT_DTYPE>(test_cases); #define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ ET_FORALL_REAL_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); ET_FORALL_REAL_TYPES(TEST_ENTRY); #undef TEST_ENTRY #undef TEST_KERNEL } TEST_F(OpToDimOrderCopyTest, BoolTests) { std::vector test_case_to_bool = {1.1, 2.2, 0}; std::vector result_to_bool = {true, true, false}; #define TEST_TO_BOOL(INPUT_CTYPE, INPUT_DTYPE) \ test_runner_to_bool( \ test_case_to_bool, result_to_bool); ET_FORALL_REAL_TYPES(TEST_TO_BOOL); std::vector test_case_from_bool = {true, true, false}; std::vector result_from_bool = {1.0, 1.0, 0}; #define TEST_FROM_BOOL(OUTPUT_CTYPE, OUTPUT_DTYPE) \ test_runner_from_bool( \ test_case_from_bool, result_from_bool); ET_FORALL_REAL_TYPES(TEST_FROM_BOOL); } TEST_F(OpToDimOrderCopyTest, NanInfSupported) { constexpr auto floatInfinity = std::numeric_limits::infinity(); std::vector> test_cases = {{ /*sizes=*/{2, 4}, /*data_in=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6}, /*data_out=*/{2, 3, NAN, 4, floatInfinity, 5, -floatInfinity, 6}, }}; #define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ test_runner_static_cast< \ INPUT_CTYPE, \ ScalarType::INPUT_DTYPE, \ OUTPUT_CTYPE, \ ScalarType::OUTPUT_DTYPE>(test_cases); #define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ ET_FORALL_FLOAT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); ET_FORALL_FLOAT_TYPES(TEST_ENTRY); #undef TEST_ENTRY #undef TEST_KERNEL } TEST_F(OpToDimOrderCopyTest, HardcodeFloatConvertInt) { // Hardcode input and output generated from core PyTorch // clang-format off std::vector float_data = { -1.47900056838989257812, -4.59277725219726562500, 2.15365791320800781250, -2.55494546890258789062, 3.06999135017395019531, 3.27460670471191406250, -3.98865103721618652344, -4.81065988540649414062, 3.67902207374572753906, 3.72226405143737792969, 0.80567771196365356445, 2.23788332939147949219, -0.52035576105117797852, -1.58493483066558837891, -0.30919688940048217773}; std::vector double_data = { -1.47900053955270172068, -4.59277735274143061872, 2.15365796963871947156, -2.55494554556038755422, 3.06999137834642255029, 3.27460679459944969949, -3.98865109243288795682, -4.81065977167646074975, 3.67902198302105531980, 3.72226414774102742911, 0.80567768667100203572, 2.23788335717029518435, -0.52035578832931150828, -1.58493480710766210251, -0.30919688936285893988}; // clang-format on std::vector int64_data = { -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; std::vector int32_data = { -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; std::vector int16_data = { -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; std::vector int8_data = { -1, -4, 2, -2, 3, 3, -3, -4, 3, 3, 0, 2, 0, -1, 0}; // Gathering all floating point data together for better traversial FloatingTypeToDataMap floating_point_data; floating_point_data[typeid(float)] = float_data; floating_point_data[typeid(double)] = double_data; // Gathering all int data together for better traversial IntTypeToDataMap int_data; int_data[typeid(int64_t)] = int64_data; int_data[typeid(int32_t)] = int32_data; int_data[typeid(int16_t)] = int16_data; int_data[typeid(int8_t)] = int8_data; #define TEST_KERNEL(INPUT_CTYPE, INPUT_DTYPE, OUTPUT_CTYPE, OUTPUT_DTYPE) \ test_runner_hardcode_data< \ INPUT_CTYPE, \ ScalarType::INPUT_DTYPE, \ OUTPUT_CTYPE, \ ScalarType::OUTPUT_DTYPE>(floating_point_data, int_data); #define TEST_ENTRY(INPUT_CTYPE, INPUT_DTYPE) \ ET_FORALL_INT_TYPES_WITH2(INPUT_CTYPE, INPUT_DTYPE, TEST_KERNEL); ET_FORALL_FLOAT_TYPES(TEST_ENTRY); } TEST_F(OpToDimOrderCopyTest, MismatchedSizesDie) { if (torch::executor::testing::SupportedFeatures::get()->is_aten) { GTEST_SKIP() << "ATen kernel can handle mismatched sizes"; } TensorFactory tf; Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); Tensor out = tf.zeros({3, 2, 1, 1}); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); ET_EXPECT_KERNEL_FAILURE( context_, op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, out)); } // Only contiguous memory is supported, the memory type MemoryFormat::Contiguous // should not be allowed. The function is expected death if using the illegal // memory format. TEST_F(OpToDimOrderCopyTest, MismatchedMemoryFormatDies) { if (torch::executor::testing::SupportedFeatures::get()->is_aten) { GTEST_SKIP() << "ATen kernel can handle non contiguous memory formats"; } TensorFactory tf_in; TensorFactory tf_out; Tensor input = tf_in.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); Tensor out = tf_out.zeros({3, 1, 1, 2}); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } // mutate dim_order_vec to create a illegal one. dim_order_vec[1] = 3; dim_order_vec[3] = 1; ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); ET_EXPECT_KERNEL_FAILURE( context_, op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/false, dim_order, out)); } // Only blocking data transfer supported TEST_F(OpToDimOrderCopyTest, MismatchedBlockingDie) { if (torch::executor::testing::SupportedFeatures::get()->is_aten) { GTEST_SKIP() << "ATen kernel can handle non blocking data transfer"; } TensorFactory tf; Tensor input = tf.make(/*sizes=*/{3, 1, 1, 2}, /*data=*/{1, 2, 3, 4, 5, 6}); Tensor out = tf.zeros(/*sizes=*/{3, 1, 1, 2}); std::vector dim_order_vec; for (int64_t i = 0; i < input.dim(); i++) { dim_order_vec.push_back(i); } ArrayRef dim_order(dim_order_vec.data(), dim_order_vec.size()); ET_EXPECT_KERNEL_FAILURE( context_, op__to_dim_order_copy_out( /*self=*/input, /*non_blocking=*/true, dim_order, out)); } TEST_F(OpToDimOrderCopyTest, DynamicShapeUpperBoundSameAsExpected) { test_dynamic_shape( {2, 3}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND); } TEST_F(OpToDimOrderCopyTest, DynamicShapeUpperBoundLargerThanExpected) { test_dynamic_shape( {10, 10}, torch::executor::TensorShapeDynamism::DYNAMIC_BOUND); } TEST_F(OpToDimOrderCopyTest, DynamicShapeUnbound) { if (!torch::executor::testing::SupportedFeatures::get()->output_resize) { GTEST_SKIP() << "Dynamic shape unbound not supported"; } test_dynamic_shape( {1, 1}, torch::executor::TensorShapeDynamism::DYNAMIC_UNBOUND); } TEST_F(OpToDimOrderCopyTest, ContiguousToChannelsLast) { TensorFactory tf; Tensor x = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.5248, 0.5361, 0.8513, 0.8184, 0.8206, 0.7357, 0.9655, 0.6138, 0.1112, 0.2799, 0.1079, 0.9680, 0.2548, 0.0393, 0.6002, 0.2257, 0.8766, 0.2715, 0.1595, 0.2029, 0.7026, 0.6982, 0.8529, 0.4405, 0.6560, 0.9217, 0.6372, 0.2446, 0.6590, 0.3866, 0.7185, 0.4439, 0.5346, 0.3179, 0.4492, 0.3491, 0.6970, 0.8456, 0.2516, 0.2345, 0.2924, 0.7695, 0.0911, 0.8530, 0.8560, 0.6909, 0.7719, 0.8923, 0.5546, 0.6978, 0.8151, 0.3007, 0.3961, 0.8416, 0.4296, 0.7203, 0.8963, 0.3597, 0.5552}); Tensor out = tf.full_channels_last({3, 5, 2, 2}, 0.0); Tensor expected = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, /*dim_order=*/{0, 2, 3, 1}); std::vector dim_order_vec = {0, 2, 3, 1}; exec_aten::ArrayRef dim_order( dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self*/ x, /*non_blocking*/ false, /*dim_order*/ dim_order, out); EXPECT_TENSOR_EQ(out, expected); EXPECT_TENSOR_EQ(ret, expected); } TEST_F(OpToDimOrderCopyTest, ChannelsLastToContiguous) { TensorFactory tf; Tensor out = tf.full({3, 5, 2, 2}, 0.0); Tensor x = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, /*dim_order=*/{0, 2, 3, 1}); Tensor expected = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.5248, 0.5361, 0.8513, 0.8184, 0.8206, 0.7357, 0.9655, 0.6138, 0.1112, 0.2799, 0.1079, 0.9680, 0.2548, 0.0393, 0.6002, 0.2257, 0.8766, 0.2715, 0.1595, 0.2029, 0.7026, 0.6982, 0.8529, 0.4405, 0.6560, 0.9217, 0.6372, 0.2446, 0.6590, 0.3866, 0.7185, 0.4439, 0.5346, 0.3179, 0.4492, 0.3491, 0.6970, 0.8456, 0.2516, 0.2345, 0.2924, 0.7695, 0.0911, 0.8530, 0.8560, 0.6909, 0.7719, 0.8923, 0.5546, 0.6978, 0.8151, 0.3007, 0.3961, 0.8416, 0.4296, 0.7203, 0.8963, 0.3597, 0.5552}); std::vector dim_order_vec = {0, 1, 2, 3}; exec_aten::ArrayRef dim_order( dim_order_vec.data(), dim_order_vec.size()); Tensor ret = op__to_dim_order_copy_out( /*self*/ x, /*non_blocking*/ false, /*dim_order*/ dim_order, out); EXPECT_TENSOR_EQ(out, expected); EXPECT_TENSOR_EQ(ret, expected); } TEST_F(OpToDimOrderCopyTest, PreserveChanneslLast) { TensorFactory tf; Tensor out = tf.full_channels_last({3, 5, 2, 2}, 0.0); Tensor x = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, /*dim_order=*/{0, 2, 3, 1}); Tensor expected = tf.make_with_dimorder( {3, 5, 2, 2}, {0.2432, 0.8184, 0.6138, 0.9680, 0.2257, 0.5248, 0.8206, 0.1112, 0.2548, 0.8766, 0.5361, 0.7357, 0.2799, 0.0393, 0.2715, 0.8513, 0.9655, 0.1079, 0.6002, 0.1595, 0.2029, 0.4405, 0.2446, 0.4439, 0.3491, 0.7026, 0.6560, 0.6590, 0.5346, 0.6970, 0.6982, 0.9217, 0.3866, 0.3179, 0.8456, 0.8529, 0.6372, 0.7185, 0.4492, 0.2516, 0.2345, 0.8530, 0.8923, 0.3007, 0.7203, 0.2924, 0.8560, 0.5546, 0.3961, 0.8963, 0.7695, 0.6909, 0.6978, 0.8416, 0.3597, 0.0911, 0.7719, 0.8151, 0.4296, 0.5552}, /*dim_order=*/{0, 2, 3, 1}); Tensor ret = op__to_dim_order_copy_out( /*self*/ x, /*non_blocking*/ false, /*dim_order*/ exec_aten::nullopt, out); EXPECT_TENSOR_EQ(out, expected); EXPECT_TENSOR_EQ(ret, expected); }