/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #include #include #include #include #include using exec_aten::ScalarType; using exec_aten::Tensor; using executorch::aten::RuntimeContext; using executorch::runtime::ArrayRef; using torch::executor::Error; using torch::executor::optional; namespace cadence { namespace impl { namespace HiFi { namespace native { int prepare_data( const Tensor& in, Tensor& out, optional> dim_list, int* inp_shape, int* out_shape, int* p_axis, int num_inp_dims, int num_out_dims) { for (int i = 0; i < num_inp_dims; i++) { inp_shape[i] = in.size(i); } for (int i = 0; i < num_out_dims; i++) { out_shape[i] = out.size(i); } int num_axis_dims = 0; for (const auto& d : dim_list.value()) { if (d < 0) { p_axis[num_axis_dims] = num_inp_dims + d; num_axis_dims++; } else { p_axis[num_axis_dims] = d; num_axis_dims++; } } return num_axis_dims; } Tensor& mean_dim_out( RuntimeContext& ctx, const Tensor& in, optional> dim_list, bool keepdim, optional dtype, Tensor& out) { ET_KERNEL_CHECK( ctx, torch::executor::check_mean_dim_args(in, dim_list, keepdim, dtype, out), InvalidArgument, out); ET_KERNEL_CHECK( ctx, torch::executor::resize_reduction_out(in, dim_list, keepdim, out) == Error::Ok, InvalidArgument, out); constexpr auto name = "mean.out"; constexpr int kNnlibMaxDim = 4; bool optimized = 1; if (out.scalar_type() != ScalarType::Float) optimized = 0; if (in.dim() > kNnlibMaxDim) optimized = 0; if (optimized) { float* __restrict__ p_out = out.mutable_data_ptr(); const float* __restrict__ p_inp = (const float* __restrict__)in.const_data_ptr(); int num_elm = in.numel(); int num_inp_dims = in.dim(); int num_out_dims = out.dim(); int inp_shape[kNnlibMaxDim]; int out_shape[kNnlibMaxDim]; int p_axis[kNnlibMaxDim]; for (int i = 0; i < kNnlibMaxDim; i++) { out_shape[i] = 1; inp_shape[i] = 1; p_axis[i] = 1; } int num_axis_dims = prepare_data( in, out, dim_list, inp_shape, out_shape, p_axis, num_inp_dims, num_out_dims); if (num_axis_dims == num_inp_dims) { num_out_dims = 1; out_shape[0] = 1; } int scratch_size = xa_nn_reduce_getsize_nhwc( -3, inp_shape, num_inp_dims, p_axis, num_axis_dims, 1); void* __restrict__ p_scratch_in = (void* __restrict__)malloc(scratch_size); xa_nn_reduce_mean_4D_f32_f32( p_out, out_shape, p_inp, inp_shape, p_axis, num_out_dims, num_inp_dims, num_axis_dims, p_scratch_in); return out; } ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, name, CTYPE_IN, [&] { ET_SWITCH_FLOATH_TYPES(out.scalar_type(), ctx, name, CTYPE_OUT, [&] { CTYPE_OUT* out_data = out.mutable_data_ptr(); const size_t num = torch::executor::get_reduced_dim_product(in, dim_list); for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) { CTYPE_OUT sum = 0; if (in.numel() > 0) { sum = torch::executor::map_reduce_over_dim_list( [](CTYPE_IN v) { return static_cast(v); }, [](CTYPE_OUT outv, CTYPE_OUT acc) { return acc + outv; }, in, dim_list, out_ix); } out_data[out_ix] = sum / static_cast(num); } }); }); return out; } } // namespace native } // namespace HiFi } // namespace impl } // namespace cadence