// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <stddef.h>
#include <stdint.h>

#include <xnnpack/common.h>
#include <xnnpack/microparams.h>

#ifdef __cplusplus
extern "C" {
#endif


#define DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                              \
      size_t n,                                           \
      const uint8_t* input_a,                             \
      const uint8_t* input_b,                             \
      uint8_t* output,                                    \
      const union xnn_qu8_mul_minmax_params* params);

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neon_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neon_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neon_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_rndnu_ukernel__neon_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x1)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x2)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmul_minmax_fp32_ukernel__scalar_x4)


DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neon_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16)

DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x1)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x2)
DECLARE_QU8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qu8_vmulc_minmax_fp32_ukernel__scalar_x4)


#define DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                              \
      size_t n,                                           \
      const int8_t* input_a,                              \
      const int8_t* input_b,                              \
      int8_t* output,                                     \
      const union xnn_qs8_mul_minmax_params* params);

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neon_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_rndnu_ukernel__neon_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__neonv8_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x1)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x2)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmul_minmax_fp32_ukernel__scalar_x4)


DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neon_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_rndnu_ukernel__neon_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld64_x16)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__neonv8_ld128_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x8)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__wasmsimd_mul32_ld64_x16)

DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x1)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x2)
DECLARE_QS8_VMUL_MINMAX_UKERNEL_FUNCTION(xnn_qs8_vmulc_minmax_fp32_ukernel__scalar_x4)


#ifdef __cplusplus
}  // extern "C"
#endif
