// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

$assert BATCH_TILE >= 1
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <xnnpack/math.h>
#include <xnnpack/window.h>


// Scalar int16 windowing micro-kernel.
//
// For each of the `rows` rows, `batch_size` consecutive input elements are
// multiplied element-wise by the `batch_size` entries of `weights`. Each
// 32-bit product is passed through math_asr_s32 with `shift`, limited to the
// [INT16_MIN, INT16_MAX] range and stored to `output`.
//
// `input` and `output` advance continuously from one row to the next, while
// the weights are re-read from the beginning for every row. `batch_size`
// counts elements, not bytes. All arguments must be non-zero / non-NULL and
// `shift` must be below 32 (checked with assert only).
void xnn_s16_window_ukernel__scalar_x${BATCH_TILE}(
    size_t rows,
    size_t batch_size,
    const int16_t* input,
    const int16_t* weights,
    int16_t* output,
    uint32_t shift)
{
  assert(input != NULL);
  assert(weights != NULL);
  assert(output != NULL);
  assert(rows != 0);
  assert(batch_size != 0);
  assert(shift < 32);

  do {
    // Every row starts again from the first weight.
    const int16_t* row_weights = weights;
    size_t remaining = batch_size;
    $if BATCH_TILE > 1:
      // Unrolled main loop: all loads of a tile happen before any store.
      for (; remaining >= ${BATCH_TILE}; remaining -= ${BATCH_TILE}) {
        $for N in range(BATCH_TILE):
          const int16_t vi${N} = input[${N}];
        $for N in range(BATCH_TILE):
          const int16_t vw${N} = row_weights[${N}];
        input += ${BATCH_TILE};
        row_weights += ${BATCH_TILE};

        // Widen to 32 bits before multiplying so the product cannot overflow.
        // NOTE(review): math_asr_s32 comes from xnnpack/math.h and is presumably
        // an arithmetic (sign-preserving) right shift - confirm there.
        $for N in range(BATCH_TILE):
          int32_t vacc${N} = math_asr_s32((int32_t) vi${N} * (int32_t) vw${N}, shift);

        // Limit to the int16_t range so the narrowing stores below are lossless.
        $for N in range(BATCH_TILE):
          vacc${N} = math_min_s32(math_max_s32(vacc${N}, INT16_MIN), INT16_MAX);

        $for N in range(BATCH_TILE):
          output[${N}] = (int16_t) vacc${N};
        output += ${BATCH_TILE};
      }

    // Elements that do not fill a whole tile are processed one at a time.
    if XNN_UNLIKELY(remaining != 0) {
      do {
        const int16_t vi = *input;
        const int16_t vw = *row_weights;
        input += 1;
        row_weights += 1;

        int32_t vacc = math_asr_s32((int32_t) vi * (int32_t) vw, shift);
        vacc = math_min_s32(math_max_s32(vacc, INT16_MIN), INT16_MAX);

        *output = (int16_t) vacc;
        output += 1;

        remaining -= 1;
      } while (remaining != 0);
    }

    rows -= 1;
  } while (rows != 0);
}
