/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include "PRelu.h"

#include <algorithm>
#include <functional>
#include <vector>

#include "IndexedShapeWrapper.h"
#include "OperationResolver.h"
#include "OperationsExecutionUtils.h"
#include "Tracing.h"

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#pragma clang diagnostic ignored "-Wsign-compare"
#pragma clang diagnostic ignored "-Winvalid-partial-specialization"
#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
#pragma clang diagnostic pop
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

namespace android {
namespace nn {
namespace prelu {

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
// Applies a binary element-wise function to two tensors with broadcasting.
//
// Every index of outputShape is visited once. For each index, the matching flat positions in the
// two inputs are obtained with IndexedShapeWrapper::broadcastedIndexToFlatIndex and the result of
// func(a, b) is stored at the corresponding flat position of the output.
//
// The callable type is a deduced template parameter instead of a std::function so that the
// per-element invocation is a direct call rather than a type-erased one. Call sites of the form
// eval<T>(callable, ...) keep working because only T has to be spelled out.
//
// Parameters:
//   func        - callable invoked as func(const T&, const T&); its result is converted to T.
//   aData       - elements of the first operand, laid out according to aShape.
//   bData       - elements of the second operand, laid out according to bShape.
//   outputData  - destination buffer, laid out according to outputShape.
//
// Returns true once all output indices have been processed. Any index computation that fails
// makes NN_RET_CHECK leave the function early.
template <typename T, typename Func>
inline bool eval(const Func& func, const T* aData, const Shape& aShape, const T* bData,
                 const Shape& bShape, T* outputData, const Shape& outputShape) {
    IndexedShapeWrapper aShapeIndexed(aShape);
    IndexedShapeWrapper bShapeIndexed(bShape);
    IndexedShapeWrapper outputShapeIndexed(outputShape);
    // Multi-dimensional cursor into the output, starting at the all-zero index.
    std::vector<uint32_t> curIndex(outputShape.dimensions.size(), 0);
    bool lastIndex = false;
    // do/while: the body runs for the initial index before the wrapper is asked for the next one.
    do {
        uint32_t outputFlatIndex;
        NN_RET_CHECK(outputShapeIndexed.indexToFlatIndex(curIndex, &outputFlatIndex));
        uint32_t aFlatIndex;
        NN_RET_CHECK(aShapeIndexed.broadcastedIndexToFlatIndex(curIndex, &aFlatIndex));
        uint32_t bFlatIndex;
        NN_RET_CHECK(bShapeIndexed.broadcastedIndexToFlatIndex(curIndex, &bFlatIndex));

        // The explicit cast keeps the "result is converted to T" contract that the former
        // std::function<T(const T&, const T&)> signature provided.
        outputData[outputFlatIndex] = static_cast<T>(func(aData[aFlatIndex], bData[bFlatIndex]));

        NN_RET_CHECK(outputShapeIndexed.nextIndexInplace(&curIndex, &lastIndex));
    } while (!lastIndex);
    return true;
}

// Quantized PReLU for 8-bit asymmetric tensors; T is the storage type of the elements.
//
// Each input element is first shifted by the input zero point (aShape.offset).
//   - Non-negative shifted values are rescaled by aShape.scale / outputShape.scale.
//   - Negative shifted values are multiplied by the alpha element (shifted by bShape.offset) and
//     rescaled by aShape.scale * bShape.scale / outputShape.scale.
// The output zero point (outputShape.offset) is then added and the value is narrowed to T with
// saturateCast<T>.
//
// The two real-valued rescale factors are converted once, up front, by tflite::QuantizeMultiplier
// into the (multiplier, shift) pairs consumed by tflite::MultiplyByQuantizedMultiplier.
//
// Broadcasting between the input and alpha tensors is handled by eval(); the return value is
// whatever eval() returns.
template <typename T>
bool evalQuant8(const T* aData, const Shape& aShape, const T* bData, const Shape& bShape,
                T* outputData, const Shape& outputShape) {
    // Zero points are negated so they can simply be added to the raw quantized values below.
    const int32_t input_offset = -aShape.offset;
    const int32_t alpha_offset = -bShape.offset;
    const int32_t output_offset = outputShape.offset;
    const double input_product_scale = aShape.scale * bShape.scale;
    const double real_multiplier_pos = aShape.scale / outputShape.scale;
    const double real_multiplier_neg = input_product_scale / outputShape.scale;
    int32_t output_multiplier_pos, output_shift_pos;
    int32_t output_multiplier_neg, output_shift_neg;
    tflite::QuantizeMultiplier(real_multiplier_pos, &output_multiplier_pos, &output_shift_pos);
    tflite::QuantizeMultiplier(real_multiplier_neg, &output_multiplier_neg, &output_shift_neg);
    return eval<T>(
            // The lambda returns T (not a fixed uint8_t) so that signed instantiations do not
            // round-trip negative results through an unsigned type.
            [&](const T& val1, const T& val2) -> T {
                const int32_t input = input_offset + static_cast<int32_t>(val1);
                int32_t output_val;
                if (input >= 0) {
                    output_val =
                            output_offset + tflite::MultiplyByQuantizedMultiplier(
                                                    input, output_multiplier_pos, output_shift_pos);
                } else {
                    const int32_t alpha = alpha_offset + static_cast<int32_t>(val2);
                    output_val = output_offset +
                                 tflite::MultiplyByQuantizedMultiplier(
                                         input * alpha, output_multiplier_neg, output_shift_neg);
                }
                return saturateCast<T>(output_val);
            },
            aData, aShape, bData, bShape, outputData, outputShape);
}

// Computes the output shape of the operation and stores it on the execution context.
//
// The input and alpha tensors must have the same operand type. The output shape currently
// reported by the context is used as the starting point and is then filled in by
// calculateBroadcastedShape(input, alpha, &output).
//
// Returns the result of context->setOutputShape(). If either NN_RET_CHECK condition does not
// hold, the function is left early without setting the output shape.
bool prepare(IOperationExecutionContext* context) {
    Shape input = context->getInputShape(kInputTensor);
    Shape alpha = context->getInputShape(kAlphaTensor);
    NN_RET_CHECK(input.type == alpha.type);
    Shape output = context->getOutputShape(kOutputTensor);
    NN_RET_CHECK(calculateBroadcastedShape(input, alpha, &output));
    return context->setOutputShape(kOutputTensor, output);
}

bool execute(IOperationExecutionContext* context) {
    switch (context->getInputType(kInputTensor)) {
        case OperandType::TENSOR_FLOAT16:
            return eval<_Float16>(
                    [](const _Float16& val1, const _Float16& val2) -> _Float16 {
                        return val1 >= 0.0f ? val1 : val1 * val2;
                    },
                    context->getInputBuffer<_Float16>(kInputTensor),
                    context->getInputShape(kInputTensor),
                    context->getInputBuffer<_Float16>(kAlphaTensor),
                    context->getInputShape(kAlphaTensor),
                    context->getOutputBuffer<_Float16>(kOutputTensor),
                    context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_FLOAT32:
            return eval<float>(
                    [](const float& val1, const float& val2) -> float {
                        return val1 >= 0.0f ? val1 : val1 * val2;
                    },
                    context->getInputBuffer<float>(kInputTensor),
                    context->getInputShape(kInputTensor),
                    context->getInputBuffer<float>(kAlphaTensor),
                    context->getInputShape(kAlphaTensor),
                    context->getOutputBuffer<float>(kOutputTensor),
                    context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM: {
            return evalQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
                              context->getInputShape(kInputTensor),
                              context->getInputBuffer<uint8_t>(kAlphaTensor),
                              context->getInputShape(kAlphaTensor),
                              context->getOutputBuffer<uint8_t>(kOutputTensor),
                              context->getOutputShape(kOutputTensor));
        }
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
            return evalQuant8(context->getInputBuffer<int8_t>(kInputTensor),
                              context->getInputShape(kInputTensor),
                              context->getInputBuffer<int8_t>(kAlphaTensor),
                              context->getInputShape(kAlphaTensor),
                              context->getOutputBuffer<int8_t>(kOutputTensor),
                              context->getOutputShape(kOutputTensor));
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

}  // namespace prelu

// Registers the PRELU operation, naming prelu::prepare and prelu::execute as its handlers.
// NOTE(review): both functions are only defined when NN_INCLUDE_CPU_IMPLEMENTATION is set, while
// this registration is outside that guard; presumably the macro ignores the function arguments
// when the CPU implementation is excluded - confirm against the macro definition.
NN_REGISTER_OPERATION_DEFAULT_VALIDATION(PRELU, prelu::prepare, prelu::execute);

}  // namespace nn
}  // namespace android
