import operator_benchmark as op_bench

import torch


# 2D pooling will have input matrix of rank 3 or 4
# "Long"-tagged sweep passed to the QAvgPool2dBenchmark and QMaxPool2dBenchmark
# registrations at the bottom of this file. Each `attrs` row is one
# (C, H, W, k, s, p) combination; N, contig and dtype are supplied through
# `cross_product_configs`.
qpool2d_long_configs = op_bench.config_list(
    attrs=(
        #  C    H    W   k       s       p
        (1, 3, 3, (3, 3), (1, 1), (0, 0)),  # dummy        # noqa: E201,E241
        (3, 64, 64, (3, 3), (2, 2), (1, 1)),  # dummy        # noqa: E201,E241
        # VGG16 pools with original input shape: (-1, 3, 224, 224)
        (64, 224, 224, (2, 2), (2, 2), (0, 0)),  # MaxPool2d-4  # noqa: E201
        (256, 56, 56, (2, 2), (2, 2), (0, 0)),  # MaxPool2d-16 # noqa: E241
    ),
    # C, H, W describe the input layout; k, s, p are the pooling parameters
    # (forwarded as kernel_size, stride and padding by the benchmark classes).
    attr_names=("C", "H", "W", "k", "s", "p"),
    cross_product_configs={
        # Batch size of the generated input.
        "N": (1, 4),
        # Whether the quantized input is left contiguous (True) or handed
        # over as a permuted view (False).
        "contig": (False, True),
        # Quantized dtype passed to torch.quantize_per_tensor.
        "dtype": (torch.quint8,),
    },
    tags=("long",),
)

# "Short"-tagged counterpart of qpool2d_long_configs: a single tiny shape,
# contiguous input only, but swept over three quantized dtypes.
qpool2d_short_configs = op_bench.config_list(
    attrs=((1, 3, 3, (3, 3), (1, 1), (0, 0)),),  # dummy
    # C, H, W describe the input layout; k, s, p are the pooling parameters
    # (forwarded as kernel_size, stride and padding by the benchmark classes).
    attr_names=("C", "H", "W", "k", "s", "p"),
    cross_product_configs={
        # Batch size of the generated input.
        "N": (2,),
        # Contiguous input only in the short sweep.
        "contig": (True,),
        # Quantized dtypes passed to torch.quantize_per_tensor.
        "dtype": (torch.qint32, torch.qint8, torch.quint8),
    },
    tags=("short",),
)

# "Long"-tagged sweep for QAdaptiveAvgPool2dBenchmark. `input_size` is the
# (H, W) of the generated input and `output_size` is forwarded to
# torch.nn.AdaptiveAvgPool2d; the remaining keys mirror the arguments of the
# benchmark's init().
qadaptive_avgpool2d_long_configs = op_bench.cross_product_configs(
    input_size=(
        # VGG16 pools with original input shape: (-1, 3, 224, 224)
        (112, 112),  # MaxPool2d-9
    ),
    output_size=(
        # Larger than the only input size above (112, 112).
        (448, 448),
        # VGG16 pools with original input shape: (-1, 3, 224, 224)
        (224, 224),  # MaxPool2d-4
        (112, 112),  # MaxPool2d-9
        (56, 56),  # MaxPool2d-16 # noqa: E201,E241
        (14, 14),  # MaxPool2d-30 # noqa: E201,E241
    ),
    # Batch sizes and channel counts of the generated input.
    N=(1, 4),
    C=(1, 3, 64, 128),
    # Whether the quantized input is left contiguous (True) or handed over as
    # a permuted view (False).
    contig=(False, True),
    dtype=(torch.quint8,),
    tags=("long",),
)

# "Short"-tagged sweep for QAdaptiveAvgPool2dBenchmark: one fixed
# (N, C, input_size, output_size, contig) row, swept over three quantized
# dtypes.
qadaptive_avgpool2d_short_configs = op_bench.config_list(
    # N=4, C=3, 224x224 input pooled down to 112x112, contiguous input.
    attrs=((4, 3, (224, 224), (112, 112), True),),
    attr_names=("N", "C", "input_size", "output_size", "contig"),
    cross_product_configs={
        # Quantized dtypes passed to torch.quantize_per_tensor.
        "dtype": (torch.qint32, torch.qint8, torch.quint8),
    },
    tags=("short",),
)


class _QPool2dBenchmarkBase(op_bench.TorchBenchmarkBase):
    """Common input construction and forward pass for quantized 2D pooling.

    Subclasses in this file assign ``self.pool_op`` in their ``init`` and then
    call :meth:`setup` to build the quantized input that ``forward`` consumes.
    """

    def setup(self, N, C, H, W, dtype, contig):
        """Build the quantized input and publish it through ``self.inputs``.

        Args:
            N: Batch size. ``0`` selects a rank-3 ``(C, H, W)`` input with no
                batch dimension; any other value yields ``(N, C, H, W)``.
            C: Number of channels.
            H: Input height.
            W: Input width.
            dtype: Quantized dtype handed to ``torch.quantize_per_tensor``.
            contig: When false, the data is laid out channels-last in memory
                and exposed through a channels-first permuted view.
        """
        has_batch_dim = N != 0
        shape = (N, C, H, W) if has_batch_dim else (C, H, W)

        # torch.rand samples [0, 1); shift and scale it to [-128, 128).
        float_input = (torch.rand(*shape) - 0.5) * 256

        # Quantize with an identity scale and no zero-point offset.
        quantized = torch.quantize_per_tensor(
            float_input, scale=1.0, zero_point=0, dtype=dtype
        )

        if not contig:
            # Move channels to the last axis, materialise that layout, then
            # view it channels-first again.
            if has_batch_dim:
                to_channels_last, to_channels_first = (0, 2, 3, 1), (0, 3, 1, 2)
            else:
                to_channels_last, to_channels_first = (1, 2, 0), (2, 0, 1)
            quantized = quantized.permute(*to_channels_last).contiguous()
            quantized = quantized.permute(*to_channels_first)

        self.q_input = quantized
        self.inputs = {"q_input": quantized}

    def forward(self, q_input):
        """Apply the subclass-provided ``self.pool_op`` to ``q_input``."""
        return self.pool_op(q_input)


class QMaxPool2dBenchmark(_QPool2dBenchmarkBase):
    """Benchmark ``torch.nn.MaxPool2d`` on a quantized input."""

    def init(self, N, C, H, W, k, s, p, contig, dtype):
        """Create the max-pool module, then build the quantized input.

        ``k``, ``s`` and ``p`` are the kernel size, stride and padding; the
        remaining arguments are forwarded to the base class ``setup``.
        """
        pool_options = {
            "kernel_size": k,
            "stride": s,
            "padding": p,
            # Fixed for every configuration of this benchmark.
            "dilation": (1, 1),
            "ceil_mode": False,
            "return_indices": False,
        }
        self.pool_op = torch.nn.MaxPool2d(**pool_options)
        super().setup(N, C, H, W, dtype=dtype, contig=contig)


class QAvgPool2dBenchmark(_QPool2dBenchmarkBase):
    """Benchmark ``torch.nn.AvgPool2d`` on a quantized input."""

    def init(self, N, C, H, W, k, s, p, contig, dtype):
        """Create the average-pool module, then build the quantized input.

        ``k``, ``s`` and ``p`` are the kernel size, stride and padding; the
        remaining arguments are forwarded to the base class ``setup``.
        """
        pool_options = {
            "kernel_size": k,
            "stride": s,
            "padding": p,
            "ceil_mode": False,
        }
        self.pool_op = torch.nn.AvgPool2d(**pool_options)
        super().setup(N, C, H, W, dtype=dtype, contig=contig)


class QAdaptiveAvgPool2dBenchmark(_QPool2dBenchmarkBase):
    """Benchmark ``torch.nn.AdaptiveAvgPool2d`` on a quantized input."""

    def init(self, N, C, input_size, output_size, contig, dtype):
        """Create the adaptive average-pool module, then build the input.

        ``input_size`` is splatted into the height and width positions of the
        base class ``setup``; ``output_size`` is handed to the pooling module.
        """
        adaptive_pool = torch.nn.AdaptiveAvgPool2d(output_size=output_size)
        self.pool_op = adaptive_pool

        # Batch and channel counts followed by the spatial extent.
        leading_args = (N, C, *input_size)
        super().setup(*leading_args, dtype=dtype, contig=contig)


# Hand each benchmark class its short and long configuration lists
# (concatenated) to op_bench.generate_pt_test. The max-pool and avg-pool
# benchmarks share the same qpool2d_* configurations.
op_bench.generate_pt_test(
    qadaptive_avgpool2d_short_configs + qadaptive_avgpool2d_long_configs,
    QAdaptiveAvgPool2dBenchmark,
)
op_bench.generate_pt_test(
    qpool2d_short_configs + qpool2d_long_configs, QAvgPool2dBenchmark
)
op_bench.generate_pt_test(
    qpool2d_short_configs + qpool2d_long_configs, QMaxPool2dBenchmark
)


# Invoke the operator_benchmark runner only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()