/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir_builder.h"

/* This pass updates the block index of resource_intel intrinsics whose
 * array index is constant and marks the remaining ones as non-pushable.
 *
 * This pass must be run before anv_nir_compute_push_layout().
 */
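/* For example (illustrative values), an intrinsic carrying block=5 and a
 * constant array index of 3 comes out with block=8, while an intrinsic with
 * a non-constant array index comes out with block=0xffffffff and its
 * pushable bit cleared.
 */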
static bool
update_resource_intel_block(nir_builder *b, nir_intrinsic_instr *intrin,
                            UNUSED void *data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   /* If the array index into the descriptor binding is not constant, we
    * won't be able to turn this load_ubo into a push constant, so mark the
    * resource as non-pushable by setting the block to 0xffffffff and
    * clearing the pushable bit. The same goes for resources that were never
    * flagged as pushable in the first place.
    *
    * Otherwise, add the constant array index to the block index so that,
    * when anv_nir_compute_push_layout() uses the block value, it picks the
    * right surface in the binding's array.
    */
   if (!nir_src_is_const(intrin->src[2]) ||
       !(nir_intrinsic_resource_access_intel(intrin) &
         nir_resource_intel_pushable)) {
      nir_intrinsic_set_resource_block_intel(intrin, 0xffffffff);
      nir_intrinsic_set_resource_access_intel(
         intrin,
         nir_intrinsic_resource_access_intel(intrin) &
         ~nir_resource_intel_pushable);
   } else {
      nir_intrinsic_set_resource_block_intel(
         intrin,
         nir_intrinsic_resource_block_intel(intrin) +
         nir_src_as_uint(intrin->src[2]));
   }

   return true;
}

bool
anv_nir_update_resource_intel_block(nir_shader *shader)
{
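   /* Only the intrinsic's const indices are rewritten, no instructions are
    * added or removed, so it is safe to preserve all metadata.
    */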
   return nir_shader_intrinsics_pass(shader, update_resource_intel_block,
                                     nir_metadata_all,
                                     NULL);
}

struct lower_resource_state {
   enum anv_descriptor_set_layout_type desc_type;
   const struct anv_physical_device *device;
};

/* This pass lowers the resource_intel surface_index source, combining the
 * descriptor set offset with the surface offset within the descriptor set.
 *
 * This pass must be run after anv_nir_compute_push_layout() because the
 * push constant selection needs to see whether the surface offset is
 * constant. Once the offsets are combined, that constant detection no
 * longer works.
 */
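/* Concretely, in the non-ex_bso direct/buffer case (an illustrative sketch
 * of the rewrite below):
 *
 *    src[1] <- src[0] + (src[1] << 6)   for surfaces
 *    src[1] <- src[0] + src[1]          for samplers
 *
 * after which src[0] & src[2] are dead and rewritten to placeholder values.
 */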
static bool
lower_resource_intel(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   const bool is_bindless =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_bindless) != 0;
   const bool is_sampler =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_sampler) != 0;
   const bool is_embedded_sampler =
      (nir_intrinsic_resource_access_intel(intrin) &
       nir_resource_intel_sampler_embedded) != 0;
   const struct lower_resource_state *state = data;

   /* Ignore embedded samplers & binding table (non-bindless) accesses. */
   if (is_embedded_sampler) {
      assert(state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER);
      return false;
   }

   if (!is_bindless)
      return true;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *set_offset = intrin->src[0].ssa;
   nir_def *binding_offset = intrin->src[1].ssa;

   /* With indirect descriptors, the surface handles are loaded from the
    * descriptor buffer and do not need any offset, so only combine the
    * offsets for the direct & buffer layout types.
    */
   if (state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_DIRECT ||
       state->desc_type == ANV_PIPELINE_DESCRIPTOR_SET_LAYOUT_TYPE_BUFFER) {
      if (!state->device->uses_ex_bso) {
         /* We're trying to reduce the number of instructions in the shaders
          * that compute surface handles. The assumption is that we're using
          * more surface handles than sampler handles (UBO, SSBO, images,
          * etc...), so that is the case worth optimizing.
          *
          * Prior to ex_bso, surface handles in the extended descriptor
          * message have to be shifted left by 6 (bits [31:12] of the
          * extended descriptor match bits [25:6] of the surface handle). We
          * have to combine 2 parts in the shader to build the final surface
          * handle: the base offset of the descriptor set (in the push
          * constants, located in resource_intel::src[0]) and the relative
          * descriptor offset (resource_intel::src[1]).
          *
          * For convenience, resource_intel::src[1] is in bytes up to this
          * point. We now shift it left by 6 to match the shift already
          * applied to the push constant value provided in
          * resource_intel::src[0]. That way the shader can build the final
          * surface handle with a single ADD.
          */
         if (!is_sampler)
            binding_offset = nir_ishl_imm(b, binding_offset, 6);
      }

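      /* Fold the descriptor set base offset into the binding offset so
       * that src[1] alone yields the final surface handle.
       */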
      nir_src_rewrite(&intrin->src[1],
                      nir_iadd(b, set_offset, binding_offset));
   }

   /* These sources are now unused: set offset (src[0]) & array index
    * (src[2]).
    */
   nir_src_rewrite(&intrin->src[0], nir_imm_int(b, 0xdeaddeed));
   nir_src_rewrite(&intrin->src[2], nir_imm_int(b, 0xdeaddeed));

   return true;
}

bool
anv_nir_lower_resource_intel(nir_shader *shader,
                             const struct anv_physical_device *device,
                             enum anv_descriptor_set_layout_type desc_type)
{
   struct lower_resource_state state = {
      .desc_type = desc_type,
      .device = device,
   };
   return nir_shader_intrinsics_pass(shader, lower_resource_intel,
                                     nir_metadata_control_flow,
                                     &state);
}
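
/* Expected call ordering, a sketch based on the before/after constraints
 * documented on each pass above:
 *
 *    anv_nir_update_resource_intel_block(shader);
 *    ... anv_nir_compute_push_layout() ...
 *    anv_nir_lower_resource_intel(shader, device, desc_type);
 */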
