/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      /* For lmem + smem placements, the NEEDS_CPU_ACCESS flag will avoid a
       * page fault when the CPU tries to access the BO.
       * Although it's counterintuitive, we cannot set this flag for
       * IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR because i915 does not
       * accept that flag for lmem only placements.
       * When lmem only BOs are accessed by the CPU, i915 will fault and
       * automatically migrate the BO to the lmem portion that is CPU
       * accessible.
       * The CPU_VISIBLE heap is still valuable for other reasons however
       * (e.g., it tells the functions which calculate the iris_mmap_mode
       * that it can be mapped).
       */
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                                  I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC]    = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC]    = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB]    = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags  = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirement for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
    ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet. The binding operation happens at
    * submission when we supply BO handle & offset in the execbuffer list.
    */
   return true;
}

static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
               .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}