/*
 * Copyright (c) 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef INTEL_GENX_STATE_BRW_H
#define INTEL_GENX_STATE_BRW_H

#ifndef GFX_VERx10
#error This file should only be included by genX files.
#endif

#include <stdbool.h>

#include "dev/intel_device_info.h"
#include "genxml/gen_macros.h"

#ifdef __cplusplus
extern "C" {
#endif

#if GFX_VER >= 7

/**
 * Fill in the dispatch-width fields of a 3DSTATE_PS packet.
 *
 * The candidate dispatch modes are taken from prog_data (dispatch_8,
 * dispatch_16, dispatch_32 and dispatch_multi).  Modes that conflict with
 * the restrictions quoted below are dropped, based on the fast clear /
 * resolve fields already present in ps (read on Gfx8+), the rasterization
 * sample count and whether dispatch is per-sample.  The surviving modes are
 * then written to ps: the Kernel0 and Kernel1 fields on Gfx20+, the
 * 8/16/32 pixel dispatch enables on earlier generations.
 *
 * ps:                    packet to update; the caller is expected to have
 *                        set its fast clear / resolve fields beforehand.
 * devinfo:               not referenced by this function.
 * prog_data:             fragment shader program data supplying the
 *                        dispatch modes and max_polygons.
 * rasterization_samples: rasterization sample count, must be non-zero.
 * msaa_flags:            passed to brw_wm_prog_data_is_persample() to decide
 *                        whether dispatch is per-sample.
 */
static inline void
intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
                            const struct intel_device_info *devinfo,
                            const struct brw_wm_prog_data *prog_data,
                            unsigned rasterization_samples,
                            enum intel_msaa_flags msaa_flags)
{
   assert(rasterization_samples != 0);

   /* Candidate dispatch modes; the checks below only ever remove some. */
   uint8_t multi_width = prog_data->dispatch_multi;
   bool simd8  = prog_data->dispatch_8;
   bool simd16 = prog_data->dispatch_16;
   bool simd32 = prog_data->dispatch_32;

#if GFX_VER >= 20
   /* Bspec 57340 (r59562):
    *
    *   Clearing shader must use SIMD16 dispatch mode.
    *
    * Whether a fast-clear shader may be multi-polygon is not stated by the
    * spec, so assume that it may not.
    */
   if (ps->RenderTargetFastClearEnable) {
      assert(simd16);
      multi_width = 0;
      simd8 = false;
      simd32 = false;
   }
#elif GFX_VER >= 9
   /* SKL PRMs, Volume 2a: Command Reference: Instructions:
    *    3DSTATE_PS_BODY::8 Pixel Dispatch Enable:
    *
    *    "When Render Target Fast Clear Enable is ENABLED or Render Target
    *     Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit must be
    *     DISABLED."
    */
   const bool clear_or_resolve =
      ps->RenderTargetFastClearEnable ||
      ps->RenderTargetResolveType == RESOLVE_PARTIAL ||
      ps->RenderTargetResolveType == RESOLVE_FULL;

   if (clear_or_resolve) {
      simd8 = false;
   }
#elif GFX_VER == 8
   /* The BDW wording matches SKL, but the resolve type field does not exist
    * there; a single resolve enable is checked instead.
    */
   const bool clear_or_resolve =
      ps->RenderTargetFastClearEnable || ps->RenderTargetResolveEnable;

   if (clear_or_resolve) {
      simd8 = false;
   }
#endif

   const bool per_sample =
      brw_wm_prog_data_is_persample(prog_data, msaa_flags);

   if (per_sample) {
      /* TGL PRMs, Volume 2d: Command Reference: Structures:
       *    3DSTATE_PS_BODY::32 Pixel Dispatch Enable:
       *
       *    "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1."
       */
      if (GFX_VER >= 12 && rasterization_samples > 1) {
         simd32 = false;
      }

      /* Since SandyBridge (the first generation with MSAA) the pixel
       * dispatch combinations are classified A through F (SNB PRM Vol. 2
       * Part 1 Section 7.7.1).  On most generations per-sample dispatch is
       * only supported by configurations with a single dispatch width
       * enabled, so keep only the widest one.
       *
       * The Gfx12 spec has a similar grouping table, but the page on
       * "Structure_3DSTATE_PS_BODY" adds this conflicting restriction, which
       * is why the SIMD16 shader is kept there:
       *
       *  "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also
       *   enabled."
       */
      if (simd16 || simd32) {
         simd8 = false;
      }
      if (GFX_VER < 12 && simd32) {
         simd16 = false;
      }
   } else if (GFX_VER >= 9 && rasterization_samples == 16) {
      /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say:
       *
       *    "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16,
       *     SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch
       *     mode."
       *
       * 16x MSAA was introduced with Gfx9, so Gfx8 needs no such check.
       */
      assert(simd8 || simd16);
      simd32 = false;
   }

   /* At least one dispatch mode has to survive, and multi-polygon dispatch
    * is only accepted on Gfx12+ with SIMD8 disabled.
    */
   assert(simd8 || simd16 || simd32 ||
          (GFX_VER >= 12 && multi_width));
   assert(!multi_width || (GFX_VER >= 12 && !simd8));

#if GFX_VER >= 20
   /* Kernel 0: the multi-polygon shader if there is one, otherwise the
    * SIMD16 shader.
    */
   if (multi_width) {
      ps->Kernel0Enable = true;
      if (multi_width == 32) {
         ps->Kernel0SIMDWidth = PS_SIMD32;
      } else {
         ps->Kernel0SIMDWidth = PS_SIMD16;
      }
      ps->Kernel0MaximumPolysperThread = prog_data->max_polygons - 1;

      /* Packing policy follows the number of channels per polygon. */
      switch (multi_width / prog_data->max_polygons) {
      case 16:
         ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
         break;
      case 8:
         ps->Kernel0PolyPackingPolicy = POLY_PACK8_FIXED;
         break;
      default:
         unreachable("Invalid polygon width");
      }
   } else if (simd16) {
      ps->Kernel0Enable = true;
      ps->Kernel0SIMDWidth = PS_SIMD16;
      ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED;
   }

   /* Kernel 1: the SIMD32 shader, or the SIMD16 shader when kernel 0 holds
    * a 16-wide multi-polygon shader.
    */
   const bool kernel1_simd16 = simd16 && multi_width == 16;

   if (simd32 || kernel1_simd16) {
      ps->Kernel1Enable = true;
      ps->Kernel1SIMDWidth = simd32 ? PS_SIMD32 : PS_SIMD16;
   }
#else
   ps->_32PixelDispatchEnable = simd32;
   ps->_16PixelDispatchEnable = simd16;
   /* On Gfx12 a multi-polygon shader also sets the 8-pixel enable. */
   ps->_8PixelDispatchEnable = simd8 || (GFX_VER == 12 && multi_width);
#endif
}

#endif

#ifdef __cplusplus
}
#endif

#endif /* INTEL_GENX_STATE_BRW_H */
