/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "spirv/nir_spirv.h"

#include "dxil_nir.h"
#include "nir_to_dxil.h"
#include "dxil_spirv_nir.h"
#include "spirv_to_dxil.h"

#include "dxil_validator.h"

#include "vk_alloc.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_pipeline.h"
#include "vk_pipeline_cache.h"

#include "util/u_debug.h"

/*
 * Appends one subobject to a D3D12 pipeline state stream and declares a
 * pointer to its payload in the scope where the macro is expanded.
 *
 *   __stream       pointer to the stream descriptor; its SizeInBytes and
 *                  pPipelineStateSubobjectStream members are used
 *   __maxstreamsz  upper bound for SizeInBytes after the append
 *   __id           subobject type value stored in front of the payload
 *   __type         C type of the payload
 *   __desc         name of the `__type *` variable that gets declared
 *
 * The expansion first declares `__type *__desc`, then, inside a do/while:
 *  - rounds SizeInBytes up to alignof(void *),
 *  - places a { subobject type, payload } wrapper at that offset,
 *  - grows SizeInBytes by the wrapper size and asserts that the result does
 *    not exceed __maxstreamsz,
 *  - stores __id as the subobject type and zero-fills the payload.
 *
 * Because the expansion starts with a declaration, it must be used where a
 * declaration is allowed, and __desc must not already exist in that scope.
 * The bound on the stream size is enforced by assert() only.
 */
#define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
   __type *__desc; \
   do { \
      struct { \
         D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
         __type desc; \
      } *__wrapper; \
      (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
      __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
      (__stream)->SizeInBytes += sizeof(*__wrapper); \
      assert((__stream)->SizeInBytes <= __maxstreamsz); \
      __wrapper->type = __id; \
      __desc = &__wrapper->desc; \
      memset(__desc, 0, sizeof(*__desc)); \
   } while (0)

/* Same as d3d12_pipeline_state_stream_new_desc(), but __id is only the suffix
 * of the subobject type: it is token-pasted onto
 * D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ (e.g. VS, INPUT_LAYOUT). */
#define d3d12_pipeline_state_stream_new_desc_abbrev(__stream, __maxstreamsz, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id, __type, __desc)

/* Abbreviated form for graphics pipeline streams, bounded by
 * MAX_GFX_PIPELINE_STATE_STREAM_SIZE. */
#define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

/* Abbreviated form for compute pipeline streams, bounded by
 * MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE. */
#define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc_abbrev(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

/* Hash-table equality callback for graphics pipeline variant keys: two keys
 * match when every byte of struct dzn_graphics_pipeline_variant_key is the
 * same. */
static bool
gfx_pipeline_variant_key_equal(const void *a, const void *b)
{
   const size_t key_size = sizeof(struct dzn_graphics_pipeline_variant_key);

   return memcmp(a, b, key_size) == 0;
}

/* Hash-table hash callback for graphics pipeline variant keys: hashes the raw
 * bytes of one struct dzn_graphics_pipeline_variant_key. */
static uint32_t
gfx_pipeline_variant_key_hash(const void *key)
{
   const size_t key_size = sizeof(struct dzn_graphics_pipeline_variant_key);

   return _mesa_hash_data(key, key_size);
}

/* Hash-table equality callback for indirect-draw command signature keys: a
 * byte-wise comparison of two struct dzn_indirect_draw_cmd_sig_key. */
static bool
gfx_pipeline_cmd_signature_key_equal(const void *a, const void *b)
{
   const size_t key_size = sizeof(struct dzn_indirect_draw_cmd_sig_key);

   return memcmp(a, b, key_size) == 0;
}

/* Hash-table hash callback for indirect-draw command signature keys: hashes
 * the raw bytes of one struct dzn_indirect_draw_cmd_sig_key. */
static uint32_t
gfx_pipeline_cmd_signature_key_hash(const void *key)
{
   const size_t key_size = sizeof(struct dzn_indirect_draw_cmd_sig_key);

   return _mesa_hash_data(key, key_size);
}

/* Pipeline cache object wrapping an opaque byte payload keyed by a SHA1. */
struct dzn_cached_blob {
   /* Common cache object state; set up by vk_pipeline_cache_object_init(). */
   struct vk_pipeline_cache_object base;
   /* SHA1 key; also registered as the cache object's key data. */
   uint8_t hash[SHA1_DIGEST_LENGTH];
   /* Payload bytes; dzn_cached_blob_create() points this at storage it
    * allocates together with the object. */
   const void *data;
   /* Payload size in bytes. */
   size_t size;
};

static bool
dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
                          struct blob *blob)
{
   struct dzn_cached_blob *cached_blob =
      container_of(object, struct dzn_cached_blob, base);

   blob_write_bytes(blob, cached_blob->data, cached_blob->size);
   return true;
}

/* vk_pipeline_cache_object_ops::destroy hook: releases the cached blob
 * through the device allocator. */
static void
dzn_cached_blob_destroy(struct vk_device *device,
                        struct vk_pipeline_cache_object *object)
{
   struct dzn_cached_blob *cached_blob =
      container_of(object, struct dzn_cached_blob, base);

   vk_free(&device->alloc, cached_blob);
}

/* Forward declaration: dzn_cached_blob_deserialize() calls this before its
 * definition, which itself has to come after dzn_cached_blob_ops. */
static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size);

/* vk_pipeline_cache_object_ops::deserialize hook: builds a cached blob whose
 * payload is everything that remains unread in `blob`, keyed by `key_data`
 * (expected to be a SHA1 digest). Returns NULL if the object cannot be
 * allocated. */
static struct vk_pipeline_cache_object *
dzn_cached_blob_deserialize(struct vk_pipeline_cache *cache,
                            const void *key_data, size_t key_size,
                            struct blob_reader *blob)
{
   /* The payload is whatever is left between the read cursor and the end. */
   const size_t payload_size = blob->end - blob->current;

   assert(key_size == SHA1_DIGEST_LENGTH);

   const void *payload = blob_read_bytes(blob, payload_size);

   return dzn_cached_blob_create(cache->base.device, key_data,
                                 payload, payload_size);
}

/* Cache object operations for struct dzn_cached_blob. Passed to
 * vk_pipeline_cache_object_init() and vk_pipeline_cache_lookup_object() so
 * the cache can serialize, rebuild and free these blobs. */
const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
   .serialize = dzn_cached_blob_serialize,
   .deserialize = dzn_cached_blob_deserialize,
   .destroy = dzn_cached_blob_destroy,
};


static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
   VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);

   if (!vk_multialloc_alloc(&ma, &device->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   memcpy(blob->hash, hash, sizeof(blob->hash));

   vk_pipeline_cache_object_init(device, &blob->base,
                                 &dzn_cached_blob_ops,
                                 blob->hash, sizeof(blob->hash));

   if (data)
      memcpy(copy, data, data_size);
   blob->data = copy;
   blob->size = data_size;

   return &blob->base;
}

/* Lazily creates the pipeline's variant hash table.
 *
 * Does nothing when pipeline->variants already exists. Returns
 * VK_ERROR_OUT_OF_HOST_MEMORY if the table cannot be created, VK_SUCCESS
 * otherwise. */
static VkResult
dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
                                           struct dzn_graphics_pipeline *pipeline)
{
   if (pipeline->variants == NULL) {
      pipeline->variants =
         _mesa_hash_table_create(NULL,
                                 gfx_pipeline_variant_key_hash,
                                 gfx_pipeline_variant_key_equal);

      if (pipeline->variants == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   return VK_SUCCESS;
}

/* Maps a single Vulkan shader stage bit to the matching dxil_spirv stage.
 * Any stage other than the six handled below is treated as unreachable. */
static dxil_spirv_shader_stage
to_dxil_shader_stage(VkShaderStageFlagBits in)
{
   switch (in) {
   case VK_SHADER_STAGE_VERTEX_BIT:
      return DXIL_SPIRV_SHADER_VERTEX;
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
      return DXIL_SPIRV_SHADER_TESS_CTRL;
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
      return DXIL_SPIRV_SHADER_TESS_EVAL;
   case VK_SHADER_STAGE_GEOMETRY_BIT:
      return DXIL_SPIRV_SHADER_GEOMETRY;
   case VK_SHADER_STAGE_FRAGMENT_BIT:
      return DXIL_SPIRV_SHADER_FRAGMENT;
   case VK_SHADER_STAGE_COMPUTE_BIT:
      return DXIL_SPIRV_SHADER_COMPUTE;
   default:
      unreachable("Unsupported stage");
   }
}

/* Per-shader options consumed by dzn_pipeline_get_nir_shader(). */
struct dzn_nir_options {
   /* Copied into dxil_spirv_runtime_conf::yz_flip.mode. */
   enum dxil_spirv_yz_flip_mode yz_flip_mode;
   /* Copied into dxil_spirv_runtime_conf::yz_flip.y_mask / .z_mask. */
   uint16_t y_flip_mask, z_flip_mask;
   /* The next three are forwarded to the dxil_spirv_runtime_conf fields of
    * the same name. */
   bool force_sample_rate_shading;
   bool lower_view_index;
   bool lower_view_index_to_rt_layer;
   /* Vertex stage only: read for MAX_VERTEX_GENERIC_ATTRIBS entries; any entry
    * other than PIPE_FORMAT_NONE triggers dxil_nir_lower_vs_vertex_conversion. */
   enum pipe_format *vi_conversions;
   /* Compiler options handed to the NIR cache lookup and to SPIR-V -> NIR. */
   const nir_shader_compiler_options *nir_opts;
};

/* Produces the NIR shader for one pipeline stage, using the pipeline cache
 * when one is provided.
 *
 *   cache     optional; on a hit the cached NIR is returned as-is, on a miss
 *             the freshly lowered NIR is added under `hash`
 *   hash      SHA1_DIGEST_LENGTH-byte cache key for this NIR
 *   stage     stage being compiled; vertex shaders additionally get the
 *             vertex-input conversion lowering when any conversion is set
 *   options   lowering knobs, see struct dzn_nir_options
 *   metadata  filled by dxil_spirv_nir_passes(); on a cache hit only
 *             needs_draw_sysvals is restored
 *   nir       receives the shader
 *
 * Returns VK_SUCCESS, or the error from vk_pipeline_shader_stage_to_nir().
 */
static VkResult
dzn_pipeline_get_nir_shader(struct dzn_device *device,
                            const struct dzn_pipeline_layout *layout,
                            struct vk_pipeline_cache *cache,
                            const uint8_t *hash,
                            VkPipelineCreateFlags2KHR pipeline_flags,
                            const VkPipelineShaderStageCreateInfo *stage_info,
                            gl_shader_stage stage,
                            const struct dzn_nir_options *options,
                            struct dxil_spirv_metadata *metadata,
                            nir_shader **nir)
{
   if (cache != NULL) {
      nir_shader *cached =
         vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
                                      options->nir_opts, NULL, NULL);

      *nir = cached;
      if (cached != NULL) {
         /* This bit is explicitly added into the info before caching, since this sysval wouldn't
          * actually be present for this bit to be set by info gathering. */
         const bool first_vertex_flagged =
            cached->info.stage == MESA_SHADER_VERTEX &&
            BITSET_TEST(cached->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX);

         if (first_vertex_flagged)
            metadata->needs_draw_sysvals = true;

         return VK_SUCCESS;
      }
   }

   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const struct spirv_to_nir_options *spirv_opts = dxil_spirv_nir_get_spirv_options();

   VkResult result =
      vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
                                      spirv_opts, options->nir_opts, NULL, nir);
   if (result != VK_SUCCESS)
      return result;

   /* Read-only images are only exposed as SRVs when not running bindless. */
   const bool images_as_srvs = !device->bindless;

   struct dxil_spirv_runtime_conf conf = {
      .runtime_data_cbv = {
         .register_space = DZN_REGISTER_SPACE_SYSVALS,
         .base_shader_register = 0,
      },
      .push_constant_cbv = {
         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
         .base_shader_register = 0,
      },
      .first_vertex_and_base_instance_mode = pdev->options21.ExtendedCommandInfoSupported ?
            DXIL_SPIRV_SYSVAL_TYPE_NATIVE : DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
      .workgroup_id_mode = DXIL_SPIRV_SYSVAL_TYPE_RUNTIME_DATA,
      .yz_flip = {
         .mode = options->yz_flip_mode,
         .y_mask = options->y_flip_mask,
         .z_mask = options->z_flip_mask,
      },
      .declared_read_only_images_as_srvs = images_as_srvs,
      .inferred_read_only_images_as_srvs = images_as_srvs,
      .force_sample_rate_shading = options->force_sample_rate_shading,
      .lower_view_index = options->lower_view_index,
      .lower_view_index_to_rt_layer = options->lower_view_index_to_rt_layer,
      .shader_model_max = dzn_get_shader_model(pdev),
   };

   dxil_spirv_nir_passes(*nir, &conf, metadata);

   if (stage == MESA_SHADER_VERTEX) {
      /* Find the first attribute that asks for a format conversion, if any. */
      uint32_t attrib = 0;
      while (attrib < MAX_VERTEX_GENERIC_ATTRIBS &&
             options->vi_conversions[attrib] == PIPE_FORMAT_NONE)
         attrib++;

      if (attrib < MAX_VERTEX_GENERIC_ATTRIBS)
         NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, options->vi_conversions);
   }

   if (cache != NULL) {
      /* Cache this additional metadata */
      if (metadata->needs_draw_sysvals)
         BITSET_SET((*nir)->info.system_values_read, SYSTEM_VALUE_FIRST_VERTEX);

      vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);
   }

   return VK_SUCCESS;
}

/* nir_shader_intrinsics_pass() callback: rewrites the binding index of a
 * vulkan_resource_index intrinsic to the base register recorded in the
 * pipeline layout (passed as cb_data).
 *
 * Returns true when the intrinsic was modified; other intrinsics and
 * set/binding pairs outside the layout's translation tables are left alone. */
static bool
adjust_resource_index_binding(struct nir_builder *builder,
                              nir_intrinsic_instr *intrin,
                              void *cb_data)
{
   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   const struct dzn_pipeline_layout *layout = cb_data;
   const unsigned set = nir_intrinsic_desc_set(intrin);
   const unsigned orig_binding = nir_intrinsic_binding(intrin);

   if (set >= layout->set_count)
      return false;

   if (orig_binding >= layout->binding_translation[set].binding_count)
      return false;

   const unsigned base_reg =
      layout->binding_translation[set].base_reg[orig_binding];
   nir_intrinsic_set_binding(intrin, base_reg);

   return true;
}

/* remap_binding callback used for the bindless lowering; `context` is the
 * pipeline layout.
 *
 * Rewrites inout->descriptor_set / inout->binding in place depending on the
 * binding class recorded in the layout. The cases rely on fallthrough:
 *  - dynamic buffers are moved to set index layout->set_count, then handled
 *    like the cases below,
 *  - a binding flagged is_sampler in the dynamic-buffer or static-sampler
 *    class gets descriptor_set = ~0 and keeps its binding,
 *  - everything else has its binding replaced by the layout's base register.
 */
static void
adjust_to_bindless_cb(struct dxil_spirv_binding_remapping *inout, void *context)
{
   const struct dzn_pipeline_layout *layout = context;
   assert(inout->descriptor_set < layout->set_count);
   /* Looked up with the original set index, before any case rewrites it. */
   uint32_t new_binding = layout->binding_translation[inout->descriptor_set].base_reg[inout->binding];
   switch (layout->binding_translation[inout->descriptor_set].binding_class[inout->binding]) {
   case DZN_PIPELINE_BINDING_DYNAMIC_BUFFER:
      inout->descriptor_set = layout->set_count;
      FALLTHROUGH;
   case DZN_PIPELINE_BINDING_STATIC_SAMPLER:
      if (inout->is_sampler) {
         inout->descriptor_set = ~0;
         break;
      }
      FALLTHROUGH;
   case DZN_PIPELINE_BINDING_NORMAL:
      inout->binding = new_binding;
      break;
   default:
      unreachable("Invalid binding type");
   }
}

/* Translates the descriptor bindings used by `shader` according to `layout`.
 *
 * Walks image, UBO and SSBO variables plus sampler/texture uniforms. When the
 * device is not bindless, each variable's binding is replaced by the layout's
 * base register. If `bindings_hash` is non-NULL it receives a SHA1 over the
 * (set, original binding, resulting binding) triple of every visited variable
 * whose set is covered by the layout.
 *
 * Afterwards, bindless devices run dxil_spirv_nir_lower_bindless() and remap
 * the uniform variables that remain; other devices patch
 * vulkan_resource_index intrinsics instead.
 *
 * Returns the progress flag of whichever lowering pass was run.
 */
static bool
adjust_var_bindings(nir_shader *shader,
                    struct dzn_device *device,
                    const struct dzn_pipeline_layout *layout,
                    uint8_t *bindings_hash)
{
   const uint32_t resource_modes =
      nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
   const bool want_hash = bindings_hash != NULL;
   struct mesa_sha1 sha1_ctx;

   if (want_hash)
      _mesa_sha1_init(&sha1_ctx);

   nir_foreach_variable_with_modes(var, shader, resource_modes) {
      if (var->data.mode == nir_var_uniform) {
         /* Only sampler and texture uniforms are descriptor-backed. */
         const struct glsl_type *base_type = glsl_without_array(var->type);
         const bool is_descriptor =
            glsl_type_is_sampler(base_type) || glsl_type_is_texture(base_type);

         if (!is_descriptor)
            continue;
      }

      unsigned set = var->data.descriptor_set;
      unsigned orig_binding = var->data.binding;

      if (set >= layout->set_count)
         continue;

      assert(orig_binding < layout->binding_translation[set].binding_count);
      if (!device->bindless)
         var->data.binding = layout->binding_translation[set].base_reg[orig_binding];

      if (want_hash) {
         _mesa_sha1_update(&sha1_ctx, &set, sizeof(set));
         _mesa_sha1_update(&sha1_ctx, &orig_binding, sizeof(orig_binding));
         _mesa_sha1_update(&sha1_ctx, &var->data.binding, sizeof(var->data.binding));
      }
   }

   if (want_hash)
      _mesa_sha1_final(&sha1_ctx, bindings_hash);

   if (!device->bindless) {
      return nir_shader_intrinsics_pass(shader, adjust_resource_index_binding,
                                          nir_metadata_all, (void *)layout);
   }

   struct dxil_spirv_nir_lower_bindless_options options = {
      .dynamic_buffer_binding = layout->dynamic_buffer_count ? layout->set_count : ~0,
      .num_descriptor_sets = layout->set_count,
      .callback_context = (void *)layout,
      .remap_binding = adjust_to_bindless_cb
   };
   bool progress = dxil_spirv_nir_lower_bindless(shader, &options);

   /* We skipped remapping variable bindings in the hashing loop, but if there's static
    * samplers still declared, we need to remap those now. */
   nir_foreach_variable_with_modes(var, shader, nir_var_uniform) {
      assert(glsl_type_is_sampler(glsl_without_array(var->type)));
      var->data.binding =
         layout->binding_translation[var->data.descriptor_set].base_reg[var->data.binding];
   }

   return progress;
}

/* Converts the physical device's D3D shader model value (0xMm, major in the
 * high nibble, minor in the low nibble) to the dxil encoding (0xM000m). The
 * static asserts pin both encodings for 6.0 and 6.8. */
enum dxil_shader_model
dzn_get_shader_model(const struct dzn_physical_device *pdev)
{
   static_assert(D3D_SHADER_MODEL_6_0 == 0x60 && SHADER_MODEL_6_0 == 0x60000, "Validating math below");
   static_assert(D3D_SHADER_MODEL_6_8 == 0x68 && SHADER_MODEL_6_8 == 0x60008, "Validating math below");

   const uint32_t major_bits = pdev->shader_model & 0xf0;
   const uint32_t minor_bits = pdev->shader_model & 0xf;

   /* 0x60 << 12 == 0x60000: the major nibble moves up, the minor stays put. */
   return (major_bits << 12) | minor_bits;
}

/* Compiles `nir` to DXIL and stores the resulting bytecode in `slot`.
 *
 *   input_clip_size  forwarded to nir_to_dxil_options::input_clip_size
 *   slot             receives the bytecode pointer and length on success
 *
 * On Windows builds the module is additionally run through the DXIL
 * validator; a validation failure is reported as an error unless the
 * DZN_DEBUG_EXPERIMENTAL debug flag is set. DZN_DEBUG_NIR / DZN_DEBUG_DXIL
 * dump the NIR / disassembled DXIL to stderr.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY when either the
 * translation or the validation fails.
 */
static VkResult
dzn_pipeline_compile_shader(struct dzn_device *device,
                            nir_shader *nir,
                            uint32_t input_clip_size,
                            D3D12_SHADER_BYTECODE *slot)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   struct dzn_instance *instance =
      container_of(device->vk.physical->instance, struct dzn_instance, vk);

   /* Don't lower 16-bit types if they can only come from min-precision */
   const bool app_can_use_16bit =
      device->vk.enabled_extensions.KHR_shader_float16_int8 ||
      device->vk.enabled_features.shaderFloat16 ||
      device->vk.enabled_features.shaderInt16;

   struct nir_to_dxil_options opts = {
      .environment = DXIL_ENVIRONMENT_VULKAN,
      .lower_int16 = !pdev->options4.Native16BitShaderOpsSupported &&
                     app_can_use_16bit,
      .shader_model_max = dzn_get_shader_model(pdev),
      .input_clip_size = input_clip_size,
#ifdef _WIN32
      .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
#endif
   };

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (instance->debug_flags & DZN_DEBUG_NIR)
      nir_print_shader(nir, stderr);

   struct blob dxil_blob;
   VkResult result = VK_SUCCESS;

   if (!nir_to_dxil(nir, &opts, NULL, &dxil_blob)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   } else {
      /* Hand the generated bytes over to the bytecode slot. */
      blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
                             (size_t *)&slot->BytecodeLength);
   }

   if (dxil_blob.allocated)
      blob_finish(&dxil_blob);

   if (result != VK_SUCCESS)
      return result;

#ifdef _WIN32
   char *validation_msg;
   bool valid = dxil_validate_module(instance->dxil_validator,
                                     (void *)slot->pShaderBytecode,
                                     slot->BytecodeLength, &validation_msg);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      char *disasm = dxil_disasm_module(instance->dxil_validator,
                                        (void *)slot->pShaderBytecode,
                                        slot->BytecodeLength);
      if (disasm) {
         fprintf(stderr,
                 "== BEGIN SHADER ============================================\n"
                 "%s\n"
                 "== END SHADER ==============================================\n",
                  disasm);
         ralloc_free(disasm);
      }
   }

   const bool tolerate_invalid =
      (instance->debug_flags & DZN_DEBUG_EXPERIMENTAL) != 0;

   if (!valid && !tolerate_invalid) {
      if (validation_msg) {
         mesa_loge(
               "== VALIDATION ERROR =============================================\n"
               "%s\n"
               "== END ==========================================================\n",
               validation_msg);
         ralloc_free(validation_msg);
      }
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
#endif

   return VK_SUCCESS;
}

/* Appends a shader bytecode subobject for graphics stage `in` to `stream` and
 * returns a pointer to its (zero-initialized) D3D12_SHADER_BYTECODE payload.
 * Only vertex, tessellation, geometry and fragment stages are handled. */
static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
                                 gl_shader_stage in)
{
   D3D12_SHADER_BYTECODE *slot = NULL;

   /* Each case needs its own scope: the macro declares `desc` itself. */
   switch (in) {
   case MESA_SHADER_VERTEX: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
      slot = desc;
      break;
   }
   case MESA_SHADER_TESS_CTRL: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
      slot = desc;
      break;
   }
   case MESA_SHADER_TESS_EVAL: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
      slot = desc;
      break;
   }
   case MESA_SHADER_GEOMETRY: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
      slot = desc;
      break;
   }
   case MESA_SHADER_FRAGMENT: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
      slot = desc;
      break;
   }
   default:
      unreachable("Unsupported stage");
   }

   return slot;
}

/* Layout of a cached DXIL shader payload: this header immediately followed by
 * `size` bytes of DXIL bytecode. */
struct dzn_cached_dxil_shader_header {
   /* Stage the bytecode was compiled for. */
   gl_shader_stage stage;
   /* Number of bytecode bytes stored in data[]. */
   size_t size;
   /* Bytecode bytes. C99 flexible array member: it adds nothing to
    * sizeof(struct dzn_cached_dxil_shader_header). */
   uint8_t data[];
};

/* Looks up a DXIL shader in the pipeline cache.
 *
 *   cache      optional pipeline cache
 *   dxil_hash  SHA1_DIGEST_LENGTH-byte key of the shader
 *   stage      set to the cached shader's stage on a hit, MESA_SHADER_NONE
 *              otherwise
 *   bc         on a hit, receives a malloc()ed copy of the bytecode and its
 *              length
 *
 * Returns VK_SUCCESS on both hit and miss, or VK_ERROR_OUT_OF_HOST_MEMORY if
 * the bytecode copy cannot be allocated.
 */
static VkResult
dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
                                      const uint8_t *dxil_hash,
                                      gl_shader_stage *stage,
                                      D3D12_SHADER_BYTECODE *bc)
{
   *stage = MESA_SHADER_NONE;

   if (cache == NULL)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj =
      vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (cache_obj == NULL)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);

   const struct dzn_cached_dxil_shader_header *header =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);

   assert(sizeof(struct dzn_cached_dxil_shader_header) + header->size <= cached_blob->size);
   assert(header->stage > MESA_SHADER_NONE && header->stage < MESA_VULKAN_SHADER_STAGES);
   assert(header->size > 0);

   /* The caller gets its own copy so the cache object can be released. */
   VkResult ret;
   void *code = malloc(header->size);

   if (code != NULL) {
      memcpy(code, header->data, header->size);

      bc->pShaderBytecode = code;
      bc->BytecodeLength = header->size;
      *stage = header->stage;
      ret = VK_SUCCESS;
   } else {
      ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
   return ret;
}

/* Stores a compiled DXIL shader in the pipeline cache under `dxil_hash`.
 *
 *   cache      pipeline cache; dereferenced unconditionally, so it must not
 *              be NULL
 *   dxil_hash  SHA1_DIGEST_LENGTH-byte key
 *   stage      stage the bytecode belongs to
 *   bc         bytecode to copy into the cache entry
 *
 * The entry is a struct dzn_cached_dxil_shader_header followed by the
 * bytecode. Allocation failure is silently ignored: the shader is simply not
 * cached.
 */
static void
dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
                                   const uint8_t *dxil_hash,
                                   gl_shader_stage stage,
                                   const D3D12_SHADER_BYTECODE *bc)
{
   size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
                 bc->BytecodeLength;

   /* Passing NULL data leaves the payload storage uninitialized; it is filled
    * in below. */
   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   struct dzn_cached_dxil_shader_header *info =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);

   /* Zero the header first so that any padding between its members does not
    * carry uninitialized heap bytes into the serialized cache data. */
   memset(info, 0, sizeof(*info));
   info->stage = stage;
   info->size = bc->BytecodeLength;
   memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);

   /* add_object returns the object that ends up in the cache; drop the
    * reference we hold on it. */
   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
}

/* Header of a cached graphics pipeline entry. The entry layout is: this
 * header, then (aligned for D3D12_INPUT_ELEMENT_DESC) `input_count` input
 * element descriptors, then one SHA1 DXIL hash per bit set in `stages`. */
struct dzn_cached_gfx_pipeline_header {
   /* Bitmask of shader stages (bit index = gl_shader_stage) that have a DXIL
    * hash stored in the entry. */
   uint32_t stages : 30;
   /* Mirrors dzn_graphics_pipeline::needs_draw_sysvals. */
   uint32_t needs_draw_sysvals : 1;
   /* Mirrors dzn_graphics_pipeline::rast_disabled_from_missing_position. */
   uint32_t rast_disabled_from_missing_position : 1;
   /* Number of D3D12_INPUT_ELEMENT_DESC entries following the header. */
   uint32_t input_count;
};

/* Tries to rebuild a graphics pipeline's state stream from the pipeline cache.
 *
 *   pipeline       pipeline whose templates (inputs, stream_desc) and flags
 *                  are filled in on a hit
 *   cache          optional pipeline cache
 *   pipeline_hash  SHA1_DIGEST_LENGTH-byte key of the pipeline entry
 *   cache_hit      set to true only when the entry and all of its shaders
 *                  were processed successfully
 *
 * The cached entry holds a struct dzn_cached_gfx_pipeline_header, the input
 * element descriptors and one DXIL hash per stage; each DXIL hash is then
 * resolved through dzn_pipeline_cache_lookup_dxil_shader().
 *
 * Returns VK_SUCCESS on hit or miss, or the error returned by the DXIL shader
 * lookup. The reference taken on the cache entry is released on every path.
 */
static VkResult
dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                       struct vk_pipeline_cache *cache,
                                       const uint8_t *pipeline_hash,
                                       bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   VkResult ret = VK_SUCCESS;
   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
      &pipeline->templates.stream_desc;

   const struct dzn_cached_gfx_pipeline_header *info =
      (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
   size_t offset = ALIGN_POT(sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));

   assert(cached_blob->size >= sizeof(*info));

   if (info->input_count > 0) {
      const D3D12_INPUT_ELEMENT_DESC *inputs =
         (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);

      assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);

      /* The stream must point at storage owned by the pipeline, not at the
       * cache entry, so copy the descriptors into the pipeline templates. */
      memcpy(pipeline->templates.inputs, inputs,
             sizeof(*inputs) * info->input_count);
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
      desc->pInputElementDescs = pipeline->templates.inputs;
      desc->NumElements = info->input_count;
      offset += sizeof(*inputs) * info->input_count;
   }

   assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);

   u_foreach_bit(s, info->stages) {
      uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
      gl_shader_stage stage;

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(stream_desc, s);

      ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
      if (ret != VK_SUCCESS) {
         /* Leave through the common exit so the cache entry is unreferenced. */
         goto out;
      }

      assert(stage == s);
      offset += SHA1_DIGEST_LENGTH;
   }

   pipeline->rast_disabled_from_missing_position = info->rast_disabled_from_missing_position;
   pipeline->needs_draw_sysvals = info->needs_draw_sysvals;

   *cache_hit = true;

out:
   vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
   return ret;
}

/* Stores a graphics pipeline description in the pipeline cache.
 *
 *   pipeline            source of the input element descriptors, the per-stage
 *                       bytecode presence and the two header flags
 *   cache               pipeline cache; dereferenced unconditionally
 *   vertex_input_count  number of entries of pipeline->templates.inputs to
 *                       store
 *   pipeline_hash       SHA1_DIGEST_LENGTH-byte key of the entry
 *   dxil_hashes         per-stage DXIL hashes, indexed by gl_shader_stage;
 *                       read for every stage that has bytecode
 *
 * Entry layout: header, input element descriptors, then one DXIL hash per
 * stage in ascending stage order. Allocation failure is silently ignored.
 */
static void
dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    uint32_t vertex_input_count,
                                    const uint8_t *pipeline_hash,
                                    const uint8_t *const *dxil_hashes)
{
   const size_t inputs_offset =
      ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC));
   const size_t inputs_size =
      sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count;

   /* Collect the stages that have bytecode and size the entry accordingly. */
   size_t entry_size = inputs_offset + inputs_size;
   uint32_t stages = 0;

   for (uint32_t stage = 0; stage < MESA_VULKAN_SHADER_STAGES; stage++) {
      if (!pipeline->templates.shaders[stage].bc)
         continue;

      stages |= BITFIELD_BIT(stage);
      entry_size += SHA1_DIGEST_LENGTH;
   }

   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, entry_size);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   uint8_t *entry = (uint8_t *)cached_blob->data;

   struct dzn_cached_gfx_pipeline_header *header =
      (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);

   header->input_count = vertex_input_count;
   header->stages = stages;
   header->needs_draw_sysvals = pipeline->needs_draw_sysvals;
   header->rast_disabled_from_missing_position = pipeline->rast_disabled_from_missing_position;

   /* Input element descriptors follow the (aligned) header. */
   uint8_t *cursor = entry + inputs_offset;

   memcpy(cursor, pipeline->templates.inputs, inputs_size);
   cursor += inputs_size;

   /* Then one DXIL hash per active stage. */
   u_foreach_bit(s, stages) {
      memcpy(cursor, dxil_hashes[s], SHA1_DIGEST_LENGTH);
      cursor += SHA1_DIGEST_LENGTH;
   }

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache->base.device, cache_obj);
}

/* Computes a SHA1 over the vertex attribute descriptors and their format
 * conversions. Both arrays are hashed in full, i.e. they must each hold
 * MAX_VERTEX_GENERIC_ATTRIBS entries. The digest is written to `result`. */
static void
dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
                                   enum pipe_format *vi_conversions,
                                   uint8_t *result)
{
   const size_t attribs_size =
      sizeof(attribs[0]) * MAX_VERTEX_GENERIC_ATTRIBS;
   const size_t conversions_size =
      sizeof(vi_conversions[0]) * MAX_VERTEX_GENERIC_ATTRIBS;
   struct mesa_sha1 sha1_ctx;

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, attribs, attribs_size);
   _mesa_sha1_update(&sha1_ctx, vi_conversions, conversions_size);
   _mesa_sha1_final(&sha1_ctx, result);
}

/* Builds the DXIL shaders of a graphics pipeline and records them in the
 * pipeline state stream.
 *
 * The work is split in steps: collect the active stages, look the whole
 * pipeline up in the cache, get one NIR shader per stage, link the stages in
 * reverse pipeline order, apply the layout bindings, declare the vertex input
 * layout and finally translate each NIR shader into a DXIL module (skipping
 * the translation for modules found in the cache).
 *
 * device:         device the pipeline is created on.
 * pipeline:       pipeline being built; its shader templates, input table and
 *                 a few flags (needs_draw_sysvals,
 *                 rast_disabled_from_missing_position,
 *                 use_gs_for_polygon_mode_point) are written here.
 * cache:          optional pipeline cache; hashes are only computed and
 *                 cache lookups/insertions only done when non-NULL.
 * layout:         pipeline layout used for the per-stage binding hashes and
 *                 for the binding adjustment pass.
 * out:            pipeline state stream receiving the shader bytecode slots
 *                 and the input layout descriptor.
 * attribs:        input element descriptors indexed by attribute location.
 * vi_conversions: per-location vertex format conversions handed to the NIR
 *                 compilation.
 * info:           Vulkan pipeline creation info.
 *
 * Returns VK_SUCCESS, or the first error returned by a cache lookup, by the
 * NIR shader creation or by the DXIL compilation.
 */
static VkResult
dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
                                      struct dzn_graphics_pipeline *pipeline,
                                      struct vk_pipeline_cache *cache,
                                      const struct dzn_pipeline_layout *layout,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      D3D12_INPUT_ELEMENT_DESC *attribs,
                                      enum pipe_format *vi_conversions,
                                      const VkGraphicsPipelineCreateInfo *info)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const VkPipelineViewportStateCreateInfo *vp_info =
      info->pRasterizationState->rasterizerDiscardEnable ?
      NULL : info->pViewportState;
   struct {
      const VkPipelineShaderStageCreateInfo *info;
      uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
      uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
      uint8_t nir_hash[SHA1_DIGEST_LENGTH];
      /* Two full digests: [0] is the SPIR-V hash of the previous stage,
       * [1] the SPIR-V hash of the next stage. The first dimension must be
       * the digest index so that link_hashes[i] is a SHA1_DIGEST_LENGTH
       * array the memcpy()/sizeof() below can work on.
       */
      uint8_t link_hashes[2][SHA1_DIGEST_LENGTH];
   } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
   const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
   uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
   uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
   gl_shader_stage last_raster_stage = MESA_SHADER_NONE;
   uint32_t active_stage_mask = 0;
   VkResult ret;

   /* First step: collect stage info in a table indexed by gl_shader_stage
    * so we can iterate over stages in pipeline order or reverse pipeline
    * order.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      gl_shader_stage stage =
         vk_to_mesa_shader_stage(info->pStages[i].stage);

      assert(stage <= MESA_SHADER_FRAGMENT);

      /* Track the last stage feeding the rasterizer: it is the one that
       * gets the Y/Z flip and view-index-to-layer lowering below.
       */
      if ((stage == MESA_SHADER_VERTEX ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY) &&
          last_raster_stage < stage)
         last_raster_stage = stage;

      if (stage == MESA_SHADER_FRAGMENT &&
          info->pRasterizationState &&
          (info->pRasterizationState->rasterizerDiscardEnable ||
           info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
         /* Disable rasterization (AKA leave fragment shader NULL) when
          * front+back culling or discard is set.
          */
         continue;
      }

      stages[stage].info = &info->pStages[i];
      active_stage_mask |= BITFIELD_BIT(stage);
   }

   /* Point polygon mode is implemented with a driver-generated geometry
    * shader, which is only possible when the application provides none.
    */
   pipeline->use_gs_for_polygon_mode_point =
      info->pRasterizationState &&
      info->pRasterizationState->polygonMode == VK_POLYGON_MODE_POINT &&
      !(active_stage_mask & (1 << MESA_SHADER_GEOMETRY));
   if (pipeline->use_gs_for_polygon_mode_point)
      last_raster_stage = MESA_SHADER_GEOMETRY;

   enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
   uint16_t y_flip_mask = 0, z_flip_mask = 0;
   bool lower_view_index =
      !pipeline->multiview.native_view_instancing &&
      pipeline->multiview.view_mask > 1;

   /* With dynamic viewports the flip has to be decided at draw time,
    * otherwise it is derived from the static viewports: one bit per viewport
    * with a positive height (Y) or with minDepth > maxDepth (Z).
    */
   if (pipeline->vp.dynamic) {
      yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
   } else if (vp_info) {
      for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
         if (vp_info->pViewports[i].height > 0)
            y_flip_mask |= BITFIELD_BIT(i);

         if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
            z_flip_mask |= BITFIELD_BIT(i);
      }

      if (y_flip_mask && z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
      else if (z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
      else if (y_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
   }

   bool force_sample_rate_shading =
      !info->pRasterizationState->rasterizerDiscardEnable &&
      info->pMultisampleState &&
      info->pMultisampleState->sampleShadingEnable;

   if (cache) {
      dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);

      /* The pipeline hash covers every input that influences the generated
       * shaders, so a hit lets us skip the whole compilation.
       */
      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
      _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
      _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
      _mesa_sha1_update(&pipeline_hash_ctx, &lower_view_index, sizeof(lower_view_index));
      _mesa_sha1_update(&pipeline_hash_ctx, &pipeline->use_gs_for_polygon_mode_point, sizeof(pipeline->use_gs_for_polygon_mode_point));

      u_foreach_bit(stage, active_stage_mask) {
         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size =
            (const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *)
            vk_find_struct_const(stages[stage].info->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
         enum gl_subgroup_size subgroup_enum = subgroup_size && subgroup_size->requiredSubgroupSize >= 8 ?
            subgroup_size->requiredSubgroupSize : SUBGROUP_SIZE_FULL_SUBGROUPS;

         vk_pipeline_hash_shader_stage(pipeline->base.flags, stages[stage].info, NULL, stages[stage].spirv_hash);
         _mesa_sha1_update(&pipeline_hash_ctx, &subgroup_enum, sizeof(subgroup_enum));
         _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
      }
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      bool cache_hit;
      ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
                                                   &cache_hit);
      if (ret != VK_SUCCESS)
         return ret;

      if (cache_hit)
         return VK_SUCCESS;
   }

   /* Second step: get NIR shaders for all stages. */
   nir_shader_compiler_options nir_opts;
   unsigned supported_bit_sizes = (pdev->options4.Native16BitShaderOpsSupported ? 16 : 0) | 32 | 64;
   dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
   nir_opts.lower_base_vertex = true;
   u_foreach_bit(stage, active_stage_mask) {
      struct mesa_sha1 nir_hash_ctx;

      if (cache) {
         _mesa_sha1_init(&nir_hash_ctx);
         _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
         /* lower_view_index is applied to every stage and
          * force_sample_rate_shading to the fragment stage (see the options
          * below), so both have to be part of the NIR key of all stages.
          */
         _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
         _mesa_sha1_update(&nir_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));
         if (stage == MESA_SHADER_VERTEX)
            _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
         if (stage == last_raster_stage) {
            _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
            _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
            _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
            _mesa_sha1_update(&nir_hash_ctx, &lower_view_index, sizeof(lower_view_index));
         }
         _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_final(&nir_hash_ctx, stages[stage].nir_hash);
      }

      struct dzn_nir_options options = {
         .yz_flip_mode = stage == last_raster_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
         .y_flip_mask = y_flip_mask,
         .z_flip_mask = z_flip_mask,
         .force_sample_rate_shading = stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
         .lower_view_index = lower_view_index,
         .lower_view_index_to_rt_layer = stage == last_raster_stage ? lower_view_index : false,
         .vi_conversions = vi_conversions,
         .nir_opts = &nir_opts,
      };

      struct dxil_spirv_metadata metadata = { 0 };
      ret = dzn_pipeline_get_nir_shader(device, layout,
                                        cache, stages[stage].nir_hash,
                                        pipeline->base.flags,
                                        stages[stage].info, stage,
                                        &options, &metadata,
                                        &pipeline->templates.shaders[stage].nir);
      if (ret != VK_SUCCESS)
         return ret;

      if (stage == MESA_SHADER_VERTEX)
         pipeline->needs_draw_sysvals = metadata.needs_draw_sysvals;
   }

   if (pipeline->use_gs_for_polygon_mode_point) {
      /* TODO: Cache; handle TES */
      struct dzn_nir_point_gs_info gs_info = {
         .cull_mode = info->pRasterizationState->cullMode,
         .front_ccw = info->pRasterizationState->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE,
         .depth_bias = info->pRasterizationState->depthBiasEnable,
         .depth_bias_dynamic = pipeline->zsa.dynamic_depth_bias,
         .ds_fmt = pipeline->zsa.ds_fmt,
         .constant_depth_bias = info->pRasterizationState->depthBiasConstantFactor,
         .slope_scaled_depth_bias = info->pRasterizationState->depthBiasSlopeFactor,
         .depth_bias_clamp = info->pRasterizationState->depthBiasClamp,
         .runtime_data_cbv = {
            .register_space = DZN_REGISTER_SPACE_SYSVALS,
            .base_shader_register = 0,
         }
      };
      /* The geometry shader is generated from the vertex shader. */
      pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir =
         dzn_nir_polygon_point_mode_gs(pipeline->templates.shaders[MESA_SHADER_VERTEX].nir,
                                       &gs_info);

      struct dxil_spirv_runtime_conf conf = {
         .runtime_data_cbv = {
            .register_space = DZN_REGISTER_SPACE_SYSVALS,
            .base_shader_register = 0,
         },
         .yz_flip = {
            .mode = yz_flip_mode,
            .y_mask = y_flip_mask,
            .z_mask = z_flip_mask,
         },
      };

      bool requires_runtime_data;
      NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_GEOMETRY].nir, dxil_spirv_nir_lower_yz_flip,
                 &conf, &requires_runtime_data);

      /* The generated stage takes part in linking and DXIL translation like
       * any other one; it borrows the vertex shader SPIR-V hash.
       */
      active_stage_mask |= (1 << MESA_SHADER_GEOMETRY);
      memcpy(stages[MESA_SHADER_GEOMETRY].spirv_hash, stages[MESA_SHADER_VERTEX].spirv_hash, SHA1_DIGEST_LENGTH);

      if ((active_stage_mask & (1 << MESA_SHADER_FRAGMENT)) &&
          BITSET_TEST(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir->info.system_values_read, SYSTEM_VALUE_FRONT_FACE))
         NIR_PASS_V(pipeline->templates.shaders[MESA_SHADER_FRAGMENT].nir, dxil_nir_forward_front_face);
   }

   /* Third step: link those NIR shaders. We iterate in reverse order
    * so we can eliminate outputs that are never read by the next stage.
    */
   uint32_t link_mask = active_stage_mask;
   while (link_mask != 0) {
      gl_shader_stage stage = util_last_bit(link_mask) - 1;
      link_mask &= ~BITFIELD_BIT(stage);
      /* util_last_bit() returns 0 on an empty mask, which gives
       * MESA_SHADER_NONE for the first stage of the pipeline.
       */
      gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;

      struct dxil_spirv_runtime_conf conf = {
         .runtime_data_cbv = {
            .register_space = DZN_REGISTER_SPACE_SYSVALS,
            .base_shader_register = 0,
      }};

      assert(pipeline->templates.shaders[stage].nir);
      struct dxil_spirv_metadata metadata = { 0 };
      dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
                          prev_stage != MESA_SHADER_NONE ?
                          pipeline->templates.shaders[prev_stage].nir : NULL,
                          &conf, &metadata);

      /* Linking makes a shader depend on its neighbours: remember their
       * SPIR-V hashes so they end up in the DXIL hash.
       */
      if (prev_stage != MESA_SHADER_NONE) {
         memcpy(stages[stage].link_hashes[0], stages[prev_stage].spirv_hash, SHA1_DIGEST_LENGTH);
         memcpy(stages[prev_stage].link_hashes[1], stages[stage].spirv_hash, SHA1_DIGEST_LENGTH);
      }
   }

   /* Apply the layout bindings, then (when caching) compute the DXIL hash of
    * each stage and check whether the DXIL module is already available.
    */
   u_foreach_bit(stage, active_stage_mask) {
      uint8_t bindings_hash[SHA1_DIGEST_LENGTH];

      NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, device, layout,
                 cache ? bindings_hash : NULL);

      if (cache) {
         struct mesa_sha1 dxil_hash_ctx;

         _mesa_sha1_init(&dxil_hash_ctx);
         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].nir_hash, sizeof(stages[stage].nir_hash));
         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[0], sizeof(stages[stage].link_hashes[0]));
         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].link_hashes[1], sizeof(stages[stage].link_hashes[1]));
         _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
         _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
         dxil_hashes[stage] = stages[stage].dxil_hash;

         gl_shader_stage cached_stage;
         D3D12_SHADER_BYTECODE bc;
         ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
         if (ret != VK_SUCCESS)
            return ret;

         if (cached_stage != MESA_SHADER_NONE) {
            assert(cached_stage == stage);
            D3D12_SHADER_BYTECODE *slot =
               dzn_pipeline_get_gfx_shader_slot(out, stage);
            *slot = bc;
            pipeline->templates.shaders[stage].bc = slot;
         }
      }
   }

   uint32_t vert_input_count = 0;
   if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
      /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
       * we can handle location overlaps properly.
       */
      nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
         assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
         assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);

         pipeline->templates.inputs[vert_input_count] = attribs[loc];
         pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
         var->data.driver_location = vert_input_count++;
      }

      if (vert_input_count > 0) {
         d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
         desc->pInputElementDescs = pipeline->templates.inputs;
         desc->NumElements = vert_input_count;
      }
   }

   /* Last step: translate NIR shaders into DXIL modules */
   u_foreach_bit(stage, active_stage_mask) {
      /* Highest active stage below the current one, or MESA_SHADER_NONE. */
      gl_shader_stage prev_stage =
         util_last_bit(active_stage_mask & BITFIELD_MASK(stage)) - 1;
      uint32_t prev_stage_output_clip_size = 0;
      if (stage == MESA_SHADER_FRAGMENT) {
         /* Disable rasterization if the last geometry stage doesn't
          * write the position.
          */
         if (prev_stage == MESA_SHADER_NONE ||
             !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS)) {
            pipeline->rast_disabled_from_missing_position = true;
            /* Clear a cache hit if there was one. */
            pipeline->templates.shaders[stage].bc = NULL;
            continue;
         }
      } else if (prev_stage != MESA_SHADER_NONE) {
         prev_stage_output_clip_size = pipeline->templates.shaders[prev_stage].nir->info.clip_distance_array_size;
      }

      /* Cache hit, we can skip the compilation. */
      if (pipeline->templates.shaders[stage].bc)
         continue;

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(out, stage);

      ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, prev_stage_output_clip_size, slot);
      if (ret != VK_SUCCESS)
         return ret;

      pipeline->templates.shaders[stage].bc = slot;

      if (cache)
         dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
   }

   if (cache)
      dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
                                          dxil_hashes);

   return VK_SUCCESS;
}

/* Maps a vertex input format to the format actually used to fetch the
 * attribute: a set of packed 10/10/10/2 formats is fetched as R32_UINT and
 * the 4-component 8/16-bit scaled formats as their integer counterparts.
 * Any other format is returned unchanged.
 */
VkFormat
dzn_graphics_pipeline_patch_vi_format(VkFormat format)
{
   VkFormat patched = format;

   switch (format) {
   case VK_FORMAT_R8G8B8A8_USCALED:
      patched = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case VK_FORMAT_R8G8B8A8_SSCALED:
      patched = VK_FORMAT_R8G8B8A8_SINT;
      break;
   case VK_FORMAT_R16G16B16A16_USCALED:
      patched = VK_FORMAT_R16G16B16A16_UINT;
      break;
   case VK_FORMAT_R16G16B16A16_SSCALED:
      patched = VK_FORMAT_R16G16B16A16_SINT;
      break;
   case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
   case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
   case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
      patched = VK_FORMAT_R32_UINT;
      break;
   default:
      break;
   }

   return patched;
}

/* Translates the Vulkan vertex input state into D3D12 input element
 * descriptors.
 *
 * pipeline:       receives the vertex buffer count and per-binding strides.
 * in:             Vulkan pipeline creation info.
 * inputs:         descriptor table written at index attr->location.
 * vi_conversions: written at index attr->location with the original pipe
 *                 format for attributes whose format had to be patched.
 *
 * Nothing is touched when there is no vertex attribute. Always returns
 * VK_SUCCESS.
 */
static VkResult
dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
                                   const VkGraphicsPipelineCreateInfo *in,
                                   D3D12_INPUT_ELEMENT_DESC *inputs,
                                   enum pipe_format *vi_conversions)
{
   const VkPipelineVertexInputStateCreateInfo *in_vi =
      in->pVertexInputState;
   const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
      (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
      vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

   if (!in_vi->vertexAttributeDescriptionCount)
      return VK_SUCCESS;

   /* Input classification of each binding, filled from the binding
    * descriptions and looked up by the attributes below.
    */
   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];

   pipeline->vb.count = 0;
   for (uint32_t b = 0; b < in_vi->vertexBindingDescriptionCount; b++) {
      const struct VkVertexInputBindingDescription *binding_desc =
         &in_vi->pVertexBindingDescriptions[b];
      const uint32_t slot = binding_desc->binding;

      pipeline->vb.count = MAX2(pipeline->vb.count, binding_desc->binding + 1);
      pipeline->vb.strides[slot] = binding_desc->stride;

      if (binding_desc->inputRate != VK_VERTEX_INPUT_RATE_INSTANCE) {
         assert(binding_desc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
         slot_class[slot] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
      } else {
         slot_class[slot] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
      }
   }

   for (uint32_t a = 0; a < in_vi->vertexAttributeDescriptionCount; a++) {
      const VkVertexInputAttributeDescription *attr =
         &in_vi->pVertexAttributeDescriptions[a];
      const bool per_instance =
         slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;

      /* Per-instance data steps every instance unless an explicit divisor
       * is provided for the binding; per-vertex data has no step rate.
       */
      uint32_t step_rate = per_instance ? 1 : 0;
      if (per_instance && divisors) {
         for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
            const VkVertexInputBindingDivisorDescriptionEXT *candidate =
               &divisors->pVertexBindingDivisors[d];

            if (candidate->binding == attr->binding) {
               step_rate = candidate->divisor;
               break;
            }
         }
      }

      /* Remember the original format when the attribute is fetched with a
       * patched one.
       */
      const VkFormat fetch_format =
         dzn_graphics_pipeline_patch_vi_format(attr->format);
      if (fetch_format != attr->format)
         vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);

      /* nir_to_dxil() names all vertex inputs TEXCOORDx */
      inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
         .SemanticName = "TEXCOORD",
         .Format = dzn_buffer_get_dxgi_format(fetch_format),
         .InputSlot = attr->binding,
         .AlignedByteOffset = attr->offset,
         .InputSlotClass = slot_class[attr->binding],
         .InstanceDataStepRate = step_rate,
      };
   }

   return VK_SUCCESS;
}

/* Returns the D3D12 primitive topology type (point, line, triangle or patch)
 * a Vulkan primitive topology belongs to.
 */
static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)
{
   D3D12_PRIMITIVE_TOPOLOGY_TYPE type;

   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
      break;

   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
      break;

   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
      break;

   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
      break;

   default:
      unreachable("Invalid primitive topology");
   }

   return type;
}

/* Translates a Vulkan primitive topology into the D3D12 one.
 *
 * in:                   Vulkan topology.
 * patch_control_points: number of control points, must be non-zero for
 *                       patch lists and is ignored otherwise.
 * support_triangle_fan: whether triangle fans can be used natively.
 */
static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points, bool support_triangle_fan)
{
   if (in == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN) {
      /* Without native support, triangle fans are emulated using an
       * intermediate index buffer and drawn as a triangle list.
       */
      if (support_triangle_fan)
         return D3D_PRIMITIVE_TOPOLOGY_TRIANGLEFAN;

      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   }

   if (in == VK_PRIMITIVE_TOPOLOGY_PATCH_LIST) {
      /* The N-control-point patch list values follow each other, starting
       * with the 1-control-point one.
       */
      assert(patch_control_points);
      return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
   }

   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
   default:
      unreachable("Invalid primitive topology");
   }
}

/* Translates the input assembly state.
 *
 * Adds the primitive topology type to the pipeline state stream and stores
 * the draw-time topology and the "triangle fan needs emulation" flag in the
 * pipeline. When primitive restart is enabled, a strip-cut value descriptor
 * (initially DISABLED) is also added, its offset in the stream is recorded
 * and the pipeline is prepared for variants.
 *
 * Returns VK_SUCCESS, or the error returned by
 * dzn_graphics_pipeline_prepare_for_variants().
 */
static VkResult
dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
                                   struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const VkPipelineInputAssemblyStateCreateInfo *in_ia =
      in->pInputAssemblyState;

   /* The tessellation state is only looked at when the pipeline has a
    * tessellation control or evaluation stage.
    */
   const VkPipelineTessellationStateCreateInfo *in_tes = NULL;
   for (uint32_t s = 0; s < in->stageCount; s++) {
      const VkShaderStageFlagBits stage_bit = in->pStages[s].stage;

      if (stage_bit == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
          stage_bit == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
         in_tes = in->pTessellationState;
         break;
      }
   }

   const unsigned patch_control_points =
      in_tes ? in_tes->patchControlPoints : 0;
   const bool is_triangle_fan =
      in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;

   d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
   *prim_top_type = to_prim_topology_type(in_ia->topology);

   pipeline->ia.triangle_fan =
      is_triangle_fan && !pdev->options15.TriangleFanSupported;
   pipeline->ia.topology =
      to_prim_topology(in_ia->topology, patch_control_points,
                       pdev->options15.TriangleFanSupported);

   if (!in_ia->primitiveRestartEnable)
      return VK_SUCCESS;

   d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
   pipeline->templates.desc_offsets.ib_strip_cut =
      (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
   *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;

   return dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
}

/* Translates a Vulkan polygon mode into a D3D12 fill mode. */
static D3D12_FILL_MODE
translate_polygon_mode(VkPolygonMode in)
{
   if (in == VK_POLYGON_MODE_LINE)
      return D3D12_FILL_MODE_WIREFRAME;

   /* Point mode is not expressed through the fill mode, it is handled
    * elsewhere; use a solid fill for it too.
    */
   if (in == VK_POLYGON_MODE_FILL || in == VK_POLYGON_MODE_POINT)
      return D3D12_FILL_MODE_SOLID;

   unreachable("Unsupported polygon mode");
}

/* Translates a Vulkan cull mode into a D3D12 cull mode. */
static D3D12_CULL_MODE
translate_cull_mode(VkCullModeFlags in)
{
   D3D12_CULL_MODE mode;

   switch (in) {
   case VK_CULL_MODE_FRONT_BIT:
      mode = D3D12_CULL_MODE_FRONT;
      break;
   case VK_CULL_MODE_BACK_BIT:
      mode = D3D12_CULL_MODE_BACK;
      break;
   case VK_CULL_MODE_NONE:
   /* Front+back face culling is equivalent to 'rasterization disabled',
    * so no culling is requested in that case either.
    */
   case VK_CULL_MODE_FRONT_AND_BACK:
      mode = D3D12_CULL_MODE_NONE;
      break;
   default:
      unreachable("Unsupported cull mode");
   }

   return mode;
}

/* Converts a floating-point depth bias into a 32-bit integer one.
 *
 * Values outside the int32_t range are clamped to INT32_MIN/INT32_MAX, values
 * inside the range are truncated toward zero. NaN yields 0: it fails both
 * range checks, and converting it to an integer would be undefined behavior.
 */
static int32_t
translate_depth_bias(double depth_bias)
{
   /* NaN is the only value that compares unequal to itself. */
   if (depth_bias != depth_bias)
      return 0;

   if (depth_bias > INT32_MAX)
      return INT32_MAX;

   if (depth_bias < INT32_MIN)
      return INT32_MIN;

   return (int32_t)depth_bias;
}

/* Translates the rasterization state.
 *
 * Copies the static viewports and scissors into the pipeline (unless
 * rasterizer discard is enabled), then adds a rasterizer descriptor to the
 * pipeline state stream and records its offset in the stream. The descriptor
 * flavor depends on the device: RASTERIZER2 when narrow quadrilateral lines
 * are supported, otherwise RASTERIZER1 when dynamic depth bias is supported,
 * otherwise the plain RASTERIZER one.
 */
static void
dzn_graphics_pipeline_translate_rast(struct dzn_device *device,
                                     struct dzn_graphics_pipeline *pipeline,
                                     D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                     const VkGraphicsPipelineCreateInfo *in)
{
   struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   /* Viewport and multisample states are ignored when rasterization is
    * discarded.
    */
   const bool rast_discard = in_rast->rasterizerDiscardEnable;
   const VkPipelineViewportStateCreateInfo *in_vp =
      rast_discard ? NULL : in->pViewportState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      rast_discard ? NULL : in->pMultisampleState;
   const bool front_ccw =
      in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;

   if (in_vp) {
      /* The counts are always recorded, the values only when provided. */
      pipeline->vp.count = in_vp->viewportCount;
      for (uint32_t v = 0; in_vp->pViewports && v < in_vp->viewportCount; v++)
         dzn_translate_viewport(&pipeline->vp.desc[v], &in_vp->pViewports[v]);

      pipeline->scissor.count = in_vp->scissorCount;
      for (uint32_t s = 0; in_vp->pScissors && s < in_vp->scissorCount; s++)
         dzn_translate_rect(&pipeline->scissor.desc[s], &in_vp->pScissors[s]);
   }

   if (pdev->options19.NarrowQuadrilateralLinesSupported) {
      assert(pdev->options16.DynamicDepthBiasSupported);
      d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER2, D3D12_RASTERIZER_DESC2, desc);
      pipeline->templates.desc_offsets.rast =
         (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;

      desc->DepthClipEnable = !in_rast->depthClampEnable;
      desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
      desc->CullMode = translate_cull_mode(in_rast->cullMode);
      desc->FrontCounterClockwise = front_ccw;

      if (in_rast->depthBiasEnable) {
         desc->DepthBias = in_rast->depthBiasConstantFactor;
         desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
         desc->DepthBiasClamp = in_rast->depthBiasClamp;
      }

      desc->LineRasterizationMode = D3D12_LINE_RASTERIZATION_MODE_QUADRILATERAL_NARROW;
   } else {
      /* Both descriptor flavors have the same size, so the stream entry is
       * allocated as a D3D12_RASTERIZER_DESC and accessed through a
       * D3D12_RASTERIZER_DESC1 pointer where the two differ.
       */
      static_assert(sizeof(D3D12_RASTERIZER_DESC) == sizeof(D3D12_RASTERIZER_DESC1), "Casting between these");
      const bool use_desc1 = pdev->options16.DynamicDepthBiasSupported;
      D3D12_PIPELINE_STATE_SUBOBJECT_TYPE rast_type = use_desc1 ?
         D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER1 :
         D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER;
      d3d12_pipeline_state_stream_new_desc(out, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, rast_type, D3D12_RASTERIZER_DESC, desc);
      pipeline->templates.desc_offsets.rast =
         (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;

      desc->DepthClipEnable = !in_rast->depthClampEnable;
      desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
      desc->CullMode = translate_cull_mode(in_rast->cullMode);
      desc->FrontCounterClockwise = front_ccw;

      if (in_rast->depthBiasEnable) {
         /* The constant factor is stored as-is in a DESC1, and clamped and
          * converted to an integer in the plain descriptor.
          */
         if (use_desc1)
            ((D3D12_RASTERIZER_DESC1 *)desc)->DepthBias = in_rast->depthBiasConstantFactor;
         else
            desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);

         desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
         desc->DepthBiasClamp = in_rast->depthBiasClamp;
      }

      /* The Vulkan conformance tests use different reference rasterizers for single-sampled
       * and multi-sampled lines. The single-sampled lines can be bresenham lines, but multi-
       * sampled need to be quadrilateral lines. This still isn't *quite* sufficient, because
       * D3D only supports a line width of 1.4 (per spec), but Vulkan requires us to support
       * 1.0 (and without claiming wide lines, that's all we can support).
       */
      if (in_ms && in_ms->rasterizationSamples > 1)
         desc->MultisampleEnable = true;
   }

   assert(in_rast->lineWidth == 1.0f);
}

/* Translates the multisample state.
 *
 * Nothing is emitted when rasterizer discard is enabled or when no
 * multisample state is provided. Otherwise a sample descriptor (sample count,
 * quality 0) is added to the pipeline state stream, followed by a sample mask
 * when the application provides one; only the first word of the Vulkan
 * sample mask is used.
 */
static void
dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   if (in->pRasterizationState->rasterizerDiscardEnable ||
       !in->pMultisampleState)
      return;

   const VkPipelineMultisampleStateCreateInfo *in_ms = in->pMultisampleState;

   /* TODO: minSampleShading (use VRS), alphaToOneEnable */
   d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
   desc->Count = in_ms->rasterizationSamples;
   desc->Quality = 0;

   if (in_ms->pSampleMask) {
      d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
      *mask = *in_ms->pSampleMask;
   }
}

/* Translates a Vulkan stencil operation into the D3D12 one. The Vulkan
 * "and clamp" variants map to the saturating D3D12 operations, the
 * "and wrap" variants to the plain increment/decrement.
 */
static D3D12_STENCIL_OP
translate_stencil_op(VkStencilOp in)
{
   D3D12_STENCIL_OP op;

   switch (in) {
   case VK_STENCIL_OP_KEEP:
      op = D3D12_STENCIL_OP_KEEP;
      break;
   case VK_STENCIL_OP_ZERO:
      op = D3D12_STENCIL_OP_ZERO;
      break;
   case VK_STENCIL_OP_REPLACE:
      op = D3D12_STENCIL_OP_REPLACE;
      break;
   case VK_STENCIL_OP_INVERT:
      op = D3D12_STENCIL_OP_INVERT;
      break;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      op = D3D12_STENCIL_OP_INCR_SAT;
      break;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      op = D3D12_STENCIL_OP_DECR_SAT;
      break;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      op = D3D12_STENCIL_OP_INCR;
      break;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      op = D3D12_STENCIL_OP_DECR;
      break;
   default:
      unreachable("Invalid stencil op");
   }

   return op;
}

/*
 * Derive the per-face stencil compare/write masks, reference values and
 * "uses reference" flags from the Vulkan depth/stencil state, store them in
 * pipeline->zsa.stencil_test, and copy the resulting read/write masks into
 * the D3D12 depth-stencil descriptor.
 *
 * Reads in->pDepthStencilState and in->pRasterizationState unconditionally,
 * and relies on pipeline->zsa.stencil_test.dynamic_{compare_mask,write_mask,ref}
 * having been set by the caller beforehand.
 */
static void
translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
                       D3D12_DEPTH_STENCIL_DESC2 *out,
                       const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
      in->pDepthStencilState;

   /* The stencil *test* on a face reads the reference value only if that
    * face is not culled, the compare op is neither NEVER nor ALWAYS, and
    * the compare mask is either dynamic or statically non-zero.
    */
   bool front_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->front.compareMask != 0);
   bool back_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->back.compareMask != 0);

   /* Compare mask recorded per face: UINT32_MAX as a placeholder when the
    * mask is dynamic, the static mask when the test uses the reference,
    * and 0 otherwise.
    */
   if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
   else if (front_test_uses_ref)
      pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
   else
      pipeline->zsa.stencil_test.front.compare_mask = 0;

   if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
   else if (back_test_uses_ref)
      pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
   else
      pipeline->zsa.stencil_test.back.compare_mask = 0;

   /* A stencil *write* on a face uses the reference value when the face is
    * not culled and a REPLACE op can be reached:
    *  - failOp, unless the stencil test always passes;
    *  - passOp, unless the stencil test never passes or an enabled depth
    *    test never passes;
    *  - depthFailOp, when the depth test is enabled and can fail.
    */
   bool back_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
   bool front_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));

   /* Static write mask per face; forced to 0 when the write mask is dynamic
    * or the face is culled.
    */
   pipeline->zsa.stencil_test.front.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
      0 : in_zsa->front.writeMask;
   pipeline->zsa.stencil_test.back.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
      0 : in_zsa->back.writeMask;

   /* The reference matters if either the test or a write consumes it. */
   pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
   pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;

   /* Static reference values; 0 when the reference is dynamic. */
   pipeline->zsa.stencil_test.front.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
   pipeline->zsa.stencil_test.back.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;

   /* Mirror the computed masks into the D3D12 descriptor. */
   out->FrontFace.StencilReadMask = pipeline->zsa.stencil_test.front.compare_mask;
   out->BackFace.StencilReadMask = pipeline->zsa.stencil_test.back.compare_mask;
   out->FrontFace.StencilWriteMask = pipeline->zsa.stencil_test.front.write_mask;
   out->BackFace.StencilWriteMask = pipeline->zsa.stencil_test.back.write_mask;
}

/*
 * Append the depth/stencil subobject to the pipeline state stream and record
 * its byte offset in pipeline->templates.desc_offsets.ds.
 *
 * The subobject is a D3D12_DEPTH_STENCIL_DESC2 when the device reports
 * independent front/back stencil ref/mask support, and a
 * D3D12_DEPTH_STENCIL_DESC1 otherwise. An all-zero descriptor is emitted when
 * rasterization is discarded, no depth/stencil state was supplied, or both
 * faces are culled.
 */
static void
dzn_graphics_pipeline_translate_zsa(struct dzn_device *device,
                                    struct dzn_graphics_pipeline *pipeline,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                    const VkGraphicsPipelineCreateInfo *in)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const bool independent_stencil =
      pdev->options14.IndependentFrontAndBackStencilRefMaskSupported;

   const VkPipelineRasterizationStateCreateInfo *rs_info = in->pRasterizationState;
   const VkPipelineDepthStencilStateCreateInfo *zs_info =
      rs_info->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;
   const VkPipelineRenderingCreateInfo *rendering =
      vk_find_struct_const(in, PIPELINE_RENDERING_CREATE_INFO);

   if (zs_info == NULL || rs_info->cullMode == VK_CULL_MODE_FRONT_AND_BACK) {
      /* Ensure depth is disabled if the rasterizer should be disabled / everything culled */
      if (independent_stencil) {
         d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, disabled_desc);
         pipeline->templates.desc_offsets.ds =
            (uintptr_t)disabled_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
         memset(disabled_desc, 0, sizeof(*disabled_desc));
      } else {
         d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, disabled_desc);
         pipeline->templates.desc_offsets.ds =
            (uintptr_t)disabled_desc - (uintptr_t)out->pPipelineStateSubobjectStream;
         memset(disabled_desc, 0, sizeof(*disabled_desc));
      }
      return;
   }

   /* With dynamic rendering, an UNDEFINED attachment format means the
    * corresponding aspect is absent.
    */
   const bool depth_fmt_undefined =
      rendering != NULL && rendering->depthAttachmentFormat == VK_FORMAT_UNDEFINED;
   const bool stencil_fmt_undefined =
      rendering != NULL && rendering->stencilAttachmentFormat == VK_FORMAT_UNDEFINED;

   /* Always build the DESC2 layout first; it is narrowed to DESC1 below if needed. */
   D3D12_DEPTH_STENCIL_DESC2 zs_desc;
   memset(&zs_desc, 0, sizeof(zs_desc));

   /* Depth test: depth must also be enabled when only depth bounds are used. */
   zs_desc.DepthEnable = !depth_fmt_undefined &&
      (zs_info->depthTestEnable || zs_info->depthBoundsTestEnable);
   if (zs_desc.DepthEnable) {
      if (zs_info->depthWriteEnable)
         zs_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
      else
         zs_desc.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;

      if (zs_info->depthTestEnable)
         zs_desc.DepthFunc = dzn_translate_compare_op(zs_info->depthCompareOp);
      else
         zs_desc.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
   }

   /* Depth bounds. */
   pipeline->zsa.depth_bounds.enable = zs_info->depthBoundsTestEnable;
   pipeline->zsa.depth_bounds.min = zs_info->minDepthBounds;
   pipeline->zsa.depth_bounds.max = zs_info->maxDepthBounds;
   zs_desc.DepthBoundsTestEnable = zs_info->depthBoundsTestEnable;

   /* Stencil test. */
   zs_desc.StencilEnable = zs_info->stencilTestEnable && !stencil_fmt_undefined;
   if (zs_desc.StencilEnable) {
      zs_desc.FrontFace.StencilFailOp = translate_stencil_op(zs_info->front.failOp);
      zs_desc.FrontFace.StencilDepthFailOp = translate_stencil_op(zs_info->front.depthFailOp);
      zs_desc.FrontFace.StencilPassOp = translate_stencil_op(zs_info->front.passOp);
      zs_desc.FrontFace.StencilFunc = dzn_translate_compare_op(zs_info->front.compareOp);

      zs_desc.BackFace.StencilFailOp = translate_stencil_op(zs_info->back.failOp);
      zs_desc.BackFace.StencilDepthFailOp = translate_stencil_op(zs_info->back.depthFailOp);
      zs_desc.BackFace.StencilPassOp = translate_stencil_op(zs_info->back.passOp);
      zs_desc.BackFace.StencilFunc = dzn_translate_compare_op(zs_info->back.compareOp);

      pipeline->zsa.stencil_test.enable = true;

      /* Fills the per-face read/write masks of zs_desc and the pipeline's stencil bookkeeping. */
      translate_stencil_test(pipeline, &zs_desc, in);
   }

   if (independent_stencil) {
      d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL2, D3D12_DEPTH_STENCIL_DESC2, stream_zs);
      pipeline->templates.desc_offsets.ds =
         (uintptr_t)stream_zs - (uintptr_t)out->pPipelineStateSubobjectStream;
      *stream_zs = zs_desc;
      return;
   }

   d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, stream_zs);
   pipeline->templates.desc_offsets.ds =
      (uintptr_t)stream_zs - (uintptr_t)out->pPipelineStateSubobjectStream;

   /* DESC1 has a different layout: copy the shared fields one by one. */
   stream_zs->DepthEnable = zs_desc.DepthEnable;
   stream_zs->DepthWriteMask = zs_desc.DepthWriteMask;
   stream_zs->DepthFunc = zs_desc.DepthFunc;
   stream_zs->DepthBoundsTestEnable = zs_desc.DepthBoundsTestEnable;
   stream_zs->StencilEnable = zs_desc.StencilEnable;

   stream_zs->FrontFace.StencilFailOp = zs_desc.FrontFace.StencilFailOp;
   stream_zs->FrontFace.StencilDepthFailOp = zs_desc.FrontFace.StencilDepthFailOp;
   stream_zs->FrontFace.StencilPassOp = zs_desc.FrontFace.StencilPassOp;
   stream_zs->FrontFace.StencilFunc = zs_desc.FrontFace.StencilFunc;

   stream_zs->BackFace.StencilFailOp = zs_desc.BackFace.StencilFailOp;
   stream_zs->BackFace.StencilDepthFailOp = zs_desc.BackFace.StencilDepthFailOp;
   stream_zs->BackFace.StencilPassOp = zs_desc.BackFace.StencilPassOp;
   stream_zs->BackFace.StencilFunc = zs_desc.BackFace.StencilFunc;

   /* No support for independent front/back, just pick front (if set, else back) */
   if (zs_desc.FrontFace.StencilReadMask)
      stream_zs->StencilReadMask = zs_desc.FrontFace.StencilReadMask;
   else
      stream_zs->StencilReadMask = zs_desc.BackFace.StencilReadMask;

   if (zs_desc.FrontFace.StencilWriteMask)
      stream_zs->StencilWriteMask = zs_desc.FrontFace.StencilWriteMask;
   else
      stream_zs->StencilWriteMask = zs_desc.BackFace.StencilWriteMask;
}

/*
 * Convert a Vulkan blend factor into a D3D12 blend factor.
 *
 * is_alpha selects the alpha blend equation: in that case the *_COLOR
 * factors are translated to their *_ALPHA D3D12 equivalents.
 * support_alpha_blend_factor tells whether the D3D12_BLEND_[INV_]ALPHA_FACTOR
 * values may be used; when it is false the constant factors fall back to
 * D3D12_BLEND_[INV_]BLEND_FACTOR.
 */
static D3D12_BLEND
translate_blend_factor(VkBlendFactor in, bool is_alpha, bool support_alpha_blend_factor)
{
   /* CONSTANT_COLOR only maps to the alpha factor in the alpha equation. */
   const bool alpha_factor_for_color = is_alpha && support_alpha_blend_factor;

   switch (in) {
   case VK_BLEND_FACTOR_ZERO:
      return D3D12_BLEND_ZERO;
   case VK_BLEND_FACTOR_ONE:
      return D3D12_BLEND_ONE;

   /* Source factors. */
   case VK_BLEND_FACTOR_SRC_COLOR:
      if (is_alpha)
         return D3D12_BLEND_SRC_ALPHA;
      return D3D12_BLEND_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      if (is_alpha)
         return D3D12_BLEND_INV_SRC_ALPHA;
      return D3D12_BLEND_INV_SRC_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA:
      return D3D12_BLEND_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
      return D3D12_BLEND_INV_SRC_ALPHA;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return D3D12_BLEND_SRC_ALPHA_SAT;

   /* Destination factors. */
   case VK_BLEND_FACTOR_DST_COLOR:
      if (is_alpha)
         return D3D12_BLEND_DEST_ALPHA;
      return D3D12_BLEND_DEST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
      if (is_alpha)
         return D3D12_BLEND_INV_DEST_ALPHA;
      return D3D12_BLEND_INV_DEST_COLOR;
   case VK_BLEND_FACTOR_DST_ALPHA:
      return D3D12_BLEND_DEST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return D3D12_BLEND_INV_DEST_ALPHA;

   /* Constant factors. */
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
      if (alpha_factor_for_color)
         return D3D12_BLEND_ALPHA_FACTOR;
      return D3D12_BLEND_BLEND_FACTOR;
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      if (support_alpha_blend_factor)
         return D3D12_BLEND_ALPHA_FACTOR;
      return D3D12_BLEND_BLEND_FACTOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
      if (alpha_factor_for_color)
         return D3D12_BLEND_INV_ALPHA_FACTOR;
      return D3D12_BLEND_INV_BLEND_FACTOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      if (support_alpha_blend_factor)
         return D3D12_BLEND_INV_ALPHA_FACTOR;
      return D3D12_BLEND_INV_BLEND_FACTOR;

   /* Dual-source factors. */
   case VK_BLEND_FACTOR_SRC1_COLOR:
      if (is_alpha)
         return D3D12_BLEND_SRC1_ALPHA;
      return D3D12_BLEND_SRC1_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
      if (is_alpha)
         return D3D12_BLEND_INV_SRC1_ALPHA;
      return D3D12_BLEND_INV_SRC1_COLOR;
   case VK_BLEND_FACTOR_SRC1_ALPHA:
      return D3D12_BLEND_SRC1_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      return D3D12_BLEND_INV_SRC1_ALPHA;

   default:
      unreachable("Invalid blend factor");
   }
}

/* Convert a Vulkan blend operation into the matching D3D12 blend op. */
static D3D12_BLEND_OP
translate_blend_op(VkBlendOp in)
{
   switch (in) {
   case VK_BLEND_OP_ADD:
      return D3D12_BLEND_OP_ADD;
   case VK_BLEND_OP_SUBTRACT:
      return D3D12_BLEND_OP_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT:
      return D3D12_BLEND_OP_REV_SUBTRACT;
   case VK_BLEND_OP_MIN:
      return D3D12_BLEND_OP_MIN;
   case VK_BLEND_OP_MAX:
      return D3D12_BLEND_OP_MAX;
   default:
      unreachable("Invalid blend op");
   }
}

/* Convert a Vulkan framebuffer logic operation into the matching D3D12 logic op. */
static D3D12_LOGIC_OP
translate_logic_op(VkLogicOp in)
{
   switch (in) {
   /* Constant results and plain copies. */
   case VK_LOGIC_OP_CLEAR:
      return D3D12_LOGIC_OP_CLEAR;
   case VK_LOGIC_OP_SET:
      return D3D12_LOGIC_OP_SET;
   case VK_LOGIC_OP_COPY:
      return D3D12_LOGIC_OP_COPY;
   case VK_LOGIC_OP_COPY_INVERTED:
      return D3D12_LOGIC_OP_COPY_INVERTED;
   case VK_LOGIC_OP_NO_OP:
      return D3D12_LOGIC_OP_NOOP;
   case VK_LOGIC_OP_INVERT:
      return D3D12_LOGIC_OP_INVERT;

   /* AND family. */
   case VK_LOGIC_OP_AND:
      return D3D12_LOGIC_OP_AND;
   case VK_LOGIC_OP_NAND:
      return D3D12_LOGIC_OP_NAND;
   case VK_LOGIC_OP_AND_REVERSE:
      return D3D12_LOGIC_OP_AND_REVERSE;
   case VK_LOGIC_OP_AND_INVERTED:
      return D3D12_LOGIC_OP_AND_INVERTED;

   /* OR family. */
   case VK_LOGIC_OP_OR:
      return D3D12_LOGIC_OP_OR;
   case VK_LOGIC_OP_NOR:
      return D3D12_LOGIC_OP_NOR;
   case VK_LOGIC_OP_OR_REVERSE:
      return D3D12_LOGIC_OP_OR_REVERSE;
   case VK_LOGIC_OP_OR_INVERTED:
      return D3D12_LOGIC_OP_OR_INVERTED;

   /* XOR family. */
   case VK_LOGIC_OP_XOR:
      return D3D12_LOGIC_OP_XOR;
   case VK_LOGIC_OP_EQUIVALENT:
      return D3D12_LOGIC_OP_EQUIV;

   default:
      unreachable("Invalid logic op");
   }
}

/*
 * Append the BLEND subobject to the pipeline state stream and save the static
 * blend constants in pipeline->blend.constants.
 *
 * Nothing is emitted when rasterization is discarded, or when either the
 * color-blend or the multisample state is missing from the create info.
 */
static void
dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *rs_info = in->pRasterizationState;
   if (rs_info->rasterizerDiscardEnable)
      return;

   const VkPipelineColorBlendStateCreateInfo *blend_info = in->pColorBlendState;
   const VkPipelineMultisampleStateCreateInfo *ms_info = in->pMultisampleState;
   if (blend_info == NULL || ms_info == NULL)
      return;

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const bool alpha_factor_ok = pdev->options13.AlphaBlendFactorSupported;

   d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, blend_desc);

   /* The logic op is global in Vulkan; it is replicated on every render target. */
   D3D12_LOGIC_OP shared_logic_op = D3D12_LOGIC_OP_NOOP;
   if (blend_info->logicOpEnable)
      shared_logic_op = translate_logic_op(blend_info->logicOp);

   blend_desc->AlphaToCoverageEnable = ms_info->alphaToCoverageEnable;
   memcpy(pipeline->blend.constants, blend_info->blendConstants,
          sizeof(pipeline->blend.constants));

   for (uint32_t rt = 0; rt < blend_info->attachmentCount; rt++) {
      /* Any attachment that differs from its predecessor needs independent blending. */
      if (rt > 0 &&
          memcmp(&blend_info->pAttachments[rt], &blend_info->pAttachments[rt - 1],
                 sizeof(blend_info->pAttachments[0])) != 0)
         blend_desc->IndependentBlendEnable = true;

      blend_desc->RenderTarget[rt].RenderTargetWriteMask =
         blend_info->pAttachments[rt].colorWriteMask;
      blend_desc->RenderTarget[rt].BlendEnable =
         blend_info->pAttachments[rt].blendEnable;

      /* With the logic op enabled, the blend factors/ops are left untouched. */
      if (blend_info->logicOpEnable) {
         blend_desc->RenderTarget[rt].LogicOpEnable = true;
         blend_desc->RenderTarget[rt].LogicOp = shared_logic_op;
         continue;
      }

      /* Color equation. */
      blend_desc->RenderTarget[rt].SrcBlend =
         translate_blend_factor(blend_info->pAttachments[rt].srcColorBlendFactor,
                                false, alpha_factor_ok);
      blend_desc->RenderTarget[rt].DestBlend =
         translate_blend_factor(blend_info->pAttachments[rt].dstColorBlendFactor,
                                false, alpha_factor_ok);
      blend_desc->RenderTarget[rt].BlendOp =
         translate_blend_op(blend_info->pAttachments[rt].colorBlendOp);

      /* Alpha equation. */
      blend_desc->RenderTarget[rt].SrcBlendAlpha =
         translate_blend_factor(blend_info->pAttachments[rt].srcAlphaBlendFactor,
                                true, alpha_factor_ok);
      blend_desc->RenderTarget[rt].DestBlendAlpha =
         translate_blend_factor(blend_info->pAttachments[rt].dstAlphaBlendFactor,
                                true, alpha_factor_ok);
      blend_desc->RenderTarget[rt].BlendOpAlpha =
         translate_blend_op(blend_info->pAttachments[rt].alphaBlendOp);
   }
}


/*
 * Common initialization shared by graphics and compute pipelines.
 *
 * Copies the root-signature and descriptor-set information the pipeline needs
 * from the pipeline layout, takes a reference on the layout's root signature,
 * initializes the Vulkan base object and appends the ROOT_SIGNATURE subobject
 * to stream_desc.
 */
static void
dzn_pipeline_init(struct dzn_pipeline *pipeline,
                  struct dzn_device *device,
                  VkPipelineBindPoint type,
                  VkPipelineCreateFlags2KHR flags,
                  struct dzn_pipeline_layout *layout,
                  D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
{
   pipeline->type = type;
   pipeline->flags = flags;

   /* Root signature parameters, mirrored from the layout. */
   pipeline->root.sets_param_count = layout->root.sets_param_count;
   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
   pipeline->root.dynamic_buffer_bindless_param_idx = layout->root.dynamic_buffer_bindless_param_idx;

   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));

   /* The pipeline holds its own reference; dzn_pipeline_finish() drops it. */
   pipeline->root.sig = layout->root.sig;
   ID3D12RootSignature_AddRef(pipeline->root.sig);

   /* Descriptor counts and per-set information. */
   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));

   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));

   pipeline->set_count = layout->set_count;
   pipeline->dynamic_buffer_count = layout->dynamic_buffer_count;

   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);

   /* Upper bound used by the stream macro's overflow assertion. */
   ASSERTED uint32_t stream_capacity = MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;
   if (type == VK_PIPELINE_BIND_POINT_GRAPHICS)
      stream_capacity = MAX_GFX_PIPELINE_STATE_STREAM_SIZE;

   d3d12_pipeline_state_stream_new_desc_abbrev(stream_desc, stream_capacity, ROOT_SIGNATURE,
                                               ID3D12RootSignature *, root_sig_slot);
   *root_sig_slot = pipeline->root.sig;
}

/*
 * Release the D3D12 objects owned by the common pipeline state (pipeline
 * state object and root signature, when present) and finish the Vulkan base
 * object. The pipeline memory itself is not freed here.
 */
static void
dzn_pipeline_finish(struct dzn_pipeline *pipeline)
{
   if (pipeline->state != NULL)
      ID3D12PipelineState_Release(pipeline->state);

   if (pipeline->root.sig != NULL)
      ID3D12RootSignature_Release(pipeline->root.sig);

   vk_object_base_finish(&pipeline->base);
}

/*
 * Hash-table entry destructor for pipeline variants: releases the variant's
 * pipeline state object, if it has one.
 */
static void dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
{
   struct dzn_graphics_pipeline_variant *entry = he->data;

   if (entry->state == NULL)
      return;

   ID3D12PipelineState_Release(entry->state);
}

static void dzn_graphics_pipeline_delete_cmd_sig(struct hash_entry *he)
{
   ID3D12CommandSignature_Release((ID3D12CommandSignature *)he->data);
}

/*
 * Free the NIR shader kept for every template shader slot and reset the
 * pointers so the function can be called again.
 */
static void
dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
{
   const uint32_t slot_count = ARRAY_SIZE(pipeline->templates.shaders);

   for (uint32_t slot = 0; slot < slot_count; slot++) {
      ralloc_free(pipeline->templates.shaders[slot].nir);
      pipeline->templates.shaders[slot].nir = NULL;
   }
}

/*
 * Free the DXIL bytecode referenced by every template shader slot that has a
 * bytecode descriptor attached, then detach the descriptor from the slot.
 */
static void
dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
{
   const uint32_t slot_count = ARRAY_SIZE(pipeline->templates.shaders);

   for (uint32_t slot = 0; slot < slot_count; slot++) {
      /* Slots without bytecode have nothing to free. */
      if (pipeline->templates.shaders[slot].bc == NULL)
         continue;

      free((void *)pipeline->templates.shaders[slot].bc->pShaderBytecode);
      pipeline->templates.shaders[slot].bc = NULL;
   }
}

/*
 * Destroy a graphics pipeline: variants, leftover NIR/DXIL shaders, indirect
 * command signatures, the common pipeline state, and finally the pipeline
 * allocation itself. A NULL pipeline is accepted and ignored.
 */
static void
dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
                              const VkAllocationCallbacks *alloc)
{
   if (pipeline == NULL)
      return;

   /* Pipeline state variants. */
   _mesa_hash_table_destroy(pipeline->variants,
                            dzn_graphics_pipeline_delete_variant);

   /* Shader templates that may still be around. */
   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);

   /* Indirect draw command signatures: fixed slots first, then custom strides. */
   const uint32_t cmd_sig_count = ARRAY_SIZE(pipeline->indirect_cmd_sigs);
   for (uint32_t idx = 0; idx < cmd_sig_count; idx++) {
      if (pipeline->indirect_cmd_sigs[idx] == NULL)
         continue;

      ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[idx]);
   }
   _mesa_hash_table_destroy(pipeline->custom_stride_cmd_sigs,
                            dzn_graphics_pipeline_delete_cmd_sig);

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

/*
 * Create a graphics pipeline object from a VkGraphicsPipelineCreateInfo.
 *
 * The function allocates the pipeline, builds the D3D12 pipeline state stream
 * template (root signature, flags, input assembly, rasterizer, multisample,
 * depth/stencil, blend, attachment formats, view instancing), compiles the
 * shaders and, unless the pipeline relies on variants, creates the D3D12
 * pipeline state object right away. When variants are in use the stream
 * template stored in pipeline->templates is left for later consumption.
 *
 * Returns VK_SUCCESS and writes the new handle to *out, or an error code, in
 * which case the partially built pipeline is destroyed and *out is untouched.
 */
static VkResult
dzn_graphics_pipeline_create(struct dzn_device *device,
                             VkPipelineCache cache,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *out)
{
   /* The VIEW_INSTANCING subobject only stores a pointer to its location
    * table, and the stream it belongs to lives in pipeline->templates, which
    * is not consumed before this function returns when variants are in use.
    * The table therefore needs static storage: a stack array would leave a
    * dangling pointer in the template.
    */
   static const D3D12_VIEW_INSTANCE_LOCATION vi_locs[D3D12_MAX_VIEW_INSTANCE_COUNT] = {
      { .ViewportArrayIndex = 0, .RenderTargetArrayIndex = 0 },
      { .ViewportArrayIndex = 0, .RenderTargetArrayIndex = 1 },
      { .ViewportArrayIndex = 0, .RenderTargetArrayIndex = 2 },
      { .ViewportArrayIndex = 0, .RenderTargetArrayIndex = 3 },
   };
   STATIC_ASSERT(D3D12_MAX_VIEW_INSTANCE_COUNT == 4);

   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
      vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
   VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   uint32_t color_count = 0;
   VkFormat color_fmts[MAX_RTS] = { 0 };
   VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
   VkResult ret;
   HRESULT hres = 0;

   struct dzn_graphics_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* The stream template is stored inside the pipeline object itself. */
   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
   stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_GRAPHICS,
                     vk_graphics_pipeline_create_flags(pCreateInfo),
                     layout, stream_desc);
   D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
   enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };

   ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
                                            attribs, vi_conversions);
   if (ret != VK_SUCCESS)
      goto out;

   d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, FLAGS, D3D12_PIPELINE_STATE_FLAGS, flags);
   *flags = D3D12_PIPELINE_STATE_FLAG_NONE;

   /* Record which states are dynamic. Some of them cannot be changed on an
    * existing D3D12 pipeline state and require pipeline variants instead.
    */
   if (pCreateInfo->pDynamicState) {
      for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
         switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
         case VK_DYNAMIC_STATE_VIEWPORT:
            pipeline->vp.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_SCISSOR:
            pipeline->scissor.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
            pipeline->zsa.stencil_test.dynamic_ref = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
            pipeline->zsa.stencil_test.dynamic_compare_mask = true;
            ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
            if (ret)
               goto out;
            break;
         case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
            pipeline->zsa.stencil_test.dynamic_write_mask = true;
            ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
            if (ret)
               goto out;
            break;
         case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
            pipeline->blend.dynamic_constants = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
            pipeline->zsa.depth_bounds.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BIAS:
            pipeline->zsa.dynamic_depth_bias = true;
            if (pdev->options16.DynamicDepthBiasSupported) {
               *flags |= D3D12_PIPELINE_STATE_FLAG_DYNAMIC_DEPTH_BIAS;
            } else {
               ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
               if (ret)
                  goto out;
            }
            break;
         case VK_DYNAMIC_STATE_LINE_WIDTH:
            /* Nothing to do since we just support lineWidth = 1. */
            break;
         default: unreachable("Unsupported dynamic state");
         }
      }
   }

   ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
   if (ret)
      goto out;

   dzn_graphics_pipeline_translate_rast(device, pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_zsa(device, pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);

   /* Gather attachment formats and the view mask, either from the render
    * pass or from the dynamic-rendering create info.
    */
   unsigned view_mask = 0;
   if (pass) {
      const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
      color_count = subpass->color_count;
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         uint32_t idx = subpass->color_attachments[i].attachment;

         /* Unused attachments keep the zero-initialized format. */
         if (idx == VK_ATTACHMENT_UNUSED) continue;

         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[idx];

         color_fmts[i] = attachment->format;
      }

      if (subpass->depth_stencil_attachment &&
          subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[subpass->depth_stencil_attachment->attachment];

         zs_fmt = attachment->format;
      }

      view_mask = subpass->view_mask;
   } else if (ri) {
      color_count = ri->colorAttachmentCount;
      /* pColorAttachmentFormats may be NULL when there are no color
       * attachments; passing NULL to memcpy() is undefined even for a zero
       * size, so skip the copy entirely in that case.
       */
      if (color_count > 0) {
         memcpy(color_fmts, ri->pColorAttachmentFormats,
                sizeof(color_fmts[0]) * color_count);
      }
      if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->depthAttachmentFormat;
      else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->stencilAttachmentFormat;

      view_mask = ri->viewMask;
   }

   if (color_count > 0) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
      rts->NumRenderTargets = color_count;
      for (uint32_t i = 0; i < color_count; i++) {
         rts->RTFormats[i] =
            dzn_image_get_dxgi_format(pdev, color_fmts[i],
                                      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                      VK_IMAGE_ASPECT_COLOR_BIT);
      }
   }

   if (zs_fmt != VK_FORMAT_UNDEFINED) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
      *ds_fmt =
         dzn_image_get_dxgi_format(pdev, zs_fmt,
                                   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT);
      pipeline->zsa.ds_fmt = *ds_fmt;
   }

   pipeline->multiview.view_mask = MAX2(view_mask, 1);
   if (view_mask != 0 && /* Is multiview */
       view_mask != 1 && /* Is non-trivially multiview */
       (view_mask & ~((1 << D3D12_MAX_VIEW_INSTANCE_COUNT) - 1)) == 0 && /* Uses only views 0 thru 3 */
       pdev->options3.ViewInstancingTier > D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED /* Actually supported */) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, VIEW_INSTANCING, D3D12_VIEW_INSTANCING_DESC, vi);
      vi->pViewInstanceLocations = vi_locs;
      /* The instance count is the index of the highest view in use, plus one. */
      for (uint32_t i = 0; i < D3D12_MAX_VIEW_INSTANCE_COUNT; ++i) {
         if (view_mask & (1 << i))
            vi->ViewInstanceCount = i + 1;
      }
      vi->Flags = D3D12_VIEW_INSTANCING_FLAG_ENABLE_VIEW_INSTANCE_MASKING;
      pipeline->multiview.native_view_instancing = true;
   }

   ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
                                               layout, stream_desc,
                                               attribs, vi_conversions,
                                               pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   /* If we have no position output from a pre-rasterizer stage, we need to make sure that
    * depth is disabled, to fully disable the rasterizer. We can only know this after compiling
    * or loading the shaders.
    */
   if (pipeline->rast_disabled_from_missing_position) {
      if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
         D3D12_DEPTH_STENCIL_DESC2 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
         if (ds)
            ds->DepthEnable = ds->StencilEnable = false;
      } else {
         D3D12_DEPTH_STENCIL_DESC1 *ds = dzn_graphics_pipeline_get_desc(pipeline, pipeline->templates.stream_buf, ds);
         if (ds)
            ds->DepthEnable = ds->StencilEnable = false;
      }
   }

   /* Without variants the pipeline state can be created immediately and the
    * DXIL bytecode is no longer needed afterwards.
    */
   if (!pipeline->variants) {
      hres = ID3D12Device4_CreatePipelineState(device->dev, stream_desc,
                                               &IID_ID3D12PipelineState,
                                               (void **)&pipeline->base.state);
      if (FAILED(hres)) {
         ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
   }

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   ret = VK_SUCCESS;

out:
   /* On failure the destroy helper releases everything acquired so far. */
   if (ret != VK_SUCCESS)
      dzn_graphics_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_graphics_pipeline_to_handle(pipeline);

   return ret;
}

/*
 * Copy into masked_key only the stencil fields of key that can affect the
 * pipeline state, based on the depth/stencil descriptor stored in the
 * pipeline template:
 *  - nothing is copied if there is no descriptor or stencil is disabled;
 *  - a face's compare mask is copied only when that face's stencil function
 *    is neither NEVER nor ALWAYS;
 *  - both write masks are copied only when the write mask is dynamic.
 */
static void
mask_key_for_stencil_state(struct dzn_physical_device *pdev,
                           struct dzn_graphics_pipeline *pipeline,
                           const struct dzn_graphics_pipeline_variant_key *key,
                           struct dzn_graphics_pipeline_variant_key *masked_key)
{
   bool stencil_enabled = false;
   bool front_reads_mask = false;
   bool back_reads_mask = false;

   /* The template layout depends on the device capability; extract the few
    * bits needed from whichever descriptor type is in use.
    */
   if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
      const D3D12_DEPTH_STENCIL_DESC2 *templ =
         dzn_graphics_pipeline_get_desc_template(pipeline, ds);

      if (templ && templ->StencilEnable) {
         stencil_enabled = true;
         front_reads_mask =
            templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
            templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS;
         back_reads_mask =
            templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
            templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS;
      }
   } else {
      const D3D12_DEPTH_STENCIL_DESC1 *templ =
         dzn_graphics_pipeline_get_desc_template(pipeline, ds);

      if (templ && templ->StencilEnable) {
         stencil_enabled = true;
         front_reads_mask =
            templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
            templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS;
         back_reads_mask =
            templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
            templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS;
      }
   }

   if (!stencil_enabled)
      return;

   if (front_reads_mask)
      masked_key->stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;

   if (back_reads_mask)
      masked_key->stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;

   if (pipeline->zsa.stencil_test.dynamic_write_mask) {
      masked_key->stencil_test.front.write_mask = key->stencil_test.front.write_mask;
      masked_key->stencil_test.back.write_mask = key->stencil_test.back.write_mask;
   }
}

static void
update_stencil_state(struct dzn_physical_device *pdev,
                     struct dzn_graphics_pipeline *pipeline,
                     uintptr_t *stream_buf,
                     const struct dzn_graphics_pipeline_variant_key *masked_key)
{
   if (pdev->options14.IndependentFrontAndBackStencilRefMaskSupported) {
      D3D12_DEPTH_STENCIL_DESC2 *ds =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
      if (ds && ds->StencilEnable) {
         if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->FrontFace.StencilReadMask = masked_key->stencil_test.front.compare_mask;
            }

            if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->BackFace.StencilReadMask = masked_key->stencil_test.back.compare_mask;
            }
         }

         if (pipeline->zsa.stencil_test.dynamic_write_mask) {
            ds->FrontFace.StencilWriteMask = masked_key->stencil_test.front.write_mask;
            ds->BackFace.StencilWriteMask = masked_key->stencil_test.back.write_mask;
         }
      }
   } else {
      D3D12_DEPTH_STENCIL_DESC1 *ds =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
      if (ds && ds->StencilEnable) {
         if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key->stencil_test.front.compare_mask;
            }

            if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key->stencil_test.back.compare_mask;
            }

            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
                  ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                  ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
               assert(masked_key->stencil_test.front.compare_mask == masked_key->stencil_test.back.compare_mask);
         }

         if (pipeline->zsa.stencil_test.dynamic_write_mask) {
            assert(!masked_key->stencil_test.front.write_mask ||
                     !masked_key->stencil_test.back.write_mask ||
                     masked_key->stencil_test.front.write_mask == masked_key->stencil_test.back.write_mask);
            ds->StencilWriteMask =
               masked_key->stencil_test.front.write_mask |
               masked_key->stencil_test.back.write_mask;
         }
      }
   }
}

/*
 * Return the D3D12 pipeline state matching the dynamic state described by
 * key, creating and caching a new variant the first time a given combination
 * is requested.
 *
 * When the pipeline has no variant table, pipeline->base.state is returned
 * as is. Otherwise the key is reduced to the fields that matter for this
 * pipeline (strip-cut value, depth bias when it cannot be set dynamically,
 * stencil masks) and used to look the variant up.
 *
 * On return, pipeline->base.state references the selected variant state: the
 * variant state gets an extra reference and the previous base state is
 * released.
 *
 * Returns NULL if the variant could not be allocated; in that case
 * pipeline->base.state is left untouched.
 */
ID3D12PipelineState *
dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
                                const struct dzn_graphics_pipeline_variant_key *key)
{
   if (!pipeline->variants)
      return pipeline->base.state;

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   /* Fields that are irrelevant for this pipeline stay zero so that they
    * cannot produce distinct variants.
    */
   struct dzn_graphics_pipeline_variant_key masked_key = { 0 };

   if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
      masked_key.ib_strip_cut = key->ib_strip_cut;

   if (!pdev->options16.DynamicDepthBiasSupported &&
       dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
       pipeline->zsa.dynamic_depth_bias)
      masked_key.depth_bias = key->depth_bias;

   mask_key_for_stencil_state(pdev, pipeline, key, &masked_key);

   struct hash_entry *he =
      _mesa_hash_table_search(pipeline->variants, &masked_key);

   struct dzn_graphics_pipeline_variant *variant;

   if (!he) {
      /* The variant is a ralloc child of the table and embeds the key the
       * table entry points at.
       */
      variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
      if (!variant)
         return NULL;

      variant->key = masked_key;

      /* Start from the template stream and patch the dynamic bits in. */
      uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
      D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
         .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
         .pPipelineStateSubobjectStream = stream_buf,
      };

      memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);

      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
      if (ib_strip_cut)
         *ib_strip_cut = masked_key.ib_strip_cut;

      D3D12_RASTERIZER_DESC *rast =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
      if (!pdev->options16.DynamicDepthBiasSupported && rast && pipeline->zsa.dynamic_depth_bias) {
         rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
         rast->DepthBiasClamp = masked_key.depth_bias.clamp;
         rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
      }

      update_stencil_state(pdev, pipeline, stream_buf, &masked_key);

      ASSERTED HRESULT hres = ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
                                                                &IID_ID3D12PipelineState,
                                                                (void**)(&variant->state));
      assert(!FAILED(hres));
      he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
      assert(he);
   } else {
      variant = he->data;
   }

   /* Take the new reference before dropping the old one: both pointers may
    * designate the same object.
    */
   if (variant->state)
      ID3D12PipelineState_AddRef(variant->state);

   if (pipeline->base.state)
      ID3D12PipelineState_Release(pipeline->base.state);

   pipeline->base.state = variant->state;
   return variant->state;
}

/* Upper bound on the indirect arguments built below: index buffer view,
 * draw parameters, draw id and the draw itself.
 */
#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4

/*
 * Return the command signature used for indirect draws described by key,
 * creating and caching it on first use.
 *
 * Signatures using the natural stride of their argument layout (or a custom
 * stride equal to it) are cached in pipeline->indirect_cmd_sigs[], indexed by
 * key.value. Signatures with any other custom stride are cached in the
 * lazily-created pipeline->custom_stride_cmd_sigs hash table.
 *
 * Returns NULL when the signature, the hash table or the cache entry cannot
 * be created.
 */
ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
                                           struct dzn_indirect_draw_cmd_sig_key key)
{
   assert(key.value < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   struct dzn_device *device = container_of(pipeline->base.base.device, struct dzn_device, vk);

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];
   uint32_t stride = 0;

   if (key.triangle_fan) {
      assert(key.indexed);
      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
      stride += sizeof(D3D12_INDEX_BUFFER_VIEW);
   }

   if (key.draw_params) {
      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
            .Num32BitValuesToSet = 2,
         },
      };
      stride += sizeof(uint32_t) * 2;
   }

   if (key.draw_id) {
      struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk);
      if (pdev->options21.ExecuteIndirectTier >= D3D12_EXECUTE_INDIRECT_TIER_1_1) {
         /* The incrementing constant does not add to the stride computed
          * here.
          */
         cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
            .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INCREMENTING_CONSTANT,
            .IncrementingConstant = {
               .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
               .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
            },
         };
      } else {
         cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC){
            .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
            .Constant = {
               .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
               .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
               .Num32BitValuesToSet = 1,
            },
         };
         stride += sizeof(uint32_t);
      }
   }

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = key.indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };
   stride += key.indexed ? sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) :
                           sizeof(D3D12_DRAW_ARGUMENTS);

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

   /* Only strides differing from the natural one go through the hash table. */
   const bool use_custom_stride =
      key.custom_stride != 0 && key.custom_stride != stride;
   ID3D12CommandSignature *cmdsig = NULL;

   if (!use_custom_stride) {
      cmdsig = pipeline->indirect_cmd_sigs[key.value];
   } else {
      if (!pipeline->custom_stride_cmd_sigs) {
         pipeline->custom_stride_cmd_sigs =
            _mesa_hash_table_create(NULL, gfx_pipeline_cmd_signature_key_hash, gfx_pipeline_cmd_signature_key_equal);
         if (!pipeline->custom_stride_cmd_sigs)
            return NULL;
      }
      struct hash_entry *entry = _mesa_hash_table_search(pipeline->custom_stride_cmd_sigs, &key);
      if (entry)
         cmdsig = entry->data;
   }

   if (cmdsig)
      return cmdsig;

   /* The hash table keeps the key pointer it is given, and 'key' is a
    * by-value parameter that dies when this function returns. Store a copy
    * owned by the table instead (ralloc child, released together with the
    * table). A byte copy is used so the stored key is bit-identical to the
    * one used for the lookup above.
    */
   struct dzn_indirect_draw_cmd_sig_key *stored_key = NULL;
   if (use_custom_stride) {
      stored_key = ralloc_size(pipeline->custom_stride_cmd_sigs, sizeof(key));
      if (!stored_key)
         return NULL;
      memcpy(stored_key, &key, sizeof(key));
   }

   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride = key.custom_stride ? key.custom_stride : stride,
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   /* A root signature should be specified iff root params are changing */
   ID3D12RootSignature *root_sig = key.draw_id || key.draw_params ?
      pipeline->base.root.sig : NULL;
   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
                                           root_sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&cmdsig);
   if (FAILED(hres)) {
      ralloc_free(stored_key);
      return NULL;
   }

   if (!use_custom_stride) {
      pipeline->indirect_cmd_sigs[key.value] = cmdsig;
   } else if (!_mesa_hash_table_insert(pipeline->custom_stride_cmd_sigs, stored_key, cmdsig)) {
      /* Without a cache entry nobody would ever release the signature. */
      ID3D12CommandSignature_Release(cmdsig);
      ralloc_free(stored_key);
      return NULL;
   }

   return cmdsig;
}

/*
 * vkCreateGraphicsPipelines() entry point.
 *
 * Creates the requested pipelines one by one. Entries that fail, and all
 * entries following the point where creation stops, are set to
 * VK_NULL_HANDLE.
 *
 * Returns VK_SUCCESS only if every pipeline was created; otherwise the code
 * of the last pipeline that failed.
 */
VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,
                            VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      VkResult pipeline_result =
         dzn_graphics_pipeline_create(device,
                                      pipelineCache,
                                      &pCreateInfos[i],
                                      pAllocator,
                                      &pPipelines[i]);
      if (pipeline_result == VK_SUCCESS)
         continue;

      /* Keep the failure code: a later successful creation must not turn
       * the overall result back into VK_SUCCESS.
       */
      result = pipeline_result;
      pPipelines[i] = VK_NULL_HANDLE;

      /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
       * is not obvious what error should be reported upon 2 different
       * failures.
       */
      if (pipeline_result != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   /* Null out whatever was not attempted after an early exit. */
   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

/*
 * Release everything owned by a compute pipeline and free the object itself.
 * A NULL pipeline is accepted and ignored.
 *
 * alloc is the allocator handed to vk_free2() alongside the device allocator.
 */
static void
dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
                             const VkAllocationCallbacks *alloc)
{
   if (pipeline == NULL) {
      return;
   }

   /* The indirect command signature only exists once it has been requested. */
   if (pipeline->indirect_cmd_sig != NULL) {
      ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);
   }

   dzn_pipeline_finish(&pipeline->base);

   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

/*
 * Look a compute pipeline up in the pipeline cache.
 *
 * The cache entry keyed by pipeline_hash holds the SHA1 of the DXIL shader,
 * which is then resolved through dzn_pipeline_cache_lookup_dxil_shader().
 * When both lookups succeed, the bytecode is stored in *dxil, a CS subobject
 * referencing it is appended to stream_desc and *cache_hit is set to true.
 *
 * Returns VK_SUCCESS on both hit and miss (including when cache is NULL);
 * any other value is the error reported by the DXIL shader lookup.
 */
static VkResult
dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
                                           uint8_t *pipeline_hash,
                                           D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                           D3D12_SHADER_BYTECODE *dxil,
                                           bool *cache_hit)
{
   *cache_hit = false;

   if (cache == NULL)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *pipeline_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (pipeline_obj == NULL)
      return VK_SUCCESS;

   struct dzn_cached_blob *blob =
      container_of(pipeline_obj, struct dzn_cached_blob, base);

   /* The blob payload is exactly one SHA1: the hash of the DXIL shader. */
   assert(blob->size == SHA1_DIGEST_LENGTH);

   const uint8_t *dxil_hash = blob->data;
   gl_shader_stage stage;

   VkResult ret =
      dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);

   /* 'stage' is only inspected once the lookup reported success. */
   if (ret == VK_SUCCESS && stage != MESA_SHADER_NONE) {
      assert(stage == MESA_SHADER_COMPUTE);

      d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs_slot);
      *cs_slot = *dxil;
      *cache_hit = true;
   }

   /* Drop the reference obtained from the pipeline-level lookup. */
   vk_pipeline_cache_object_unref(cache->base.device, pipeline_obj);
   return ret;
}

/*
 * Record in the pipeline cache that the compute pipeline identified by
 * pipeline_hash uses the DXIL shader identified by dxil_hash.
 *
 * The entry is a cached blob whose payload is the DXIL hash. Nothing is
 * added if the blob cannot be created.
 */
static void
dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
                                        uint8_t *pipeline_hash,
                                        uint8_t *dxil_hash)
{
   struct vk_pipeline_cache_object *obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);

   if (obj != NULL) {
      struct dzn_cached_blob *blob =
         container_of(obj, struct dzn_cached_blob, base);

      /* Fill the payload before the object is handed to the cache. */
      memcpy((void *)blob->data, dxil_hash, SHA1_DIGEST_LENGTH);

      /* Release the reference returned by the cache once the add is done. */
      obj = vk_pipeline_cache_add_object(cache, obj);
      vk_pipeline_cache_object_unref(cache->base.device, obj);
   }
}

/*
 * Produce the DXIL compute shader for a pipeline and append it, as a CS
 * subobject, to the pipeline state stream.
 *
 * When a pipeline cache is provided, three hashes are used to skip work:
 *  - pipeline_hash: looked up first to find an already compiled pipeline,
 *  - nir_hash: passed to dzn_pipeline_get_nir_shader(),
 *  - dxil_hash: looked up once bindings have been adjusted.
 *
 * device       device the pipeline belongs to
 * pipeline     pipeline being created (its flags feed the shader hash)
 * cache        pipeline cache, may be NULL
 * layout       pipeline layout used for hashing and binding adjustment
 * stream_desc  state stream receiving the CS subobject
 * shader       receives the shader bytecode; dzn_compute_pipeline_create()
 *              frees shader->pShaderBytecode afterwards
 * info         compute pipeline creation info
 *
 * Returns VK_SUCCESS or the error reported by the failing step.
 */
static VkResult
dzn_compute_pipeline_compile_shader(struct dzn_device *device,
                                    struct dzn_compute_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    const struct dzn_pipeline_layout *layout,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                    D3D12_SHADER_BYTECODE *shader,
                                    const VkComputePipelineCreateInfo *info)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   /* The hash buffers are only filled in, and only meaningful, when a cache
    * is provided.
    */
   uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH], nir_hash[SHA1_DIGEST_LENGTH];
   VkResult ret = VK_SUCCESS;
   nir_shader *nir = NULL;

   if (cache) {
      struct mesa_sha1 pipeline_hash_ctx;

      /* pipeline_hash = SHA1(bindless mode, shader stage hash, hash of the
       * compute stage of the layout).
       */
      _mesa_sha1_init(&pipeline_hash_ctx);
      vk_pipeline_hash_shader_stage(pipeline->base.flags, &info->stage, NULL, spirv_hash);
      _mesa_sha1_update(&pipeline_hash_ctx, &device->bindless, sizeof(device->bindless));
      _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
                        sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      /* Fast path: on a hit the lookup has already appended the CS
       * subobject to the stream, so there is nothing left to do.
       */
      bool cache_hit = false;
      ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
                                                       stream_desc, shader,
                                                       &cache_hit);
      if (ret != VK_SUCCESS || cache_hit)
         goto out;
   }

   if (cache) {
      /* nir_hash = SHA1(bindless mode, shader stage hash). */
      struct mesa_sha1 nir_hash_ctx;
      _mesa_sha1_init(&nir_hash_ctx);
      _mesa_sha1_update(&nir_hash_ctx, &device->bindless, sizeof(device->bindless));
      _mesa_sha1_update(&nir_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_final(&nir_hash_ctx, nir_hash);
   }
   nir_shader_compiler_options nir_opts;
   const unsigned supported_bit_sizes = 16 | 32 | 64;
   dxil_get_nir_compiler_options(&nir_opts, dzn_get_shader_model(pdev), supported_bit_sizes, supported_bit_sizes);
   struct dzn_nir_options options = {
      .nir_opts = &nir_opts,
   };
   struct dxil_spirv_metadata metadata = { 0 };
   ret = dzn_pipeline_get_nir_shader(device, layout, cache, nir_hash,
                                     pipeline->base.flags, &info->stage,
                                     MESA_SHADER_COMPUTE,
                                     &options, &metadata, &nir);
   /* Direct return rather than 'goto out': nothing is released on this
    * path.
    */
   if (ret != VK_SUCCESS)
      return ret;

   uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];

   /* The pass is only handed a bindings hash buffer when a cache is in use. */
   NIR_PASS_V(nir, adjust_var_bindings, device, layout, cache ? bindings_hash : NULL);

   if (cache) {
      struct mesa_sha1 dxil_hash_ctx;

      /* dxil_hash = SHA1(nir_hash, shader stage hash, bindings hash). */
      _mesa_sha1_init(&dxil_hash_ctx);
      _mesa_sha1_update(&dxil_hash_ctx, nir_hash, sizeof(nir_hash));
      _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
      _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);

      gl_shader_stage stage;

      ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
      if (ret != VK_SUCCESS)
         goto out;

      /* A stage other than MESA_SHADER_NONE is treated as a hit: reuse the
       * bytecode and remember the pipeline -> DXIL association.
       */
      if (stage != MESA_SHADER_NONE) {
         assert(stage == MESA_SHADER_COMPUTE);
         d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
         *cs = *shader;
         dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
         goto out;
      }
   }

   /* No cached bytecode is available: compile the NIR shader to DXIL. */
   ret = dzn_pipeline_compile_shader(device, nir, 0, shader);
   if (ret != VK_SUCCESS)
      goto out;

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
   *cs = *shader;

   if (cache) {
      /* Store both the bytecode and the pipeline -> DXIL association. */
      dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
      dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
   }

out:
   /* nir is still NULL when the pipeline-level cache lookup short-circuited. */
   ralloc_free(nir);
   return ret;
}

/*
 * Create a compute pipeline object.
 *
 * Allocates the pipeline, obtains its DXIL shader (from the cache or by
 * compiling) and creates the D3D12 pipeline state from the resulting state
 * stream. On success the handle is written to *out; on failure the partially
 * built pipeline is destroyed and *out is not written.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY when the allocation or the D3D12
 * pipeline state creation fails, or the error reported by shader
 * compilation.
 */
static VkResult
dzn_compute_pipeline_create(struct dzn_device *device,
                            VkPipelineCache cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);

   struct dzn_compute_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Stack storage backing the state stream; it only needs to live until
    * the pipeline state has been created.
    */
   uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
   D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
      .pPipelineStateSubobjectStream = state_buf,
   };

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_COMPUTE,
                     vk_compute_pipeline_create_flags(pCreateInfo),
                     layout, &stream_desc);

   D3D12_SHADER_BYTECODE shader = { 0 };
   VkResult ret =
      dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
                                          &stream_desc, &shader, pCreateInfo);

   if (ret == VK_SUCCESS) {
      if (FAILED(ID3D12Device4_CreatePipelineState(device->dev, &stream_desc,
                                                   &IID_ID3D12PipelineState,
                                                   (void **)&pipeline->base.state))) {
         ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }

   /* The bytecode is not needed past this point, whatever the outcome. */
   free((void *)shader.pShaderBytecode);

   if (ret == VK_SUCCESS) {
      *out = dzn_compute_pipeline_to_handle(pipeline);
   } else {
      dzn_compute_pipeline_destroy(pipeline, pAllocator);
   }

   return ret;
}

/*
 * Return the command signature used for indirect dispatches with this
 * pipeline, creating it the first time it is requested.
 *
 * Returns NULL if the signature cannot be created.
 */
ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
{
   if (!pipeline->indirect_cmd_sig) {
      struct dzn_device *device =
         container_of(pipeline->base.base.device, struct dzn_device, vk);

      /* Each command first writes three 32-bit root constants at offset 0 of
       * the sysval CBV root parameter, then issues the dispatch.
       */
      D3D12_INDIRECT_ARGUMENT_DESC args[] = {
         [0] = {
            .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
            .Constant = {
               .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
               .DestOffsetIn32BitValues = 0,
               .Num32BitValuesToSet = 3,
            },
         },
         [1] = {
            .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
         },
      };

      /* One record spans two D3D12_DISPATCH_ARGUMENTS-sized chunks. */
      D3D12_COMMAND_SIGNATURE_DESC sig_desc = {
         .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
         .NumArgumentDescs = ARRAY_SIZE(args),
         .pArgumentDescs = args,
      };

      HRESULT hres =
         ID3D12Device1_CreateCommandSignature(device->dev, &sig_desc,
                                              pipeline->base.root.sig,
                                              &IID_ID3D12CommandSignature,
                                              (void **)&pipeline->indirect_cmd_sig);
      if (FAILED(hres))
         return NULL;
   }

   return pipeline->indirect_cmd_sig;
}

/*
 * vkCreateComputePipelines() entry point.
 *
 * Creates the requested pipelines one by one. Entries that fail, and all
 * entries following the point where creation stops, are set to
 * VK_NULL_HANDLE.
 *
 * Returns VK_SUCCESS only if every pipeline was created; otherwise the code
 * of the last pipeline that failed.
 */
VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkComputePipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      VkResult pipeline_result =
         dzn_compute_pipeline_create(device,
                                     pipelineCache,
                                     &pCreateInfos[i],
                                     pAllocator,
                                     &pPipelines[i]);
      if (pipeline_result == VK_SUCCESS)
         continue;

      /* Keep the failure code: a later successful creation must not turn
       * the overall result back into VK_SUCCESS.
       */
      result = pipeline_result;
      pPipelines[i] = VK_NULL_HANDLE;

      /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED as it
       * is not obvious what error should be reported upon 2 different
       * failures.
       */
      if (pipeline_result != VK_PIPELINE_COMPILE_REQUIRED)
         break;

      if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
         break;
   }

   /* Null out whatever was not attempted after an early exit. */
   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

/*
 * vkDestroyPipeline() entry point: dispatch to the graphics or compute
 * destructor according to the pipeline bind point. A null handle is ignored.
 */
VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,
                    VkPipeline pipeline,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);

   if (pipe == NULL)
      return;

   if (pipe->type != VK_PIPELINE_BIND_POINT_GRAPHICS) {
      /* Anything that is not a graphics pipeline is expected to be compute. */
      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
      struct dzn_compute_pipeline *compute_pipeline =
         container_of(pipe, struct dzn_compute_pipeline, base);
      dzn_compute_pipeline_destroy(compute_pipeline, pAllocator);
      return;
   }

   struct dzn_graphics_pipeline *gfx_pipeline =
      container_of(pipe, struct dzn_graphics_pipeline, base);
   dzn_graphics_pipeline_destroy(gfx_pipeline, pAllocator);
}