/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* Initial size, maximum size and growth increment of the global free list.
 * NOTE(review): the values look like byte counts (2 MiB / 256 MiB / 1 MiB) —
 * confirm the unit against the code that consumes them.
 */
#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices per instance are created,
 * devices will have a smaller global free list size, as usually this use-case
 * implies smaller amounts of work spread out. The free list can still grow as
 * required.
 *
 * Note: "INITAL" in the macro name below is a misspelling of "INITIAL"; the
 * name is kept as-is because users of the macro are outside this section.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U

/* True when the driver is built with VK_KHR_display platform support. */
#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
#   define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

/* True when at least one WSI platform is available. The display platform is
 * the only one considered here.
 */
#if PVR_USE_WSI_PLATFORM_DISPLAY
#   define PVR_USE_WSI_PLATFORM true
#else
#   define PVR_USE_WSI_PLATFORM false
#endif

/* API version reported by the driver: Vulkan 1.0, with the patch number taken
 * from the Vulkan headers the driver was built against.
 */
#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default sizes in bytes used by pvr_CreateDevice() for setting up the
 * device suballocators (general, PDS, transfer, USC and visibility test).
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

/* Pairing of the render and display DRM devices that together make up one
 * supported platform configuration.
 */
struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      /* String matched against the DRM device's "compatible" entries. */
      const char *name;
      /* Length of name, not counting the NUL terminator. */
      size_t len;
   } render, display;
};

/* Builds a pvr_drm_device_config from two string literals. The arguments must
 * be string literals: the lengths are computed with sizeof(), minus one for
 * the NUL terminator.
 */
#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
};

#undef DEF_CONFIG

/* Instance extensions advertised by the driver. The surface/display related
 * entries are only enabled when the matching WSI platform support is built
 * in; everything not listed is left disabled.
 */
static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: enable this extension when the conformance tests get
       * updated to version 1.3.6.0, the current version does not
       * include the imagination driver ID, which will make a dEQP
       * test fail
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

/* Fills in the table of Vulkan features the physical device advertises.
 *
 * The whole table is overwritten, so any feature not named below is reported
 * as unsupported. dev_info is currently not consulted: the same feature set
 * is reported for every core.
 */
static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

/* Per-shader-stage descriptor limits reported through the matching
 * maxPerStage* members of the device limits.
 */
struct pvr_descriptor_limits {
   /* Reported as maxPerStageResources. */
   uint32_t max_per_stage_resources;
   /* Reported as maxPerStageDescriptorSamplers. */
   uint32_t max_per_stage_samplers;
   /* Reported as maxPerStageDescriptorUniformBuffers. */
   uint32_t max_per_stage_uniform_buffers;
   /* Reported as maxPerStageDescriptorStorageBuffers. */
   uint32_t max_per_stage_storage_buffers;
   /* Reported as maxPerStageDescriptorSampledImages. */
   uint32_t max_per_stage_sampled_images;
   /* Reported as maxPerStageDescriptorStorageImages. */
   uint32_t max_per_stage_storage_images;
   /* Reported as maxPerStageDescriptorInputAttachments. */
   uint32_t max_per_stage_input_attachments;
};

/* Selects the per-stage descriptor limits for a core.
 *
 * The limits are picked from a static table, keyed on the common store size
 * returned by pvr_calc_fscommon_size_and_tiles_in_flight(): the entry chosen
 * is the largest level whose size does not exceed that value.
 *
 * The returned pointer refers to static storage and must not be freed.
 *
 * If the reported size is below the smallest supported level (408), an error
 * is logged and the process is aborted.
 */
static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   /* Field order matches struct pvr_descriptor_limits: resources, samplers,
    * uniform buffers, storage buffers, sampled images, storage images, input
    * attachments.
    */
   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   /* NOTE(review): CS4096 and CS2560 are never selected; every core with a
    * common store of 2048 or more gets the CS2048 limits. Confirm whether this
    * cap is intentional before extending the chain.
    */
   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      /* Threshold matches the level name; it was previously written as 1526,
       * which let sizes just short of 1536 pick the CS1536 limits.
       */
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

/* Fills in the Vulkan properties and limits reported for a physical device.
 *
 * The whole *properties structure is overwritten. Most limits are constants;
 * a few are derived from the device info and runtime info stored in pdevice
 * (render size, multisample count, descriptor limits, compute work group
 * size), and minMemoryMapAlignment comes from the winsys page size, so
 * pdevice->ws must already be set.
 *
 * Returns false if the pipeline cache UUID could not be computed; true
 * otherwise.
 */
static bool pvr_physical_device_get_properties(
   const struct pvr_physical_device *const pdevice,
   struct vk_properties *const properties)
{
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
   const struct pvr_device_runtime_info *const dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

   /* Mask with every bit up to and including max_multisample set. This
    * assumes max_multisample is a power of two, so the result is the set of
    * all sample counts from 1 up to the maximum — TODO confirm.
    */
   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier - each slot has a fixed number of invocations, the whole workgroup
    * may need to span multiple slots. As each slot will WAIT at the barrier
    * until the last invocation completes, all have to be schedulable at the
    * same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      /* NOTE(review): the attribute limit is set from the *bindings* constant;
       * confirm this is intended rather than a dedicated attributes limit.
       */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      /* Viewport bounds span twice the maximum render size either side of 0. */
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = pdevice->ws->page_size,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   /* snprintf() truncates the name if it does not fit in deviceName. */
   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "Imagination PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

/* Reports the instance-level API version supported by the driver.
 *
 * pApiVersion must be a valid pointer; it receives PVR_API_VERSION. Always
 * returns VK_SUCCESS.
 */
VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   const uint32_t supported_version = PVR_API_VERSION;

   *pApiVersion = supported_version;

   return VK_SUCCESS;
}

/* Enumerates the instance extensions exposed by the driver.
 *
 * Any non-NULL pLayerName is rejected with VK_ERROR_LAYER_NOT_PRESENT.
 * Otherwise the enumeration of pvr_instance_extensions is delegated to the
 * common Vulkan runtime helper and its result is returned.
 */
VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   VkResult result;

   if (pLayerName != NULL) {
      result = vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
   } else {
      result =
         vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                    pPropertyCount,
                                                    pProperties);
   }

   return result;
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

/* Destroys a Vulkan instance.
 *
 * A null instance handle is accepted and ignored. pAllocator is not used:
 * the instance is freed with the allocator stored in the instance itself.
 */
void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (instance != NULL) {
      VG(VALGRIND_DESTROY_MEMPOOL(instance));

      vk_instance_finish(&instance->vk);
      vk_free(&instance->vk.alloc, instance);
   }
}

/* Computes the size, in bytes, of the memory heap to advertise.
 *
 * The size is a fraction of the total physical memory of the system: half of
 * it when the system has 4GiB or less, three quarters otherwise, so that the
 * GPU does not burn too much RAM.
 *
 * Returns 0 if the total amount of physical memory cannot be queried.
 */
static uint64_t pvr_compute_heap_size(void)
{
   const uint64_t four_gib = 4ULL * 1024ULL * 1024ULL * 1024ULL;
   uint64_t system_ram;

   if (!os_get_total_physical_memory(&system_ram))
      return 0;

   return (system_ram <= four_gib) ? (system_ram / 2U)
                                   : (system_ram * 3U / 4U);
}

/* Initializes a physical device from a pair of DRM devices.
 *
 * pdevice: storage for the physical device being initialized.
 * instance: owning instance; its allocator is used for all allocations here.
 * drm_render_device: DRM device whose render node is opened by the winsys.
 * drm_display_device: DRM device whose primary node is used for display. It
 *    is only dereferenced when VK_KHR_display is enabled on the instance.
 *    NOTE(review): it is assumed to be non-NULL in that case — confirm
 *    against the caller.
 *
 * Returns VK_SUCCESS on success. Returns VK_ERROR_INCOMPATIBLE_DRIVER when
 * the PVR_I_WANT_A_BROKEN_VULKAN_DRIVER environment variable is not set, and
 * another error code if any setup step fails. On failure everything acquired
 * by this function is released again.
 *
 * NOTE(review): on the failure paths taken after the winsys is created,
 * pdevice->ws, pdevice->render_path and pdevice->display_path still hold the
 * pointers that were just freed; callers must not use them afterwards.
 */
static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   /* The driver refuses to load unless the user explicitly opts in. Only the
    * presence of the variable is checked, not its value.
    */
   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   /* The display node path is only needed when VK_KHR_display is enabled. */
   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   /* pvr_physical_device_get_properties() below reads pdevice->ws, so these
    * must be set before it is called.
    */
   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(pdevice, &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   /* Driver entrypoints are installed first (third argument true), then the
    * WSI entrypoints are added with the third argument false.
    */
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Setup available memory heaps and types */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      /* Called for its logging side effect; result already holds the code. */
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

   /* Error unwinding: each label undoes one setup step, in reverse order of
    * acquisition, and falls through to the next.
    */
err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

/* Thin wrapper around drmGetDevices2() that reports failures as VkResult.
 *
 * obj: object the error is logged against on failure.
 * devices: array receiving up to max_devices device pointers. The callers in
 *    this file pass NULL with max_devices of 0 to only query the count.
 * num_devices_out: optional; receives the value returned by drmGetDevices2().
 *
 * Returns VK_SUCCESS, or VK_ERROR_INITIALIZATION_FAILED if drmGetDevices2()
 * returns a negative value, in which case num_devices_out is not written.
 */
static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
   const int count = drmGetDevices2(0, devices, max_devices);

   if (count >= 0) {
      if (num_devices_out != NULL)
         *num_devices_out = count;

      return VK_SUCCESS;
   }

   /* A negative return value is a negated errno. */
   return vk_errorf(obj,
                    VK_ERROR_INITIALIZATION_FAILED,
                    "Failed to enumerate drm devices (errno %d: %s)",
                    -count,
                    strerror(-count));
}

/* Checks whether a DRM device matches a device description.
 *
 * Walks the NULL-terminated list of "compatible" strings of the platform
 * device and returns true as soon as one of them starts with info->name.
 * Only the first info->len characters are compared, so this is a prefix
 * match.
 *
 * drm_dev->deviceinfo.platform is dereferenced unconditionally.
 * NOTE(review): this presumes drm_dev sits on the platform bus — confirm the
 * caller checks the bus type.
 */
static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compat_list = drm_dev->deviceinfo.platform->compatible;

   for (size_t idx = 0; compat_list[idx] != NULL; idx++) {
      const char *const entry = compat_list[idx];

      if (!strncmp(entry, info->name, info->len))
         return true;
   }

   return false;
}

/* Looks up the supported configuration whose render device matches drm_dev.
 *
 * Returns a pointer to the first matching entry of pvr_drm_configs, or NULL
 * if drm_dev is not a supported render device.
 */
static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   const struct pvr_drm_device_config *const configs_end =
      pvr_drm_configs + ARRAY_SIZE(pvr_drm_configs);

   for (const struct pvr_drm_device_config *cfg = pvr_drm_configs;
        cfg != configs_end;
        cfg++) {
      if (pvr_drm_device_compatible(&cfg->render, drm_dev))
         return cfg;
   }

   return NULL;
}

/* Collects the DRM driver versions of the display and render fds together
 * with the device info of \p pdevice and hands them to
 * pvr_dump_physical_device_info().
 *
 * Nothing is dumped if either drmGetVersion() call fails.
 */
static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   struct pvr_device_dump_info info;
   drmVersionPtr display_ver;
   drmVersionPtr render_ver;

   display_ver = drmGetVersion(pdevice->ws->display_fd);
   if (display_ver == NULL)
      return;

   render_ver = drmGetVersion(pdevice->ws->render_fd);
   if (render_ver == NULL)
      goto out_free_display_version;

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;

   /* Display node. */
   info.drm_display.name = display_ver->name;
   info.drm_display.date = display_ver->date;
   info.drm_display.major = display_ver->version_major;
   info.drm_display.minor = display_ver->version_minor;
   info.drm_display.patchlevel = display_ver->version_patchlevel;
   info.drm_display.comp = comp_display;

   /* Render node. */
   info.drm_render.name = render_ver->name;
   info.drm_render.date = render_ver->date;
   info.drm_render.major = render_ver->version_major;
   info.drm_render.minor = render_ver->version_minor;
   info.drm_render.patchlevel = render_ver->version_patchlevel;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(render_ver);

out_free_display_version:
   drmFreeVersion(display_ver);
}

static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

/* vkCreateInstance() entry point.
 *
 * Allocates the pvr_instance with \p pAllocator (or the default allocator when
 * none is given), initializes the common vk_instance with a dispatch table
 * built from the pvr and wsi entrypoints, and installs the physical device
 * enumerate/destroy callbacks.
 */
VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (instance == NULL)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Driver entrypoints first, then the WSI ones. */
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result == VK_SUCCESS) {
      pvr_process_debug_variable();

      instance->active_device_count = 0;

      instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
      instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

      VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

      *pInstance = pvr_instance_to_handle(instance);

      return VK_SUCCESS;
   }

   vk_free(pAllocator, instance);

   return result;
}

/* Returns the allocation count that
 * pvr_calc_fscommon_size_and_tiles_in_flight() uses as its num_allocs value.
 *
 * Cores with the s8xe feature use their num_raster_pipes feature value. All
 * other cores are asserted to have a single phantom and derive the value from
 * their num_clusters feature value.
 */
static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t clusters;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);

   clusters = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
   if (clusters >= 4)
      return 1;

   return (clusters == 2) ? 2 : 4;
}

/* Dual-purpose helper relating the fragment shader common size to the number
 * of tiles in flight. What is returned depends on \p fs_common_size:
 *
 *  - 0: the isp_max_tiles_in_flight feature value (default 1) is returned.
 *  - UINT32_MAX: the largest common size that still allows
 *    MIN2(min_tiles_in_flight, max tiles in flight) tiles is returned, capped
 *    at ROGUE_MAX_PIXEL_SHARED_REGISTERS and rounded down to the USC shared
 *    size unit.
 *  - anything else: the number of tiles in flight achievable with that common
 *    size is returned, capped at the maximum tiles in flight.
 *
 * \p min_tiles_in_flight is only used in the UINT32_MAX case.
 */
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   /* No common store usage: nothing limits the tiles in flight. */
   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   /* UINT32_MAX is a sentinel asking for the maximum usable common size. */
   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   }

   /* Same doubling as above, applied to the requested common size. */
   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   /* NOTE(review): num_tile_in_flight is unsigned, so this wraps around if the
    * division above produced 0 - confirm callers never pass a common size that
    * large.
    */
   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if MESA_DEBUG
   /* Validate the above result. */

   /* NOTE(review): this condition is always true (x >= MIN2(x, y)); it looks
    * like min_tiles_in_flight may have been intended on the right-hand side -
    * TODO confirm.
    */
   assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

/* Queue family properties reported by
 * pvr_GetPhysicalDeviceQueueFamilyProperties2(): graphics, compute and
 * transfer are all supported, and no timestamp bits are valid.
 */
static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { .width = 1, .height = 1, .depth = 1 },
};

static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

/* vkGetPhysicalDeviceQueueFamilyProperties2() entry point.
 *
 * Appends one entry, filled from pvr_queue_family_properties, to the output
 * array. Extension structs chained on that entry's pNext are not filled in;
 * they are only passed to vk_debug_ignored_stype().
 */
void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      /* No queue family extension structs are supported. */
      vk_foreach_struct (ext, p->pNext) {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

/* vkGetPhysicalDeviceMemoryProperties2() entry point.
 *
 * Copies the physical device's memory properties and, if a
 * VkPhysicalDeviceMemoryBudgetPropertiesEXT struct is chained, fills in the
 * budget and usage of heap 0 and zeroes every other heap slot. Any other
 * chained struct is passed to vk_debug_ignored_stype().
 */
void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      if (ext->sType ==
          VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT) {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *const budget_props =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;
         uint32_t heap = 1;

         budget_props->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         budget_props->heapUsage[0] = pdevice->heap_used;

         /* Every heap slot other than the first one is reported as empty. */
         while (heap < VK_MAX_MEMORY_HEAPS) {
            budget_props->heapBudget[heap] = 0u;
            budget_props->heapUsage[heap] = 0u;
            heap++;
         }
      } else {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

/* vkGetInstanceProcAddr() entry point: resolves \p pName through the common
 * vk_instance lookup using the pvr instance entrypoints.
 */
PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   PFN_vkVoidFunction proc_addr;

   proc_addr = vk_instance_get_proc_addr(&instance->vk,
                                         &pvr_instance_entrypoints,
                                         pName);

   return proc_addr;
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 *
 * This is an exported symbol that simply forwards to
 * pvr_GetInstanceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

/* Generates the data and code segments of the PDS compute shader described by
 * \p program in a temporary host buffer and uploads them with
 * pvr_gpu_upload_pds(), storing the upload in \p pds_upload_out.
 *
 * The temporary buffer is always released before returning.
 */
VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   uint32_t *staging;
   uint32_t *code_start;
   size_t staging_size;
   VkResult result;

   /* Size query pass: fills in program->data_size and program->code_size. */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   staging_size = PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging = vk_alloc(&device->vk.alloc,
                      staging_size,
                      8U,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (staging == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* The data segment goes at the start of the buffer; generating it returns
    * the location at which the code segment is then written.
    */
   code_start = pvr_pds_compute_shader(program,
                                       staging,
                                       PDS_GENERATE_DATA_SEGMENT,
                                       dev_info);
   pvr_pds_compute_shader(program,
                          code_start,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   /* The code size is in bytes here, so it is converted to dwords. */
   result = pvr_gpu_upload_pds(device,
                               staging,
                               program->data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_start,
                               program->code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

/* Generates and uploads the PDS program(s) that kick the IDFWDF USC program.
 *
 * The PDS program DMAs \p shareds shared registers from
 * \p shareds_buffer_addr and then kicks the USC program at \p usc_addr with
 * \p temps temporaries.
 *
 * When PVR_NEED_SW_COMPUTE_PDS_BARRIER() is true for the device, a variant
 * with clear_pds_barrier set is uploaded to \p sw_compute_barrier_upload_out
 * first; otherwise that upload's pvr_bo is set to NULL. The variant without
 * clear_pds_barrier is always uploaded to \p upload_out.
 *
 * On failure nothing uploaded by this function is left allocated.
 */
static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      uint32_t *new_staging_buffer;

      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);

      /* Keep the old pointer until the reallocation is known to have
       * succeeded: a failed reallocation leaves the original allocation alive
       * and it would otherwise be leaked.
       */
      new_staging_buffer = vk_realloc(&device->vk.alloc,
                                      staging_buffer,
                                      staging_buffer_size,
                                      8,
                                      VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!new_staging_buffer) {
         /* A zero-sized reallocation acts as a free, in which case the old
          * buffer is already gone.
          */
         if (staging_buffer_size != 0)
            vk_free(&device->vk.alloc, staging_buffer);

         pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      staging_buffer = new_staging_buffer;

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);

   /* The staging buffer is no longer needed, whatever the outcome. */
   vk_free(&device->vk.alloc, staging_buffer);

   if (result != VK_SUCCESS) {
      /* Undo the barrier variant upload (pvr_bo is NULL if there was none). */
      pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   return VK_SUCCESS;
}

/* Sets up device->idfwdf_state:
 *
 *  - uploads the hard-coded IDFWDF USC program,
 *  - allocates the store buffer and the CPU-mapped shareds buffer,
 *  - packs sampler and image state words describing the store buffer and
 *    writes them, together with the store buffer address, into the shareds
 *    buffer,
 *  - generates and uploads the PDS program(s) that kick the USC program.
 *
 * On failure everything allocated so far is released again.
 */
static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   struct util_dynarray usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   util_dynarray_init(&usc_program, NULL);
   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program.data,
                               usc_program.size,
                               16,
                               &device->idfwdf_state.usc);
   /* The host copy of the program is not needed once it has been uploaded. */
   util_dynarray_fini(&usc_program);

   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   /* One register's worth of space per shared, mapped so it can be filled
    * in from the CPU below.
    */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   /* Both sampler words must have been packed above. */
   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   /* Describe the store buffer as a linear 4x2 RGBA32F 2D image. */
   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to setup the shareds data instead of
    * assuming there's always 12 and this is how they should be setup?
    */

   /* NOTE(review): the address is written high dword first whereas the state
    * words below are written low dword first - presumably this is the layout
    * the hard-coded program expects; confirm before changing.
    */
   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   /* The 12 dwords written above must match the shared count of the program.
    * Note this is only checked after the writes have happened.
    */
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

   /* Error unwinding, in reverse order of allocation. */
err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_suballoc_free(device->idfwdf_state.usc);

   return result;
}

/* Releases the buffers and uploads held in device->idfwdf_state: both PDS
 * uploads, the shareds and store buffers and the USC program upload.
 */
static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
   /* NOTE(review): this pvr_bo is NULL when no SW compute PDS barrier variant
    * was uploaded, so pvr_bo_suballoc_free() presumably tolerates NULL -
    * confirm.
    */
   pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.usc);
}

/* FIXME: We should be calculating the size when we upload the code in
 * pvr_srv_setup_static_pixel_event_program().
 */
static void pvr_device_get_pixel_event_pds_program_data_size(
   const struct pvr_device_info *dev_info,
   uint32_t *const data_size_in_dwords_out)
{
   struct pvr_pds_event_program program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   *data_size_in_dwords_out = program.data_size;
}

static VkResult pvr_device_init_nop_program(struct pvr_device *device)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_kickusc_program program = { 0 };
   struct util_dynarray nop_usc_bin;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_uscgen_nop(&nop_usc_bin);

   result = pvr_gpu_upload_usc(device,
                               util_dynarray_begin(&nop_usc_bin),
                               nop_usc_bin.size,
                               cache_line_size,
                               &device->nop_program.usc);
   util_dynarray_fini(&nop_usc_bin);
   if (result != VK_SUCCESS)
      return result;

   /* Setup a PDS program that kicks the static USC program. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       device->nop_program.usc->dev_addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_shader(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_suballoc_free(device->nop_program.usc);

   return result;
}

static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
{
   simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);

   for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
      device->tile_buffer_state.buffers[i] = NULL;

   device->tile_buffer_state.buffer_count = 0;
}

static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
{
   /* Destroy the mutex first to trigger asserts in case it's still locked so
    * that we don't put things in an inconsistent state by freeing buffers that
    * might be in use or attempt to free buffers while new buffers are being
    * allocated.
    */
   simple_mtx_destroy(&device->tile_buffer_state.mtx);

   for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
      pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
}

/**
 * \brief Ensures that a certain amount of tile buffers are allocated.
 *
 * Make sure that \p capacity amount of tile buffers are allocated. If less were
 * present, append new tile buffers of \p size_in_bytes each to reach the quota.
 *
 * The whole operation runs with the tile buffer state mutex held. If any
 * allocation fails, the buffers appended by this call are freed again, the
 * buffer count is left untouched and the allocation error is returned.
 */
VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes)
{
   struct pvr_device_tile_buffer_state *const state =
      &device->tile_buffer_state;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result = VK_SUCCESS;
   uint32_t next;

   simple_mtx_lock(&state->mtx);

   /* Clamping in release and asserting in debug. */
   assert(capacity <= ARRAY_SIZE(state->buffers));
   capacity =
      CLAMP(capacity, state->buffer_count, ARRAY_SIZE(state->buffers));

   /* TODO: Implement bo multialloc? To reduce the amount of syscalls and
    * allocations.
    */
   for (next = state->buffer_count; next < capacity; next++) {
      result = pvr_bo_alloc(device,
                            device->heaps.general_heap,
                            size_in_bytes,
                            cache_line_size,
                            0,
                            &state->buffers[next]);
      if (result != VK_SUCCESS)
         break;
   }

   if (result == VK_SUCCESS) {
      state->buffer_count = capacity;
   } else {
      /* Roll back everything this call managed to allocate. */
      for (uint32_t undo = state->buffer_count; undo < next; undo++)
         pvr_bo_free(device, state->buffers[undo]);
   }

   simple_mtx_unlock(&state->mtx);

   return result;
}

/**
 * \brief Packs the fixed sampler state stored in
 * device->input_attachment_sampler.
 *
 * The sampler uses point filtering, clamp-to-edge addressing on all three
 * axes, no anisotropic filtering and non-normalized coordinates.
 */
static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      /* Filtering. */
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);

      /* Addressing. */
      sampler.non_normalized_coords = true;
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }
}

/**
 * \brief Creates a logical device on top of a physical device.
 *
 * Creates the winsys, allocates and initializes the pvr_device, then sets up
 * the device-wide helper state in a fixed order (bo store, suballocators,
 * global free list, built-in programs, tile buffer state, queues, scratch
 * buffer store, robustness buffer, border color table).
 *
 * On failure everything initialized so far is torn down through the chain of
 * error labels at the bottom, which run in reverse order of initialization
 * and fall through into each other.
 *
 * \param[in] physicalDevice  Physical device to create the device on.
 * \param[in] pCreateInfo     Device creation parameters.
 * \param[in] pAllocator      Optional host allocation callbacks.
 * \param[out] pDevice        Receives the new device handle on success.
 * \return VK_SUCCESS on success, or the error of the first step that failed.
 */
VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   struct pvr_winsys *ws;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   /* The winsys is created first and destroyed last on the error path. */
   result = pvr_winsys_create(pdevice->render_path,
                              pdevice->display_path,
                              pAllocator ? pAllocator : &instance->vk.alloc,
                              &ws);
   if (result != VK_SUCCESS)
      goto err_out;

   device = vk_alloc2(&instance->vk.alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_pvr_winsys_destroy;
   }

   /* Build the dispatch table from the driver entrypoints first, then from
    * the WSI entrypoints.
    */
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->instance = instance;
   device->pdevice = pdevice;
   device->ws = ws;

   vk_device_set_drm_fd(&device->vk, ws->render_fd);

   if (ws->features.supports_threaded_submit) {
      /* Queue submission can be blocked if the kernel CCBs become full,
       * so enable threaded submit to not block the submitter.
       */
      vk_device_enable_threaded_submit(&device->vk);
   }

   ws->ops->get_heaps_info(ws, &device->heaps);

   result = pvr_bo_store_create(device);
   if (result != VK_SUCCESS)
      goto err_vk_device_finish;

   /* One suballocator per heap that small uploads are carved out of. */
   pvr_bo_suballocator_init(&device->suballoc_general,
                            device->heaps.general_heap,
                            device,
                            PVR_SUBALLOCATOR_GENERAL_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_pds,
                            device->heaps.pds_heap,
                            device,
                            PVR_SUBALLOCATOR_PDS_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_transfer,
                            device->heaps.transfer_frag_heap,
                            device,
                            PVR_SUBALLOCATOR_TRANSFER_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_usc,
                            device->heaps.usc_heap,
                            device,
                            PVR_SUBALLOCATOR_USC_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_vis_test,
                            device->heaps.vis_test_heap,
                            device,
                            PVR_SUBALLOCATOR_VIS_TEST_SIZE);

   /* Count this device on the instance. Devices created once the count
    * exceeds PVR_SECONDARY_DEVICE_THRESHOLD start with the secondary
    * (PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE) global free list size.
    */
   if (p_atomic_inc_return(&instance->active_device_count) >
       PVR_SECONDARY_DEVICE_THRESHOLD) {
      initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
   }

   result = pvr_free_list_create(device,
                                 initial_free_list_size,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_dec_device_count;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_empty_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_device_create_compute_query_programs(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_empty;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_destroy_compute_query_programs;

   result = pvr_device_init_graphics_static_clear_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   result = pvr_device_init_spm_load_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_graphics_static_clear_state;

   pvr_device_init_tile_buffer_state(device);

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_tile_buffer_state;

   pvr_device_init_default_sampler_state(device);

   pvr_spm_init_scratch_buffer_store(device);

   result = pvr_init_robustness_buffer(device);
   if (result != VK_SUCCESS)
      goto err_pvr_spm_finish_scratch_buffer_store;

   result = pvr_border_color_table_init(&device->border_color_table, device);
   if (result != VK_SUCCESS)
      goto err_pvr_robustness_buffer_finish;

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_cmd_buffer_submit_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

   /* Error unwinding: each label undoes one initialization step and falls
    * through into the labels for the steps that preceded it.
    */
err_pvr_robustness_buffer_finish:
   pvr_robustness_buffer_finish(device);

err_pvr_spm_finish_scratch_buffer_store:
   pvr_spm_finish_scratch_buffer_store(device);

   /* The queues exist whenever the scratch buffer store does. */
   pvr_queues_destroy(device);

err_pvr_finish_tile_buffer_state:
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);

err_pvr_finish_graphics_static_clear_state:
   pvr_device_finish_graphics_static_clear_state(device);

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_destroy_compute_query_programs:
   pvr_device_destroy_compute_query_programs(device);

err_pvr_free_compute_empty:
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);

err_pvr_free_compute_fence:
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_dec_device_count:
   p_atomic_dec(&device->instance->active_device_count);

   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);

   pvr_bo_store_destroy(device);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_out:
   return result;
}

/**
 * \brief Destroys a logical device and everything it owns.
 *
 * Releases the device-wide state in roughly the reverse order in which
 * pvr_CreateDevice() set it up, decrements the instance's active device
 * count and finally frees the device structure itself. A NULL device handle
 * is ignored.
 *
 * \param[in] _device     Device to destroy, may be VK_NULL_HANDLE.
 * \param[in] pAllocator  Not used by this function; the device is freed
 *                        through device->vk.alloc.
 */
void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!device)
      return;

   pvr_border_color_table_finish(&device->border_color_table, device);
   pvr_robustness_buffer_finish(device);
   pvr_spm_finish_scratch_buffer_store(device);
   pvr_queues_destroy(device);
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);
   pvr_device_finish_graphics_static_clear_state(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_device_destroy_compute_query_programs(device);
   /* Built-in programs uploaded at device creation. */
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   /* Suballocators are finished only after every suballocation above has
    * been freed.
    */
   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);
   pvr_bo_store_destroy(device);
   pvr_winsys_destroy(device->ws);
   p_atomic_dec(&device->instance->active_device_count);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

/**
 * \brief Reports the instance layers provided by this driver (none).
 *
 * A count query (\p pProperties is NULL) stores 0 in \p pPropertyCount and
 * succeeds. Any call that passes a properties array fails with
 * VK_ERROR_LAYER_NOT_PRESENT and leaves \p pPropertyCount untouched.
 */
VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (pProperties != NULL)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   *pPropertyCount = 0;

   return VK_SUCCESS;
}

static void free_memory(struct pvr_device *device,
                        struct pvr_device_memory *mem,
                        const VkAllocationCallbacks *pAllocator)
{
   if (!mem)
      return;

   /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
    *   If a memory object is mapped at the time it is freed, it is implicitly
    *   unmapped.
    */
   if (mem->bo->map)
      device->ws->ops->buffer_unmap(mem->bo);

   p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

/**
 * \brief Allocates device memory, or imports it from a file descriptor.
 *
 * When the pNext chain carries a VkImportMemoryFdInfoKHR with a non-zero
 * handle type, the buffer is imported from the given fd; otherwise a new
 * CPU-accessible buffer is created. Either way the buffer size is added to
 * the physical device's heap usage counter, and the allocation is rejected
 * with VK_ERROR_OUT_OF_DEVICE_MEMORY if that pushes usage above the size of
 * memory heap 0.
 *
 * An imported fd is closed only once the allocation can no longer fail, so
 * that on every error path the application still owns the descriptor.
 *
 * \param[in] _device        Logical device.
 * \param[in] pAllocateInfo  Allocation parameters and extension chain.
 * \param[in] pAllocator     Optional host allocation callbacks.
 * \param[out] pMem          Receives the memory handle on success.
 * \return VK_SUCCESS on success, or an error code otherwise.
 */
VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   bool imported = false;
   uint64_t heap_used;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         /* WSI allocations come from the display device when there is one. */
         if (device->ws->display_fd >= 0)
            type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            pAllocateInfo->allocationSize,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* The fd is closed further down, once nothing can fail any more. */
      imported = true;
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   /* Account for the new buffer; free_memory() undoes the accounting again
    * if the heap budget is exceeded.
    */
   heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
   if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
      free_memory(device, mem, pAllocator);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   /* From the Vulkan spec:
    *
    *    "Importing memory from a file descriptor transfers ownership of
    *    the file descriptor from the application to the Vulkan
    *    implementation. The application must not perform any operations on
    *    the file descriptor after a successful import."
    *
    * If the import fails, we leave the file descriptor open. This is why the
    * fd is only closed here, after the last point of failure.
    */
   if (imported)
      close(fd_info->fd);

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

/**
 * \brief Exports a device memory object as a file descriptor.
 *
 * Only opaque-fd and dma-buf handle types are expected (asserted in debug
 * builds). The export itself is delegated to the winsys buffer_get_fd()
 * operation, whose result is returned unchanged.
 */
VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(memory->bo, pFd);
}

/**
 * \brief Reports which memory types an external fd can be imported as.
 *
 * Only dma-buf handles are supported; for those every memory type exposed by
 * the physical device is reported as compatible. Any other handle type fails
 * with VK_ERROR_INVALID_EXTERNAL_HANDLE. The \p fd itself is not inspected.
 */
VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types having
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
       * dma-buf should be imported using cacheable memory types,
       * given exporter's mmap will always map it as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */

      /* Build the mask in 64-bit unsigned arithmetic: a plain int shift is
       * undefined once memoryTypeCount reaches 31 or 32 (VK_MAX_MEMORY_TYPES).
       */
      pMemoryFdProperties->memoryTypeBits = (uint32_t)(
         (UINT64_C(1) << device->pdevice->memory.memoryTypeCount) - 1);
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

/**
 * \brief Frees a device memory object.
 *
 * Thin wrapper that resolves the handles and hands over to free_memory(),
 * which also copes with a NULL memory handle.
 */
void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device_memory, memory, _mem);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   free_memory(device, memory, pAllocator);
}

/**
 * \brief Maps a device memory object into host address space.
 *
 * The whole buffer is mapped on first use and stays mapped; later calls just
 * return a pointer into the existing mapping. \p ppData receives the mapping
 * base plus \p offset. A NULL memory handle yields a NULL pointer and
 * VK_SUCCESS.
 *
 * \return VK_SUCCESS, or the error returned by the winsys buffer_map().
 */
VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Map the whole buffer at once, unless a mapping already exists. */
   if (mem->bo->map == NULL) {
      const VkResult map_result = device->ws->ops->buffer_map(mem->bo);

      if (map_result != VK_SUCCESS)
         return map_result;
   }

   *ppData = (uint8_t *)mem->bo->map + offset;

   return VK_SUCCESS;
}

/**
 * \brief Unmaps a device memory object from host address space.
 *
 * Does nothing for a NULL handle or for memory that is not currently mapped.
 */
void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (mem != NULL && mem->bo->map != NULL)
      device->ws->ops->buffer_unmap(mem->bo);
}

/**
 * \brief Flushes host writes to mapped memory ranges.
 *
 * No-op in this driver: the ranges are not inspected and VK_SUCCESS is
 * returned unconditionally.
 */
VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

/**
 * \brief Invalidates mapped memory ranges so device writes become visible.
 *
 * No-op in this driver: the ranges are not inspected and VK_SUCCESS is
 * returned unconditionally.
 */
VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

/**
 * \brief Queries the sparse memory requirements of an image.
 *
 * Always reports zero requirements; \p pSparseMemoryRequirements is never
 * written.
 */
void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

/**
 * \brief Queries the committed size of a device memory object.
 *
 * Always reports 0 bytes, regardless of the memory object passed in.
 */
void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

/**
 * \brief Maps a range of a device memory object into the general heap.
 *
 * Reserves a virtual range in the general heap and maps \p size bytes of
 * \p mem, starting at \p offset, into it. The reservation is enlarged by the
 * sub-page part of \p offset. The outputs are written only on success.
 *
 * \param[in] device         Logical device.
 * \param[in] mem            Device memory to bind.
 * \param[in] offset         Offset into \p mem; must be a multiple of
 *                           \p alignment and lie inside the buffer.
 * \param[in] size           Number of bytes to map.
 * \param[in] alignment      Alignment of the virtual range.
 * \param[out] vma_out       Receives the virtual memory area.
 * \param[out] dev_addr_out  Receives the device address of the mapping.
 * \return VK_SUCCESS on success, or the winsys error otherwise.
 */
VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
   const VkDeviceSize page_offset =
      offset & (device->heaps.general_heap->page_size - 1);
   const VkDeviceSize virt_size = size + page_offset;
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;
   VkResult result;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   result = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                        virt_size,
                                        alignment,
                                        &vma);
   if (result != VK_SUCCESS)
      return result;

   result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
   if (result != VK_SUCCESS) {
      /* Give the reservation back; the outputs stay untouched. */
      device->ws->ops->heap_free(vma);
      return result;
   }

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

/**
 * \brief Binds device memory to one or more buffers.
 *
 * Each entry of \p pBindInfos is bound in order through pvr_bind_memory(). If
 * one binding fails, every buffer bound earlier in this call is unbound again
 * and its vma pointer is reset, so that a later pvr_DestroyBuffer() does not
 * unbind the same range a second time.
 *
 * \return VK_SUCCESS if every binding succeeded, otherwise the error of the
 *         first binding that failed.
 */
VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
         /* Roll back the bindings made so far, most recent first. */
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, bound_buffer, pBindInfos[i].buffer);

            pvr_unbind_memory(device, bound_buffer->vma);

            /* pvr_DestroyBuffer() unbinds any non-NULL vma, so a stale
             * pointer here would be released twice.
             */
            bound_buffer->vma = NULL;
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

/**
 * \brief Submits sparse binding operations to a queue.
 *
 * Stub: all arguments are ignored, no work is submitted and VK_SUCCESS is
 * returned unconditionally.
 */
VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

/**
 * \brief Creates an event object.
 *
 * The new event has no sync object attached and starts in the
 * PVR_EVENT_STATE_RESET_BY_HOST state.
 *
 * \return VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY if allocation fails.
 */
VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_event *event;

   event = vk_object_alloc(&device->vk,
                           pAllocator,
                           sizeof(*event),
                           VK_OBJECT_TYPE_EVENT);
   if (event == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->state = PVR_EVENT_STATE_RESET_BY_HOST;
   event->sync = NULL;

   *pEvent = pvr_event_to_handle(event);

   return VK_SUCCESS;
}

/**
 * \brief Destroys an event object, together with its sync object if it has
 * one. A NULL event handle is ignored.
 */
void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event == NULL)
      return;

   if (event->sync != NULL)
      vk_sync_destroy(&device->vk, event->sync);

   vk_object_free(&device->vk, pAllocator, event);
}

/**
 * \brief Returns whether an event is currently set or reset.
 *
 * Host-side states are answered directly. For device-side states the event's
 * sync object is polled with a zero timeout: a completed sync means the
 * device operation (set or reset) has taken effect, anything else means the
 * event still has the opposite status. A device-side state without a sync
 * object reports VK_EVENT_RESET.
 *
 * \return VK_EVENT_SET or VK_EVENT_RESET.
 */
VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult status_if_complete;
   VkResult status_if_pending;

   switch (event->state) {
   case PVR_EVENT_STATE_SET_BY_HOST:
      return VK_EVENT_SET;

   case PVR_EVENT_STATE_RESET_BY_HOST:
      return VK_EVENT_RESET;

   case PVR_EVENT_STATE_SET_BY_DEVICE:
      status_if_complete = VK_EVENT_SET;
      status_if_pending = VK_EVENT_RESET;
      break;

   case PVR_EVENT_STATE_RESET_BY_DEVICE:
      status_if_complete = VK_EVENT_RESET;
      status_if_pending = VK_EVENT_SET;
      break;

   default:
      unreachable("Event object in unknown state");
   }

   /* Device-side states only from here on. */
   if (event->sync == NULL)
      return VK_EVENT_RESET;

   /* Zero timeout: poll, never block. */
   if (vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0) ==
       VK_SUCCESS) {
      return status_if_complete;
   }

   return status_if_pending;
}

/**
 * \brief Sets an event from the host.
 *
 * If the event has a sync object it is signalled first; when that fails the
 * error is returned and the event state is left unchanged. Otherwise the
 * event moves to PVR_EVENT_STATE_SET_BY_HOST.
 */
VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult result = VK_SUCCESS;

   if (event->sync != NULL)
      result = vk_sync_signal(&device->vk, event->sync, 0);

   if (result == VK_SUCCESS)
      event->state = PVR_EVENT_STATE_SET_BY_HOST;

   return result;
}

/**
 * \brief Resets an event from the host.
 *
 * If the event has a sync object it is reset first; when that fails the error
 * is returned and the event state is left unchanged. Otherwise the event
 * moves to PVR_EVENT_STATE_RESET_BY_HOST.
 */
VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult result = VK_SUCCESS;

   if (event->sync != NULL)
      result = vk_sync_reset(&device->vk, event->sync);

   if (result == VK_SUCCESS)
      event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   return result;
}

/* Buffer functions. */

/**
 * \brief Creates a buffer object.
 *
 * Every buffer is given a fixed 4096 byte alignment. Sizes close enough to
 * ULONG_MAX that adding the alignment could overflow are rejected up front
 * with VK_ERROR_OUT_OF_DEVICE_MEMORY. No memory is bound here.
 *
 * \return VK_SUCCESS, VK_ERROR_OUT_OF_DEVICE_MEMORY for an over-large size,
 *         or VK_ERROR_OUT_OF_HOST_MEMORY if the object cannot be allocated.
 */
VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t buffer_alignment = 4096;
   struct pvr_buffer *new_buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
   if (pCreateInfo->size >= ULONG_MAX - buffer_alignment)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   new_buffer = vk_buffer_create(&device->vk,
                                 pCreateInfo,
                                 pAllocator,
                                 sizeof(*new_buffer));
   if (new_buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   new_buffer->alignment = buffer_alignment;

   *pBuffer = pvr_buffer_to_handle(new_buffer);

   return VK_SUCCESS;
}

/**
 * \brief Destroys a buffer object, unbinding its memory first if any is
 * bound. A NULL buffer handle is ignored.
 */
void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (buffer == NULL)
      return;

   if (buffer->vma != NULL)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

/**
 * \brief Uploads host data into a new suballocation on the given heap.
 *
 * Selects the device suballocator that serves \p heap (general, PDS,
 * transfer-frag or USC), suballocates \p size bytes with \p alignment and
 * copies \p data into the allocation's mapping.
 *
 * \param[in] device       Logical device.
 * \param[in] heap         Heap to allocate from; must be one of the four
 *                         heaps listed above.
 * \param[in] data         Host data to copy.
 * \param[in] size         Number of bytes to upload; must be non-zero.
 * \param[in] alignment    Alignment of the suballocation.
 * \param[out] pvr_bo_out  Receives the suballocation on success.
 * \return VK_SUCCESS, or the pvr_bo_suballoc() error otherwise.
 */
VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballocator *heap_allocator;
   struct pvr_suballoc_bo *upload_bo = NULL;
   VkResult result;

   assert(size > 0);

   if (heap == device->heaps.general_heap) {
      heap_allocator = &device->suballoc_general;
   } else if (heap == device->heaps.pds_heap) {
      heap_allocator = &device->suballoc_pds;
   } else if (heap == device->heaps.transfer_frag_heap) {
      heap_allocator = &device->suballoc_transfer;
   } else if (heap == device->heaps.usc_heap) {
      heap_allocator = &device->suballoc_usc;
   } else {
      unreachable("Unknown heap type");
   }

   result =
      pvr_bo_suballoc(heap_allocator, size, alignment, false, &upload_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo_suballoc_get_map_addr(upload_bo), data, size);

   *pvr_bo_out = upload_bo;

   return VK_SUCCESS;
}

/**
 * \brief Uploads USC code into a new suballocation on the USC heap.
 *
 * The allocation is ROGUE_MAX_INSTR_BYTES larger than \p code_size; only
 * \p code_size bytes are copied.
 *
 * \param[in] device          Logical device.
 * \param[in] code            USC code to copy.
 * \param[in] code_size       Size of the code in bytes; must be non-zero.
 * \param[in] code_alignment  Alignment of the suballocation.
 * \param[out] pvr_bo_out     Receives the suballocation on success.
 * \return VK_SUCCESS, or the pvr_bo_suballoc() error otherwise.
 */
VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *usc_bo = NULL;
   VkResult result;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over allocate by 1
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_suballoc(&device->suballoc_usc,
                            code_size + ROGUE_MAX_INSTR_BYTES,
                            code_alignment,
                            false,
                            &usc_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo_suballoc_get_map_addr(usc_bo), code, code_size);

   *pvr_bo_out = usc_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * Both segments share one suballocation on the PDS heap: the data segment (if
 * any) sits at the start and the code segment (if any) follows at the next
 * multiple of \p code_alignment. The suballocation itself is aligned to the
 * largest of \p min_alignment, \p data_alignment and, when a code segment is
 * uploaded, \p code_alignment, so that both segments end up correctly aligned
 * in device memory and not just relative to the start of the allocation.
 *
 * The offsets stored in \p pds_upload_out are relative to the base address of
 * the PDS heap; the sizes are the aligned segment sizes in dwords.
 *
 * \param[in] device            Logical device pointer.
 * \param[in] data              Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords  Size of PDS data segment in dwords.
 * \param[in] data_alignment    Required alignment of the PDS data segment in
 *                              bytes. Must be a power of two.
 * \param[in] code              Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords  Size of PDS code segment in dwords.
 * \param[in] code_alignment    Required alignment of the PDS code segment in
 *                              bytes. Must be a power of two.
 * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
 *                              program in bytes.
 * \param[out] pds_upload_out   On success will be initialized based on the
 *                              uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
   const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
   const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t data_bo_alignment = MAX2(min_alignment, data_alignment);
   /* code_offset is only a multiple of code_alignment relative to the start
    * of the bo, so the bo itself must be at least code_alignment aligned for
    * the code segment's device address to be aligned as well. code_alignment
    * is only meaningful when a code segment is present.
    */
   const uint64_t bo_alignment = (!!code)
                                    ? MAX2(data_bo_alignment, code_alignment)
                                    : data_bo_alignment;
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   VkResult result;
   void *map;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

   result = pvr_bo_suballoc(&device->suballoc_pds,
                            bo_size,
                            bo_alignment,
                            true,
                            &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);

   if (data) {
      memcpy(map, data, data_size);

      /* The data segment starts at the beginning of the bo. */
      pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)map + code_offset, code, code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

/* Initializes the mutex of every entry in render_targets.
 *
 * Returns true when all render_targets_count mutexes were initialized. If any
 * pthread_mutex_init() call fails, the mutexes initialized so far are
 * destroyed again (most recent first) and false is returned.
 */
static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t idx = 0; idx < render_targets_count; idx++) {
      if (pthread_mutex_init(&render_targets[idx].mutex, NULL) == 0)
         continue;

      /* Entry idx failed: unwind entries [0, idx) in reverse order. */
      for (uint32_t remaining = idx; remaining > 0; remaining--)
         pthread_mutex_destroy(&render_targets[remaining - 1].mutex);

      return false;
   }

   return true;
}

/* Tears down every entry in render_targets.
 *
 * For each entry, the render target dataset is destroyed if the entry is
 * marked valid (and the flag is cleared), then the entry's mutex is
 * destroyed.
 */
static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t idx = 0;

   while (idx < render_targets_count) {
      struct pvr_render_target *const target = &render_targets[idx++];

      if (target->valid) {
         pvr_render_target_dataset_destroy(target->rt_dataset);
         target->valid = false;
      }

      pthread_mutex_destroy(&target->mutex);
   }
}

/* Creates a framebuffer object.
 *
 * The framebuffer structure, its attachment pointer array, its render target
 * array and the per-render SPM EOT / background object state arrays are all
 * carved out of a single zero-initialized multi-allocation, so releasing the
 * framebuffer pointer releases all of them.
 *
 * Setup order:
 *   1. PPP state upload,
 *   2. render target mutex initialization,
 *   3. SPM scratch buffer acquisition,
 *   4. per-render SPM EOT and background object state.
 *
 * On any failure everything set up so far is undone in reverse order and the
 * failing VkResult is returned; *pFramebuffer is only written on success.
 */
VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   uint64_t scratch_buffer_size;
   /* Number of renders whose EOT *and* bgobj state are fully initialized. */
   uint32_t initialized_renders;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);
   vk_multialloc_add(&ma,
                     &spm_eot_state_per_render,
                     __typeof__(*spm_eot_state_per_render),
                     pass->hw_setup->render_count);
   vk_multialloc_add(&ma,
                     &spm_bgobj_state_per_render,
                     __typeof__(*spm_bgobj_state_per_render),
                     pass->hw_setup->render_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   scratch_buffer_size =
      pvr_spm_scratch_buffer_calc_required_size(pass,
                                                framebuffer->width,
                                                framebuffer->height);

   result = pvr_spm_scratch_buffer_get_buffer(device,
                                              scratch_buffer_size,
                                              &framebuffer->scratch_buffer);
   if (result != VK_SUCCESS)
      goto err_finish_render_targets;

   for (initialized_renders = 0;
        initialized_renders < pass->hw_setup->render_count;
        initialized_renders++) {
      const uint32_t i = initialized_renders;
      uint32_t emit_count;

      result = pvr_spm_init_eot_state(device,
                                      &spm_eot_state_per_render[i],
                                      framebuffer,
                                      &pass->hw_setup->renders[i],
                                      &emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_spm_states;

      result = pvr_spm_init_bgobj_state(device,
                                        &spm_bgobj_state_per_render[i],
                                        framebuffer,
                                        &pass->hw_setup->renders[i],
                                        emit_count);
      if (result != VK_SUCCESS) {
         /* Render i is only half set up: drop its EOT state here so the
          * common unwind below only has to deal with complete renders.
          */
         pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
         goto err_finish_spm_states;
      }
   }

   framebuffer->render_count = pass->hw_setup->render_count;
   framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
   framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_finish_spm_states:
   /* Both state kinds must be released for every completed render,
    * regardless of which of the two init calls failed.
    */
   for (uint32_t i = 0; i < initialized_renders; i++) {
      pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[i]);
      pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
   }

   /* The scratch buffer was acquired before the loop; hand it back. */
   pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);

err_finish_render_targets:
   pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);

err_free_ppp_state_bo:
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

/* Destroys a framebuffer object; a null handle is ignored.
 *
 * Releases, in order: the per-render SPM background object and EOT state,
 * the SPM scratch buffer, the render targets, the PPP state buffer and
 * finally the framebuffer allocation itself.
 */
void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t render = 0;

   if (framebuffer == NULL)
      return;

   while (render < framebuffer->render_count) {
      pvr_spm_finish_bgobj_state(
         device,
         &framebuffer->spm_bgobj_state_per_render[render]);

      pvr_spm_finish_eot_state(device,
                               &framebuffer->spm_eot_state_per_render[render]);

      render++;
   }

   pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);

   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);

   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);

   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

/* Translates a VkFilter into the matching TEXSTATE_FILTER_* hardware value.
 *
 * Only VK_FILTER_NEAREST and VK_FILTER_LINEAR are handled; any other value
 * is treated as unreachable. dev_info is accepted but not consulted.
 */
static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   if (filter == VK_FILTER_NEAREST)
      return PVRX(TEXSTATE_FILTER_POINT);

   if (filter == VK_FILTER_LINEAR)
      return PVRX(TEXSTATE_FILTER_LINEAR);

   unreachable("Unknown filter type.");
}

/* Translates a VkSamplerAddressMode into the matching TEXSTATE_ADDRMODE_*
 * hardware value. Any mode not listed below is treated as unreachable.
 */
static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   uint32_t hw_addr_mode;

   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      hw_addr_mode = PVRX(TEXSTATE_ADDRMODE_REPEAT);
      break;

   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      hw_addr_mode = PVRX(TEXSTATE_ADDRMODE_FLIP);
      break;

   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      hw_addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      break;

   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      hw_addr_mode = PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
      break;

   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      hw_addr_mode = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
      break;

   default:
      unreachable("Invalid sampler address mode.");
   }

   return hw_addr_mode;
}

/* Creates a sampler object and fills in its hardware descriptor.
 *
 * The descriptor consists of a compare op, a packed TEXSTATE_SAMPLER word and
 * an extra "word3" that flags mirrored-repeat addressing on U and V.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the sampler cannot be allocated, or
 * the error from the border color table lookup (in which case the sampler is
 * freed again). *pSampler is only written on success.
 */
VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const struct pvr_device_info *dev_info;
   struct pvr_sampler *sampler;
   uint32_t border_idx;
   VkFilter mag_filter;
   VkFilter min_filter;
   bool has_brn51025;
   VkResult result;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler =
      vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result =
      pvr_border_color_table_get_or_create_entry(&device->border_color_table,
                                                 sampler,
                                                 &border_idx);
   if (result != VK_SUCCESS) {
      vk_object_free(&device->vk, pAllocator, sampler);
      return result;
   }

   dev_info = &device->pdevice->dev_info;
   has_brn51025 = PVR_HAS_QUIRK(dev_info, 51025);

   /* With quirk 51025 the min/mag filters may need adjusting. The GPU should
    * choose between them based on the clamped LOD: LOD <= 0 means
    * magnification, LOD > 0 means minification.
    *
    * Workaround: when clamping guarantees one of the two filters can never
    * be selected, program both slots with the filter that will be used:
    *  - minLod > 0   => clamped LOD is always positive => always minify,
    *  - maxLod <= 0  => clamped LOD is never positive  => always magnify.
    */
   mag_filter = (has_brn51025 && pCreateInfo->minLod > 0.0f)
                   ? pCreateInfo->minFilter
                   : pCreateInfo->magFilter;
   min_filter = (has_brn51025 && pCreateInfo->maxLod <= 0.0f)
                   ? pCreateInfo->magFilter
                   : pCreateInfo->minFilter;

   /* A disabled compare is programmed as VK_COMPARE_OP_NEVER. */
   sampler->descriptor.data.compare_op = (uint32_t)pvr_texstate_cmpmode(
      pCreateInfo->compareEnable ? pCreateInfo->compareOp
                                 : VK_COMPARE_OP_NEVER);

   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be picked
       * by adding 0.5 to the input LOD and truncating. Hardware with quirk
       * 51025 applies that 0.5 before clamping against lodmin/lodmax, while
       * Vulkan specifies it after clamping. Shifting the LOD bounds by the
       * same 0.5 compensates for the difference.
       */
      const float lod_rounding_bias =
         (has_brn51025 &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST)
            ? 0.5f
            : 0.0f;
      const float min_lod = pCreateInfo->minLod + lod_rounding_bias;
      const float max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      uint32_t word3 = 0;

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         word3 |= 0x20000000;

      sampler->descriptor.data.word3 = word3;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = border_idx;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

/* Destroys a sampler object; a null handle is ignored. */
void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (sampler != NULL)
      vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
}

/* Reports the memory requirements of a buffer.
 *
 * - memoryTypeBits: one bit per memory type exposed by the physical device.
 * - alignment:      the buffer's own alignment.
 * - size:           the buffer size, optionally grown by
 *                   PVR_BUFFER_MEMORY_PADDING_SIZE (see below), rounded up to
 *                   the buffer's alignment.
 */
void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint64_t page_remainder;
   uint64_t size;

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    *
    * The shift is done in 64 bits so that it stays well defined for every
    * count up to 32, even where unsigned long is only 32 bits wide.
    */
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (uint32_t)((UINT64_C(1) << device->pdevice->memory.memoryTypeCount) - 1);

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;

   size = buffer->vk.size;
   page_remainder = size % device->ws->page_size;

   /* Add padding when the buffer ends exactly on a page boundary, or when
    * fewer than PVR_BUFFER_MEMORY_PADDING_SIZE bytes remain in its last page.
    */
   if (page_remainder == 0 ||
       page_remainder >
          device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
      /* TODO: We can save memory by having one extra virtual page mapped
       * in and having the first and last virtual page mapped to the first
       * physical address.
       */
      size += PVR_BUFFER_MEMORY_PADDING_SIZE;
   }

   /* Use the 64-bit helper (as the image path does) so the rounding mask is
    * always as wide as the size, whatever the width of buffer->alignment.
    */
   pMemoryRequirements->memoryRequirements.size =
      align64(size, buffer->alignment);
}

/* Reports the memory requirements of an image.
 *
 * - memoryTypeBits: one bit per memory type exposed by the physical device.
 * - alignment:      the image's own alignment.
 * - size:           the image size rounded up to the image's alignment.
 */
void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    *
    * The shift is done in 64 bits so that it stays well defined for every
    * count up to 32, even where unsigned long is only 32 bits wide.
    */
   const uint32_t memory_types =
      (uint32_t)((UINT64_C(1) << device->pdevice->memory.memoryTypeCount) - 1);

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
    * in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      align64(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}
