/*
 * Copyright (C) 2008 VMware, Inc.
 * Copyright (C) 2014 Broadcom
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2019 Collabora, Ltd.
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "draw/draw_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "util/format/u_format.h"
#include "util/format/u_format_s3tc.h"
#include "util/os_time.h"
#include "util/u_debug.h"
#include "util/u_memory.h"
#include "util/u_process.h"
#include "util/u_screen.h"
#include "util/u_video.h"
#include "util/xmlconfig.h"

#include <fcntl.h>

#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/panfrost_drm.h"

#include "decode.h"
#include "pan_bo.h"
#include "pan_fence.h"
#include "pan_public.h"
#include "pan_resource.h"
#include "pan_screen.h"
#include "pan_shader.h"
#include "pan_util.h"

#include "pan_context.h"

#define DEFAULT_MAX_AFBC_PACKING_RATIO 90

/* clang-format off */
static const struct debug_named_value panfrost_debug_options[] = {
   {"perf",       PAN_DBG_PERF,     "Enable performance warnings"},
   {"trace",      PAN_DBG_TRACE,    "Trace the command stream"},
   {"dirty",      PAN_DBG_DIRTY,    "Always re-emit all state"},
   {"sync",       PAN_DBG_SYNC,     "Wait for each job's completion and abort on GPU faults"},
   {"nofp16",     PAN_DBG_NOFP16,    "Disable 16-bit support"},
   {"gl3",        PAN_DBG_GL3,      "Enable experimental GL 3.x implementation, up to 3.3"},
   {"noafbc",     PAN_DBG_NO_AFBC,  "Disable AFBC support"},
   {"crc",        PAN_DBG_CRC,      "Enable transaction elimination"},
   {"msaa16",     PAN_DBG_MSAA16,   "Enable MSAA 8x and 16x support"},
   {"linear",     PAN_DBG_LINEAR,   "Force linear textures"},
   {"nocache",    PAN_DBG_NO_CACHE, "Disable BO cache"},
   {"dump",       PAN_DBG_DUMP,     "Dump all graphics memory"},
#ifdef PAN_DBG_OVERFLOW
   {"overflow",   PAN_DBG_OVERFLOW, "Check for buffer overflows in pool uploads"},
#endif
   {"yuv",        PAN_DBG_YUV,      "Tint YUV textures with blue for 1-plane and green for 2-plane"},
   {"forcepack",  PAN_DBG_FORCE_PACK,  "Force packing of AFBC textures on upload"},
   DEBUG_NAMED_VALUE_END
};
/* clang-format on */

static const char *
panfrost_get_name(struct pipe_screen *screen)
{
   return pan_device(screen)->model->name;
}

static const char *
panfrost_get_vendor(struct pipe_screen *screen)
{
   return "Mesa";
}

static const char *
panfrost_get_device_vendor(struct pipe_screen *screen)
{
   return "Arm";
}

static int
panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
{
   struct panfrost_device *dev = pan_device(screen);

   /* Our GL 3.x implementation is WIP */
   bool is_gl3 = dev->debug & PAN_DBG_GL3;

   /* Native MRT is introduced with v5 */
   bool has_mrt = (dev->arch >= 5);

   switch (param) {
   case PIPE_CAP_NPOT_TEXTURES:
   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
   case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
   case PIPE_CAP_DEPTH_CLIP_DISABLE:
   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
   case PIPE_CAP_FRONTEND_NOOP:
   case PIPE_CAP_SAMPLE_SHADING:
   case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
   case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
   case PIPE_CAP_HAS_CONST_BW:
      return 1;

   /* Removed in v9 (Valhall) */
   case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:
      return dev->arch < 9;

   case PIPE_CAP_MAX_RENDER_TARGETS:
   case PIPE_CAP_FBFETCH:
   case PIPE_CAP_FBFETCH_COHERENT:
      return has_mrt ? 8 : 1;

   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
      return 1;

   case PIPE_CAP_OCCLUSION_QUERY:
   case PIPE_CAP_PRIMITIVE_RESTART:
   case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
      return true;

   case PIPE_CAP_ANISOTROPIC_FILTER:
      return panfrost_device_gpu_rev(dev) >= dev->model->min_rev_anisotropic;

   /* Compile side is done for Bifrost, Midgard TODO. Needs some kernel
    * work to turn on, since CYCLE_COUNT_START needs to be issued. In
    * kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
    * yet way to request this with mainline TODO */
   case PIPE_CAP_SHADER_CLOCK:
      return 0;

   case PIPE_CAP_VS_INSTANCEID:
   case PIPE_CAP_TEXTURE_MULTISAMPLE:
   case PIPE_CAP_SURFACE_SAMPLE_COUNT:
      return true;

   case PIPE_CAP_SAMPLER_VIEW_TARGET:
   case PIPE_CAP_CLIP_HALFZ:
   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
   case PIPE_CAP_TEXTURE_SWIZZLE:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
   case PIPE_CAP_INDEP_BLEND_ENABLE:
   case PIPE_CAP_INDEP_BLEND_FUNC:
   case PIPE_CAP_GENERATE_MIPMAP:
   case PIPE_CAP_ACCELERATED:
   case PIPE_CAP_UMA:
   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
   case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
   case PIPE_CAP_PACKED_UNIFORMS:
   case PIPE_CAP_IMAGE_LOAD_FORMATTED:
   case PIPE_CAP_CUBE_MAP_ARRAY:
   case PIPE_CAP_COMPUTE:
   case PIPE_CAP_INT64:
      return 1;

   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
      return 1;

   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
      return PIPE_MAX_SO_BUFFERS;

   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
      return PIPE_MAX_SO_OUTPUTS;

   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
      return 1;

   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 2048;

   case PIPE_CAP_GLSL_FEATURE_LEVEL:
   case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
      return is_gl3 ? 330 : 140;
   case PIPE_CAP_ESSL_FEATURE_LEVEL:
      return dev->arch >= 6 ? 320 : 310;

   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
      return 16;

   /* v7 (only) restricts component orders with AFBC. To workaround, we
    * compose format swizzles with texture swizzles. pan_texture.c motsly
    * handles this but we need to fix up the border colour.
    */
   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
      if (dev->arch == 7 || dev->arch >= 10)
         return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
      else
         return 0;

   case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
      return PAN_MAX_TEXEL_BUFFER_ELEMENTS;

   /* Must be at least 64 for correct behaviour */
   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
      return 64;

   case PIPE_CAP_QUERY_TIME_ELAPSED:
   case PIPE_CAP_QUERY_TIMESTAMP:
      return dev->kmod.props.gpu_can_query_timestamp &&
             dev->kmod.props.timestamp_frequency != 0;

   case PIPE_CAP_TIMER_RESOLUTION:
      return pan_gpu_time_to_ns(dev, 1);

   /* The hardware requires element alignment for data conversion to work
    * as expected. If data conversion is not required, this restriction is
    * lifted on Midgard at a performance penalty. We conservatively
    * require element alignment for vertex buffers, using u_vbuf to
    * translate to match the hardware requirement.
    *
    * This is less heavy-handed than the 4BYTE_ALIGNED_ONLY caps, which
    * would needlessly require alignment even for 8-bit formats.
    */
   case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
      return 1;

   case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
      return 1 << (PAN_MAX_MIP_LEVELS - 1);

   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      return PAN_MAX_MIP_LEVELS;

   /* pixel coord is in integer sysval on bifrost. */
   case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
      return dev->arch >= 6;
   case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
      return dev->arch < 6;

   case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
      /* Hardware is upper left */
      return 0;

   case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
   case PIPE_CAP_TGSI_TEXCOORD:
      return 1;

   /* We would prefer varyings on Midgard, but proper sysvals on Bifrost */
   case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
   case PIPE_CAP_FS_POSITION_IS_SYSVAL:
   case PIPE_CAP_FS_POINT_IS_SYSVAL:
      return dev->arch >= 6;

   case PIPE_CAP_SEAMLESS_CUBE_MAP:
   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
      return true;

   case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
      return 0xffff;

   case PIPE_CAP_TEXTURE_TRANSFER_MODES:
      return 0;

   case PIPE_CAP_ENDIANNESS:
      return PIPE_ENDIAN_NATIVE;

   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
      return 4;

   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
      return -8;

   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
      return 7;

   case PIPE_CAP_VIDEO_MEMORY: {
      uint64_t system_memory;

      if (!os_get_total_physical_memory(&system_memory))
         return 0;

      return (int)(system_memory >> 20);
   }

   case PIPE_CAP_SHADER_STENCIL_EXPORT:
   case PIPE_CAP_CONDITIONAL_RENDER:
   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
      return true;

   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
      return 4;

   case PIPE_CAP_MAX_VARYINGS:
      return dev->arch >= 9 ? 16 : 32;

   /* Removed in v6 (Bifrost) */
   case PIPE_CAP_GL_CLAMP:
   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
   case PIPE_CAP_ALPHA_TEST:
      return dev->arch <= 5;

   /* Removed in v9 (Valhall). PRIMTIIVE_RESTART_FIXED_INDEX is of course
    * still supported as it is core GLES3.0 functionality
    */
   case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART:
      return dev->arch >= 9;

   case PIPE_CAP_FLATSHADE:
   case PIPE_CAP_TWO_SIDED_COLOR:
   case PIPE_CAP_CLIP_PLANES:
      return 0;

   case PIPE_CAP_PACKED_STREAM_OUTPUT:
      return 0;

   case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:
   case PIPE_CAP_PSIZ_CLAMPED:
      return 1;

   case PIPE_CAP_NIR_IMAGES_AS_DEREF:
      return 0;

   case PIPE_CAP_DRAW_INDIRECT:
      return 1;

   case PIPE_CAP_MULTI_DRAW_INDIRECT:
      return dev->arch >= 10;

   case PIPE_CAP_START_INSTANCE:
   case PIPE_CAP_DRAW_PARAMETERS:
      return pan_is_bifrost(dev);

   case PIPE_CAP_SUPPORTED_PRIM_MODES:
   case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART: {
      /* Mali supports GLES and QUADS. Midgard and v6 Bifrost
       * support more */
      uint32_t modes = BITFIELD_MASK(MESA_PRIM_QUADS + 1);

      if (dev->arch <= 6) {
         modes |= BITFIELD_BIT(MESA_PRIM_QUAD_STRIP);
         modes |= BITFIELD_BIT(MESA_PRIM_POLYGON);
      }

      if (dev->arch >= 9) {
         /* Although Valhall is supposed to support quads, they
          * don't seem to work correctly. Disable to fix
          * arb-provoking-vertex-render.
          */
         modes &= ~BITFIELD_BIT(MESA_PRIM_QUADS);
      }

      return modes;
   }

   case PIPE_CAP_IMAGE_STORE_FORMATTED:
      return 1;

   case PIPE_CAP_NATIVE_FENCE_FD:
      return 1;

   case PIPE_CAP_ASTC_DECODE_MODE:
      return dev->arch >= 9 && (dev->compressed_formats & (1 << 30));

   default:
      return u_pipe_screen_get_param_defaults(screen, param);
   }
}

static int
panfrost_get_shader_param(struct pipe_screen *screen,
                          enum pipe_shader_type shader,
                          enum pipe_shader_cap param)
{
   struct panfrost_device *dev = pan_device(screen);
   bool is_nofp16 = dev->debug & PAN_DBG_NOFP16;

   switch (shader) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_FRAGMENT:
   case PIPE_SHADER_COMPUTE:
      break;
   default:
      return 0;
   }

   /* We only allow observable side effects (memory writes) in compute and
    * fragment shaders. Side effects in the geometry pipeline cause
    * trouble with IDVS and conflict with our transform feedback lowering.
    */
   bool allow_side_effects = (shader != PIPE_SHADER_VERTEX);

   switch (param) {
   case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
   case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
      return 16384; /* arbitrary */

   case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
      return 1024; /* arbitrary */

   case PIPE_SHADER_CAP_MAX_INPUTS:
      /* Used as ABI on Midgard */
      return 16;

   case PIPE_SHADER_CAP_MAX_OUTPUTS:
      return shader == PIPE_SHADER_FRAGMENT ? 8 : PIPE_MAX_ATTRIBS;

   case PIPE_SHADER_CAP_MAX_TEMPS:
      return 256; /* arbitrary */

   case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
      return 16 * 1024 * sizeof(float);

   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
      STATIC_ASSERT(PAN_MAX_CONST_BUFFERS < 0x100);
      return PAN_MAX_CONST_BUFFERS;

   case PIPE_SHADER_CAP_CONT_SUPPORTED:
      return 0;

   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
      return 1;
   case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
      return 0;

   case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
      return dev->arch >= 6;

   case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
      return 1;

   case PIPE_SHADER_CAP_SUBROUTINES:
      return 0;

   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 0;

   case PIPE_SHADER_CAP_INTEGERS:
      return 1;

      /* The Bifrost compiler supports full 16-bit. Midgard could but int16
       * support is untested, so restrict INT16 to Bifrost. Midgard
       * architecturally cannot support fp16 derivatives. */

   case PIPE_SHADER_CAP_FP16:
   case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
      return !is_nofp16;
   case PIPE_SHADER_CAP_FP16_DERIVATIVES:
   case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
      return dev->arch >= 6 && !is_nofp16;
   case PIPE_SHADER_CAP_INT16:
      /* Blocked on https://gitlab.freedesktop.org/mesa/mesa/-/issues/6075 */
      return false;

   case PIPE_SHADER_CAP_INT64_ATOMICS:
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
      return 0;

   case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
      STATIC_ASSERT(PIPE_MAX_SAMPLERS < 0x10000);
      return PIPE_MAX_SAMPLERS;

   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      STATIC_ASSERT(PIPE_MAX_SHADER_SAMPLER_VIEWS < 0x10000);
      return PIPE_MAX_SHADER_SAMPLER_VIEWS;

   case PIPE_SHADER_CAP_SUPPORTED_IRS:
      return (1 << PIPE_SHADER_IR_NIR);

   case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
      return allow_side_effects ? 16 : 0;

   case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
      return allow_side_effects ? PIPE_MAX_SHADER_IMAGES : 0;

   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
   case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
      return 0;

   default:
      return 0;
   }

   return 0;
}

static float
panfrost_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
{
   switch (param) {
   case PIPE_CAPF_MIN_LINE_WIDTH:
   case PIPE_CAPF_MIN_LINE_WIDTH_AA:
   case PIPE_CAPF_MIN_POINT_SIZE:
   case PIPE_CAPF_MIN_POINT_SIZE_AA:
      return 1;

   case PIPE_CAPF_POINT_SIZE_GRANULARITY:
   case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
      return 0.0625;

   case PIPE_CAPF_MAX_LINE_WIDTH:
   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
   case PIPE_CAPF_MAX_POINT_SIZE:
   case PIPE_CAPF_MAX_POINT_SIZE_AA:
      return 4095.9375;

   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
      return 16.0;

   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
      return 16.0; /* arbitrary */

   case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
   case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
   case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
      return 0.0f;

   default:
      debug_printf("Unexpected PIPE_CAPF %d query\n", param);
      return 0.0;
   }
}

static uint32_t
pipe_to_pan_bind_flags(uint32_t pipe_bind_flags)
{
   static_assert(PIPE_BIND_DEPTH_STENCIL == PAN_BIND_DEPTH_STENCIL, "");
   static_assert(PIPE_BIND_RENDER_TARGET == PAN_BIND_RENDER_TARGET, "");
   static_assert(PIPE_BIND_SAMPLER_VIEW == PAN_BIND_SAMPLER_VIEW, "");
   static_assert(PIPE_BIND_VERTEX_BUFFER == PAN_BIND_VERTEX_BUFFER, "");

   return pipe_bind_flags & (PAN_BIND_DEPTH_STENCIL | PAN_BIND_RENDER_TARGET |
                             PAN_BIND_VERTEX_BUFFER | PAN_BIND_SAMPLER_VIEW);
}

/**
 * Query format support for creating a texture, drawing surface, etc.
 * \param format  the format to test
 * \param type  one of PIPE_TEXTURE, PIPE_SURFACE
 */
static bool
panfrost_is_format_supported(struct pipe_screen *screen,
                             enum pipe_format format,
                             enum pipe_texture_target target,
                             unsigned sample_count,
                             unsigned storage_sample_count, unsigned bind)
{
   struct panfrost_device *dev = pan_device(screen);

   /* MSAA 2x gets rounded up to 4x. MSAA 8x/16x only supported on v5+.
    * TODO: debug MSAA 8x/16x */

   switch (sample_count) {
   case 0:
   case 1:
   case 4:
      break;
   case 8:
   case 16:
      if (dev->debug & PAN_DBG_MSAA16)
         break;
      else
         return false;
   default:
      return false;
   }

   if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
      return false;

   /* Z16 causes dEQP failures on t720 */
   if (format == PIPE_FORMAT_Z16_UNORM && dev->arch <= 4)
      return false;

   /* Check we support the format with the given bind */

   unsigned pan_bind_flags = pipe_to_pan_bind_flags(bind);

   struct panfrost_format fmt = dev->formats[format];

   /* Also check that compressed texture formats are supported on this
    * particular chip. They may not be depending on system integration
    * differences. */

   bool supported =
      panfrost_supports_compressed_format(dev, MALI_EXTRACT_INDEX(fmt.hw));

   if (!supported)
      return false;

   return MALI_EXTRACT_INDEX(fmt.hw) && ((pan_bind_flags & ~fmt.bind) == 0);
}

static void
panfrost_query_compression_rates(struct pipe_screen *screen,
                                 enum pipe_format format, int max,
                                 uint32_t *rates, int *count)
{
   struct panfrost_device *dev = pan_device(screen);

   if (!dev->has_afrc) {
      *count = 0;
      return;
   }

   *count = panfrost_afrc_query_rates(format, max, rates);
}

static void
panfrost_query_compression_modifiers(struct pipe_screen *screen,
                                     enum pipe_format format, uint32_t rate,
                                     int max, uint64_t *modifiers, int *count)
{
   struct panfrost_device *dev = pan_device(screen);

   if (!dev->has_afrc || rate == PIPE_COMPRESSION_FIXED_RATE_NONE) {
      int mod_count = 0;
      for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
         if (drm_is_afrc(pan_best_modifiers[i]))
            continue;
         if (mod_count < max)
            modifiers[mod_count] = pan_best_modifiers[i];
         mod_count++;
         if (max > 0 && mod_count >= max)
            break;
      }
      *count = mod_count;
      return;
   }

   *count = panfrost_afrc_get_modifiers(format, rate, max, modifiers);
}

static bool
panfrost_is_compression_modifier(struct pipe_screen *screen,
                                 enum pipe_format format, uint64_t modifier,
                                 uint32_t *rate)
{
   struct panfrost_device *dev = pan_device(screen);
   uint32_t compression_rate = panfrost_afrc_get_rate(format, modifier);

   if (!dev->has_afrc)
      return false;

   if (rate)
      *rate = compression_rate;

   return (compression_rate != 0);
}

/* We always support linear and tiled operations, both external and internal.
 * We support AFBC for a subset of formats, and colourspace transform for a
 * subset of those. */

static void
panfrost_walk_dmabuf_modifiers(struct pipe_screen *screen,
                               enum pipe_format format, int max,
                               uint64_t *modifiers, unsigned int *external_only,
                               int *out_count, uint64_t test_modifier)
{
   /* Query AFBC status */
   struct panfrost_device *dev = pan_device(screen);
   bool afbc =
      dev->has_afbc && panfrost_format_supports_afbc(dev->arch, format);
   bool ytr = panfrost_afbc_can_ytr(format);
   bool tiled_afbc = panfrost_afbc_can_tile(dev->arch);
   bool afrc = dev->has_afrc && panfrost_format_supports_afrc(format);

   unsigned count = 0;

   for (unsigned i = 0; i < PAN_MODIFIER_COUNT; ++i) {
      if (drm_is_afbc(pan_best_modifiers[i]) && !afbc)
         continue;

      if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_YTR) && !ytr)
         continue;

      if ((pan_best_modifiers[i] & AFBC_FORMAT_MOD_TILED) && !tiled_afbc)
         continue;

      if (drm_is_afrc(pan_best_modifiers[i]) && !afrc)
         continue;

      if (test_modifier != DRM_FORMAT_MOD_INVALID &&
          test_modifier != pan_best_modifiers[i])
         continue;

      if (max > (int)count) {
         modifiers[count] = pan_best_modifiers[i];

         if (external_only)
            external_only[count] = false;
      }
      count++;
   }

   *out_count = count;
}

static void
panfrost_query_dmabuf_modifiers(struct pipe_screen *screen,
                                enum pipe_format format, int max,
                                uint64_t *modifiers,
                                unsigned int *external_only, int *out_count)
{
   panfrost_walk_dmabuf_modifiers(screen, format, max, modifiers, external_only,
                                  out_count, DRM_FORMAT_MOD_INVALID);
}

static bool
panfrost_is_dmabuf_modifier_supported(struct pipe_screen *screen,
                                      uint64_t modifier,
                                      enum pipe_format format,
                                      bool *external_only)
{
   uint64_t unused;
   unsigned int uint_extern_only = 0;
   int count;

   panfrost_walk_dmabuf_modifiers(screen, format, 1, &unused, &uint_extern_only,
                                  &count, modifier);

   if (external_only)
      *external_only = uint_extern_only ? true : false;

   return count > 0;
}

static int
panfrost_get_compute_param(struct pipe_screen *pscreen,
                           enum pipe_shader_ir ir_type,
                           enum pipe_compute_cap param, void *ret)
{
   struct panfrost_device *dev = pan_device(pscreen);
   const char *const ir = "panfrost";

#define RET(x)                                                                 \
   do {                                                                        \
      if (ret)                                                                 \
         memcpy(ret, x, sizeof(x));                                            \
      return sizeof(x);                                                        \
   } while (0)

   switch (param) {
   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
      RET((uint32_t[]){64});

   case PIPE_COMPUTE_CAP_IR_TARGET:
      if (ret)
         sprintf(ret, "%s", ir);
      return strlen(ir) * sizeof(char);

   case PIPE_COMPUTE_CAP_GRID_DIMENSION:
      RET((uint64_t[]){3});

   case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
      RET(((uint64_t[]){65535, 65535, 65535}));

   case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
      /* Unpredictable behaviour at larger sizes. Mali-G52 advertises
       * 384x384x384.
       *
       * On Midgard, we don't allow more than 128 threads in each
       * direction to match PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK.
       * That still exceeds the minimum-maximum.
       */
      if (dev->arch >= 6)
         RET(((uint64_t[]){256, 256, 256}));
      else
         RET(((uint64_t[]){128, 128, 128}));

   case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
      /* On Bifrost and newer, all GPUs can support at least 256 threads
       * regardless of register usage, so we report 256.
       *
       * On Midgard, with maximum register usage, the maximum
       * thread count is only 64. We would like to report 64 here, but
       * the GLES3.1 spec minimum is 128, so we report 128 and limit
       * the register allocation of affected compute kernels.
       */
      RET((uint64_t[]){dev->arch >= 6 ? 256 : 128});

   case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
   case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
      uint64_t total_ram;

      if (!os_get_total_physical_memory(&total_ram))
         return 0;

      /* We don't want to burn too much ram with the GPU. If the user has 4GiB
       * or less, we use at most half. If they have more than 4GiB, we use 3/4.
       */
      uint64_t available_ram;
      if (total_ram <= 4ull * 1024 * 1024 * 1024)
         available_ram = total_ram / 2;
      else
         available_ram = total_ram * 3 / 4;

      /* 48bit address space max, with the lower 32MB reserved. We clamp
       * things so it matches kmod VA range limitations.
       */
      uint64_t user_va_start =
         panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_START);
      uint64_t user_va_end =
         panfrost_clamp_to_usable_va_range(dev->kmod.dev, PAN_VA_USER_END);

      /* We cannot support more than the VA limit */
      RET((uint64_t[]){MIN2(available_ram, user_va_end - user_va_start)});
   }

   case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
      RET((uint64_t[]){32768});

   case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
   case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
      RET((uint64_t[]){4096});

   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
      RET((uint32_t[]){800 /* MHz -- TODO */});

   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
      RET((uint32_t[]){dev->core_count});

   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
      RET((uint32_t[]){1});

   case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
      RET((uint32_t[]){pan_subgroup_size(dev->arch)});

   case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
      RET((uint32_t[]){0 /* TODO */});

   case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
      RET((uint64_t[]){1024}); // TODO
   }

   return 0;
}

static void
panfrost_destroy_screen(struct pipe_screen *pscreen)
{
   struct panfrost_device *dev = pan_device(pscreen);
   struct panfrost_screen *screen = pan_screen(pscreen);

   panfrost_resource_screen_destroy(pscreen);
   panfrost_pool_cleanup(&screen->blitter.bin_pool);
   panfrost_pool_cleanup(&screen->blitter.desc_pool);
   pan_blend_shader_cache_cleanup(&dev->blend_shaders);

   if (screen->vtbl.screen_destroy)
      screen->vtbl.screen_destroy(pscreen);

   if (dev->ro)
      dev->ro->destroy(dev->ro);
   panfrost_close_device(dev);

   disk_cache_destroy(screen->disk_cache);
   ralloc_free(pscreen);
}

static const void *
panfrost_screen_get_compiler_options(struct pipe_screen *pscreen,
                                     enum pipe_shader_ir ir,
                                     enum pipe_shader_type shader)
{
   return pan_screen(pscreen)->vtbl.get_compiler_options();
}

static struct disk_cache *
panfrost_get_disk_shader_cache(struct pipe_screen *pscreen)
{
   return pan_screen(pscreen)->disk_cache;
}

static int
panfrost_get_screen_fd(struct pipe_screen *pscreen)
{
   return panfrost_device_fd(pan_device(pscreen));
}

int
panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                               struct pipe_driver_query_info *info)
{
   int num_queries = ARRAY_SIZE(panfrost_driver_query_list);

   if (!info)
      return num_queries;

   if (index >= num_queries)
      return 0;

   *info = panfrost_driver_query_list[index];

   return 1;
}

static uint64_t
panfrost_get_timestamp(struct pipe_screen *pscreen)
{
   struct panfrost_device *dev = pan_device(pscreen);

   return pan_gpu_time_to_ns(dev, pan_kmod_query_timestamp(dev->kmod.dev));
}

struct pipe_screen *
panfrost_create_screen(int fd, const struct pipe_screen_config *config,
                       struct renderonly *ro)
{
   /* Create the screen */
   struct panfrost_screen *screen = rzalloc(NULL, struct panfrost_screen);

   if (!screen)
      return NULL;

   struct panfrost_device *dev = pan_device(&screen->base);

   driParseConfigFiles(config->options, config->options_info, 0,
                       "panfrost", NULL, NULL, NULL, 0, NULL, 0);

   /* Debug must be set first for pandecode to work correctly */
   dev->debug =
      debug_get_flags_option("PAN_MESA_DEBUG", panfrost_debug_options, 0);
   screen->max_afbc_packing_ratio = debug_get_num_option(
      "PAN_MAX_AFBC_PACKING_RATIO", DEFAULT_MAX_AFBC_PACKING_RATIO);
   panfrost_open_device(screen, fd, dev);

   if (dev->debug & PAN_DBG_NO_AFBC)
      dev->has_afbc = false;

   /* Bail early on unsupported hardware */
   if (dev->model == NULL) {
      debug_printf("panfrost: Unsupported model %X",
                   panfrost_device_gpu_id(dev));
      panfrost_destroy_screen(&(screen->base));
      return NULL;
   }

   screen->force_afbc_packing = dev->debug & PAN_DBG_FORCE_PACK;
   if (!screen->force_afbc_packing)
      screen->force_afbc_packing = driQueryOptionb(config->options,
                                                   "pan_force_afbc_packing");

   const char *option = debug_get_option("PAN_AFRC_RATE", NULL);
   if (!option) {
      screen->force_afrc_rate = -1;
   } else if (strcmp(option, "default") == 0) {
      screen->force_afrc_rate = PIPE_COMPRESSION_FIXED_RATE_DEFAULT;
   } else {
      int64_t rate =
         debug_parse_num_option(option, PIPE_COMPRESSION_FIXED_RATE_NONE);
      screen->force_afrc_rate = rate;
   }

   screen->csf_tiler_heap.chunk_size = driQueryOptioni(config->options,
                                                       "pan_csf_chunk_size");
   screen->csf_tiler_heap.initial_chunks = driQueryOptioni(config->options,
                                                           "pan_csf_initial_chunks");
   screen->csf_tiler_heap.max_chunks = driQueryOptioni(config->options,
                                                       "pan_csf_max_chunks");

   dev->ro = ro;

   screen->base.destroy = panfrost_destroy_screen;

   screen->base.get_screen_fd = panfrost_get_screen_fd;
   screen->base.get_name = panfrost_get_name;
   screen->base.get_vendor = panfrost_get_vendor;
   screen->base.get_device_vendor = panfrost_get_device_vendor;
   screen->base.get_driver_query_info = panfrost_get_driver_query_info;
   screen->base.get_param = panfrost_get_param;
   screen->base.get_shader_param = panfrost_get_shader_param;
   screen->base.get_compute_param = panfrost_get_compute_param;
   screen->base.get_paramf = panfrost_get_paramf;
   screen->base.get_timestamp = panfrost_get_timestamp;
   screen->base.is_format_supported = panfrost_is_format_supported;
   screen->base.query_dmabuf_modifiers = panfrost_query_dmabuf_modifiers;
   screen->base.is_dmabuf_modifier_supported =
      panfrost_is_dmabuf_modifier_supported;
   screen->base.context_create = panfrost_create_context;
   screen->base.get_compiler_options = panfrost_screen_get_compiler_options;
   screen->base.get_disk_shader_cache = panfrost_get_disk_shader_cache;
   screen->base.fence_reference = panfrost_fence_reference;
   screen->base.fence_finish = panfrost_fence_finish;
   screen->base.fence_get_fd = panfrost_fence_get_fd;
   screen->base.set_damage_region = panfrost_resource_set_damage_region;
   screen->base.query_compression_rates = panfrost_query_compression_rates;
   screen->base.query_compression_modifiers =
      panfrost_query_compression_modifiers;
   screen->base.is_compression_modifier = panfrost_is_compression_modifier;

   panfrost_resource_screen_init(&screen->base);
   pan_blend_shader_cache_init(&dev->blend_shaders,
                               panfrost_device_gpu_id(dev));

   panfrost_disk_cache_init(screen);

   panfrost_pool_init(&screen->blitter.bin_pool, NULL, dev, PAN_BO_EXECUTE,
                      4096, "Blitter shaders", false, true);
   panfrost_pool_init(&screen->blitter.desc_pool, NULL, dev, 0, 65536,
                      "Blitter RSDs", false, true);
   if (dev->arch == 4)
      panfrost_cmdstream_screen_init_v4(screen);
   else if (dev->arch == 5)
      panfrost_cmdstream_screen_init_v5(screen);
   else if (dev->arch == 6)
      panfrost_cmdstream_screen_init_v6(screen);
   else if (dev->arch == 7)
      panfrost_cmdstream_screen_init_v7(screen);
   else if (dev->arch == 9)
      panfrost_cmdstream_screen_init_v9(screen);
   else if (dev->arch == 10)
      panfrost_cmdstream_screen_init_v10(screen);
   else
      unreachable("Unhandled architecture major");

   return &screen->base;
}
