# Copyright © 2024 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

from textwrap import dedent

# TYPES is an ordered list of all declarations in this file.  Define, Enum
# and Struct instances append themselves to it from their constructors, so
# the list order is the order in which the declarations are written.
TYPES = []

# TYPES_BY_NAME allows the lookup of any declaration by name.  Only Enum and
# Struct constructors register themselves here; Define instances are reachable
# through TYPES only.
TYPES_BY_NAME = {}

class Define:
    """Describes a C macro definition: its name, value and optional comment.

    Constructing an instance appends it to the module-level TYPES list, so
    the instantiation order is preserved.  Unlike Enum and Struct, a Define
    is not entered into TYPES_BY_NAME.
    """

    def __init__(self, name, value, comment=None):
        self.name, self.value, self.comment = name, value, comment
        # Self-registration keeps the declaration order of the file.
        TYPES.append(self)

class EnumValue:
    """A single enumerator with optional metadata.

    Besides the mandatory name, an enumerator may carry an explicit value, a
    comment, and group_begin / group_end markers.  All of them are stored
    unchanged and default to None.
    """

    def __init__(self, name, value=None, comment=None,
                 group_begin=None, group_end=None):
        self.name, self.value, self.comment = name, value, comment
        self.group_begin, self.group_end = group_begin, group_end

    def __str__(self):
        # The string form of an enumerator is just its name.
        return self.name

class Enum:
    """Holds what is needed to declare and serialize an enumeration.

    `values` may mix EnumValue instances with other objects (plain name
    strings in this file); anything that is not already an EnumValue is
    wrapped in one.  `external` is stored as given.

    Constructing an instance appends it to TYPES and registers it in
    TYPES_BY_NAME under `name`.
    """

    def __init__(self, name, values, external=False):
        self.name = name
        self.external = external
        # Normalise every entry to an EnumValue, keeping the given order.
        self.values = [
            entry if isinstance(entry, EnumValue) else EnumValue(entry)
            for entry in values
        ]
        TYPES.append(self)
        TYPES_BY_NAME[name] = self

class Member:
    """Holds what is needed to declare and serialize one struct member.

    member_type and name are stored as given.  `array` is None when omitted;
    in this file it is otherwise an int or a string holding a C expression
    for the array length.  `comment` is optional free text.
    """

    def __init__(self, member_type, name, array=None,
                 compiler_field=False, ray_tracing_field=False,
                 comment=None):
        self.member_type, self.name = member_type, name
        self.array = array
        self.comment = comment
        # compiler_field marks a member that is used by the compiler and
        # that should be included in the shader compiler cache hash function.
        self.compiler_field = compiler_field
        # Stored as given; its meaning is not documented in this file.
        self.ray_tracing_field = ray_tracing_field

class Struct:
    """Holds what is needed to declare and serialize a struct.

    `members` is kept by reference (no copy is made).  Constructing an
    instance appends it to TYPES and registers it in TYPES_BY_NAME under
    `name`.
    """

    def __init__(self, name, members):
        self.name, self.members = name, members
        TYPES.append(self)
        TYPES_BY_NAME[name] = self

# Names of the integer C types used by the declarations in this file.
INT_TYPES = {
    "uint8_t",
    "uint16_t",
    "uint32_t",
    "uint64_t",
    "unsigned",
    "int",
}

# Every integer type plus "char" and "bool"; a new set, INT_TYPES is untouched.
FUNDAMENTAL_TYPES = {"char", "bool"} | INT_TYPES

# Macro definitions.  Each of these names is used further down inside the
# `array` expression of at least one intel_device_info member, so they act as
# the array dimensions of that struct.
Define("INTEL_DEVICE_MAX_NAME_SIZE", 64)
Define("INTEL_DEVICE_MAX_SLICES", 8)
Define("INTEL_DEVICE_MAX_SUBSLICES", 8, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_EUS_PER_SUBSLICE", 16, "Maximum on gfx11")
Define("INTEL_DEVICE_MAX_PIXEL_PIPES", 16, "Maximum on DG2")

# Platform identifiers.  The first entry is pinned to 1 and no later entry
# sets an explicit value.  Bare strings are wrapped in EnumValue by the Enum
# constructor.  group_begin / group_end tag the first and last entry of a
# named family (DG2, ATSM, MTL, ARL); how those markers are consumed is not
# visible here.
# NOTE(review): the list order presumably determines the generated numeric
# values, so inserting entries in the middle would renumber later platforms
# -- confirm against the generator.
Enum("intel_platform",
     [EnumValue("INTEL_PLATFORM_GFX3", value=1),
      "INTEL_PLATFORM_I965",
      "INTEL_PLATFORM_ILK",
      "INTEL_PLATFORM_G4X",
      "INTEL_PLATFORM_SNB",
      "INTEL_PLATFORM_IVB",
      "INTEL_PLATFORM_BYT",
      "INTEL_PLATFORM_HSW",
      "INTEL_PLATFORM_BDW",
      "INTEL_PLATFORM_CHV",
      "INTEL_PLATFORM_SKL",
      "INTEL_PLATFORM_BXT",
      "INTEL_PLATFORM_KBL",
      "INTEL_PLATFORM_GLK",
      "INTEL_PLATFORM_CFL",
      "INTEL_PLATFORM_ICL",
      "INTEL_PLATFORM_EHL",
      "INTEL_PLATFORM_TGL",
      "INTEL_PLATFORM_RKL",
      "INTEL_PLATFORM_DG1",
      "INTEL_PLATFORM_ADL",
      "INTEL_PLATFORM_RPL",
      EnumValue("INTEL_PLATFORM_DG2_G10", group_begin="DG2"),
      "INTEL_PLATFORM_DG2_G11",
      EnumValue("INTEL_PLATFORM_DG2_G12", group_end="DG2"),
      EnumValue("INTEL_PLATFORM_ATSM_G10", group_begin="ATSM"),
      EnumValue("INTEL_PLATFORM_ATSM_G11", group_end="ATSM"),
      EnumValue("INTEL_PLATFORM_MTL_U", group_begin="MTL"),
      EnumValue("INTEL_PLATFORM_MTL_H", group_end="MTL"),
      EnumValue("INTEL_PLATFORM_ARL_U", group_begin="ARL"),
      EnumValue("INTEL_PLATFORM_ARL_H", group_end="ARL"),
      "INTEL_PLATFORM_LNL",
      "INTEL_PLATFORM_BMG",
      ])

# Two int members, klass and instance.  Used as the type of the `mem` member
# of intel_device_info_ram_desc.
Struct("intel_memory_class_instance",
       [ Member("int", "klass",
                comment = "Kernel backend specific class value, no translation needed yet"),
         Member("int", "instance")])

# Modes for the `mmap` member of intel_device_info_pat_entry.  The UC entry is
# pinned to 0; the remaining entries set no explicit value.
Enum("intel_device_info_mmap_mode",
      [EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_UC", value=0),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WC"),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_WB"),
       EnumValue("INTEL_DEVICE_INFO_MMAP_MODE_XD",
                 comment=dedent("""\
                 Xe2+ only. Only supported in GPU side and used for displayable
                 buffers."""))
       ])

# Pairs a uint8_t index with an intel_device_info_mmap_mode.  Every member of
# intel_device_info_pat_desc has this type.
Struct("intel_device_info_pat_entry",
       [Member("uint8_t", "index"),
        Member("intel_device_info_mmap_mode", "mmap",
               comment=dedent("""\
               This tells KMD what caching mode the CPU mapping should use.
               It has nothing to do with any PAT cache modes."""))])

# Values for the `scope` member of intel_cooperative_matrix_configuration.
# The NONE entry is pinned to 0.
Enum("intel_cmat_scope",
     [EnumValue("INTEL_CMAT_SCOPE_NONE", value=0),
     "INTEL_CMAT_SCOPE_SUBGROUP"])

# Component types; this enum is the type of the a, b, c and result members of
# intel_cooperative_matrix_configuration.  No entry sets an explicit value.
Enum("intel_cooperative_matrix_component_type",
     ["INTEL_CMAT_FLOAT16",
      "INTEL_CMAT_FLOAT32",
      "INTEL_CMAT_SINT32",
      "INTEL_CMAT_SINT8",
      "INTEL_CMAT_UINT32",
      "INTEL_CMAT_UINT8"])

# Engine classes.  INTEL_ENGINE_CLASS_INVALID is listed last and is also used
# as the array length of the engine_class_prefetch and
# engine_class_supported_count members of intel_device_info.
# NOTE(review): new classes presumably have to be added before INVALID so
# that it keeps acting as the count -- confirm against the generated header.
Enum("intel_engine_class",
     ["INTEL_ENGINE_CLASS_RENDER",
      "INTEL_ENGINE_CLASS_COPY",
      "INTEL_ENGINE_CLASS_VIDEO",
      "INTEL_ENGINE_CLASS_VIDEO_ENHANCE",
      "INTEL_ENGINE_CLASS_COMPUTE",
      "INTEL_ENGINE_CLASS_INVALID"])

# One cooperative matrix configuration: a scope, the m / n / k dimensions as
# uint8_t, and the component type of each of the a, b, c and result matrices.
# The comment string attached to `scope` states how the dimensions relate to
# the matrices.  intel_device_info holds an array of 4 of these.
Struct("intel_cooperative_matrix_configuration",
   [Member("intel_cmat_scope", "scope",
           comment=dedent("""\
           Matrix A is MxK.
           Matrix B is KxN.
           Matrix C and Matrix Result are MxN.

           Result = A * B + C;""")),
    Member("uint8_t", "m"),
    Member("uint8_t", "n"),
    Member("uint8_t", "k"),
    Member("intel_cooperative_matrix_component_type", "a"),
    Member("intel_cooperative_matrix_component_type", "b"),
    Member("intel_cooperative_matrix_component_type", "c"),
    Member("intel_cooperative_matrix_component_type", "result")])

# The only enum in this file declared with external=True.  The Enum class
# merely stores that flag; what it changes in the generated output is decided
# by the consumer, which is not visible here.
Enum("intel_kmd_type",
     ["INTEL_KMD_TYPE_INVALID",
      "INTEL_KMD_TYPE_I915",
      "INTEL_KMD_TYPE_XE",
      "INTEL_KMD_TYPE_STUB",
      "INTEL_KMD_TYPE_LAST"
      ], external=True)

# Two uint64_t members, size and free.  Used for the mappable and unmappable
# members of intel_device_info_ram_desc.
Struct("intel_device_info_mem_region",
       [Member("uint64_t", "size"),
        Member("uint64_t", "free")])

# A memory class/instance pair plus one mem_region each for the mappable and
# unmappable parts.  Used for the sram and vram members of
# intel_device_info_mem_desc.
Struct("intel_device_info_ram_desc",
       [Member("intel_memory_class_instance", "mem"),
        Member("intel_device_info_mem_region", "mappable"),
        Member("intel_device_info_mem_region", "unmappable")])

# The use_class_instance flag plus one ram_desc each for sram and vram.  This
# is the type of the `mem` member of intel_device_info.
Struct("intel_device_info_mem_desc",
       [Member("bool", "use_class_instance"),
        Member("intel_device_info_ram_desc", "sram"),
        Member("intel_device_info_ram_desc", "vram")])

# An int size plus 4-element int arrays min_entries and max_entries.  This is
# the type of the `urb` member of intel_device_info.
# NOTE(review): the meaning of the 4 array slots is not stated here.
Struct("intel_device_info_urb_desc",
       [Member("int", "size"),
        Member("int", "min_entries", array=4),
        Member("int", "max_entries", array=4)])

# A set of named intel_device_info_pat_entry members, one per usage.  The
# intended usage of each is given by its comment string; writecombining is
# the only member without one.  This is the type of the `pat` member of
# intel_device_info.
Struct("intel_device_info_pat_desc",
       [Member("intel_device_info_pat_entry", "cached_coherent",
               comment="To be used when CPU access is frequent, WB + 1 or 2 way coherent"),

        Member("intel_device_info_pat_entry", "scanout",
               comment="scanout and external BOs"),

        Member("intel_device_info_pat_entry", "compressed",
               comment="Only supported in Xe2, compressed + WC"),

        Member("intel_device_info_pat_entry", "writeback_incoherent",
               comment=("BOs without special needs, can be WB not coherent "
                        "or WC it depends on the platforms and KMD")),

        Member("intel_device_info_pat_entry", "writecombining")])

# The top-level device description.  It is declared last because its members
# use the enums and structs declared above as their types.
#
# Reading guide for the Member arguments used below:
#  - compiler_field=True marks a member that is used by the compiler and
#    should be part of the shader compiler cache hash (see Member).
#  - ray_tracing_field=True is set on subslice_masks only.
#  - array=... is either an int or a string holding a C expression.  Most of
#    those expressions use the Define names and INTEL_ENGINE_CLASS_INVALID
#    declared in this file; MESA_SHADER_STAGES and DIV_ROUND_UP are not
#    declared here and must be supplied by whatever consumes the output.
#  - comment=... strings are runtime data, not Python comments.  Several of
#    them introduce a whole group of following members (e.g. "PCI info",
#    "Intel hardware quirks", "GPU hardware limits").
# NOTE(review): the member order presumably becomes the field order of the
# generated struct -- confirm before reordering anything.
Struct("intel_device_info",
       [Member("intel_kmd_type", "kmd_type"),

        Member("int", "ver", compiler_field=True,
               comment="Driver internal numbers used to differentiate platforms."),

        Member("int", "verx10", compiler_field=True),

        Member("uint32_t", "gfx_ip_ver", compiler_field=True,
               comment=dedent("""\
               This is the run-time hardware GFX IP version that may be more specific
               than ver/verx10. ver/verx10 may be more useful for comparing a class
               of devices whereas gfx_ip_ver may be more useful for precisely
               checking for a graphics ip type. GFX_IP_VER(major, minor) should be
               used to compare IP versions.""")),

        Member("int", "revision",
               comment=dedent("""\
               This revision is queried from KMD unlike
               pci_revision_id from drm device. Its value is not always
               same as the pci_revision_id.
               For LNL+ this is the stepping of GT IP/GMD RevId.""")),

        Member("int", "gt"),
        Member("uint16_t", "pci_domain", comment="PCI info"),
        Member("uint8_t", "pci_bus"),
        Member("uint8_t", "pci_dev"),
        Member("uint8_t", "pci_func"),
        Member("uint16_t", "pci_device_id"),
        Member("uint8_t", "pci_revision_id"),
        Member("intel_platform", "platform", compiler_field=True),
        Member("bool", "has_hiz_and_separate_stencil"),
        Member("bool", "must_use_separate_stencil"),
        Member("bool", "has_sample_with_hiz"),
        Member("bool", "has_bit6_swizzle"),
        Member("bool", "has_llc"),
        Member("bool", "has_pln", compiler_field=True),
        Member("bool", "has_64bit_float", compiler_field=True),
        Member("bool", "has_64bit_float_via_math_pipe", compiler_field=True),
        Member("bool", "has_64bit_int", compiler_field=True),
        Member("bool", "has_integer_dword_mul", compiler_field=True),
        Member("bool", "has_compr4", compiler_field=True),
        Member("bool", "has_surface_tile_offset"),
        Member("bool", "supports_simd16_3src", compiler_field=True),
        Member("bool", "disable_ccs_repack"),

        Member("bool", "has_illegal_ccs_values",
               comment="True if CCS needs to be initialized before use."),

        Member("bool", "has_flat_ccs",
               comment=dedent("""\
               True if CCS uses a flat virtual address translation to a memory
               carve-out, rather than aux map translations, or additional surfaces.""")),

        Member("bool", "has_aux_map"),
        Member("bool", "has_caching_uapi"),
        Member("bool", "has_tiling_uapi"),
        Member("bool", "has_ray_tracing", compiler_field=True),
        Member("bool", "has_ray_query"),
        Member("bool", "has_local_mem"),
        Member("bool", "has_lsc", compiler_field=True),
        Member("bool", "has_mesh_shading"),
        Member("bool", "has_mmap_offset"),
        Member("bool", "has_userptr_probe"),
        Member("bool", "has_context_isolation"),
        Member("bool", "has_set_pat_uapi"),
        Member("bool", "has_indirect_unroll"),
        Member("bool", "has_negative_rhw_bug", compiler_field=True,
               comment="Intel hardware quirks"),

        Member("bool", "has_coarse_pixel_primitive_and_cb", compiler_field=True,
               comment=dedent("""\
               Whether this platform supports fragment shading rate controlled by a
               primitive in geometry shaders and by a control buffer.""")),

        Member("bool", "has_compute_engine", comment="Whether this platform has compute engine"),

        Member("bool", "needs_unlit_centroid_workaround", compiler_field=True,
               comment=dedent("""\
               Some versions of Gen hardware don't do centroid interpolation correctly
               on unlit pixels, causing incorrect values for derivatives near triangle
               edges.  Enabling this flag causes the fragment shader to use
               non-centroid interpolation for unlit pixels, at the expense of two extra
               fragment shader instructions.""")),

        Member("bool", "needs_null_push_constant_tbimr_workaround",
               comment=dedent("""\
               Whether the platform needs an undocumented workaround for a hardware bug
               that affects draw calls with a pixel shader that has 0 push constant cycles
               when TBIMR is enabled, which has been seen to lead to hangs.  To avoid the
               issue we simply pad the push constant payload to be at least 1 register.""")),

        Member("bool", "is_adl_n", comment="We need this for ADL-N specific Wa_14014966230."),

        Member("unsigned", "num_slices",
               comment=dedent("""\
               GPU hardware limits

               In general, you can find shader thread maximums by looking at the "Maximum
               Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
               3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
               limits come from the "Number of URB Entries" field in the
               3DSTATE_URB_VS command and friends.

               These fields are used to calculate the scratch space to allocate.  The
               amount of scratch space can be larger without being harmful on modern
               GPUs, however, prior to Haswell, programming the maximum number of threads
               to greater than the hardware maximum would cause GPU performance to tank.

               Total number of slices present on the device whether or not they've been
               fused off.

               XXX: CS thread counts are limited by the inability to do cross subslice
               communication. It is the effectively the number of logical threads which
               can be executed in a subslice. Fuse configurations may cause this number
               to change, so we program @max_cs_threads as the lower maximum.""")),

        Member("unsigned", "max_slices", compiler_field=True,
               comment=dedent("""\
               Maximum number of slices present on this device (can be more than
               num_slices if some slices are fused).""")),

        Member("unsigned", "num_subslices", array="INTEL_DEVICE_MAX_SLICES",
               comment="Number of subslices for each slice (used to be uniform until CNL)."),

        Member("unsigned", "max_subslices_per_slice", compiler_field=True,
               comment=dedent("""\
               Maximum number of subslices per slice present on this device (can be
               more than the maximum value in the num_subslices[] array if some
               subslices are fused).

               This is GT_SS_PER_SLICE in SKU.""")),

        Member("unsigned", "ppipe_subslices", array="INTEL_DEVICE_MAX_PIXEL_PIPES",
               comment="Number of subslices on each pixel pipe (ICL)."),

        Member("unsigned", "max_eus_per_subslice", compiler_field=True,
               comment="Maximum number of EUs per subslice (some EUs can be fused off)."),

        Member("unsigned", "num_thread_per_eu", compiler_field=True,
               comment="Number of threads per eu, varies between 4 and 8 between generations."),

        Member("uint8_t", "grf_size",
               comment="Size of a register from the EU GRF file in bytes."),

        Member("uint8_t", "slice_masks",
               comment="A bit mask of the slices available."),

        Member("uint8_t", "subslice_masks",
               array="INTEL_DEVICE_MAX_SLICES * DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)",
               compiler_field=True,
               ray_tracing_field=True,
               comment=dedent("""\
               An array of bit mask of the subslices available, use subslice_slice_stride
               to access this array.""")),

        Member("unsigned", "subslice_total",
               comment=dedent("""\
               The number of enabled subslices (considering fusing). For exactly which
               subslices are enabled, see subslice_masks[].""")),

        Member("uint8_t", "eu_masks",
               array=("INTEL_DEVICE_MAX_SLICES * INTEL_DEVICE_MAX_SUBSLICES * "
                      "DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)"),
               comment=dedent("""\
               An array of bit mask of EUs available, use eu_slice_stride &
               eu_subslice_stride to access this array.""")),

        Member("uint16_t", "subslice_slice_stride", compiler_field=True,
               comment="Stride to access subslice_masks[]."),

        Member("uint16_t", "eu_slice_stride",
               comment="Strides to access eu_masks[]."),

        Member("uint16_t", "eu_subslice_stride"),
        Member("unsigned", "l3_banks"),

        Member("unsigned", "max_vs_threads",
               comment="Maximum Vertex Shader threads"),

        Member("unsigned", "max_tcs_threads",
               comment="Maximum Hull Shader threads"),

        Member("unsigned", "max_tes_threads",
               comment="Maximum Domain Shader threads"),

        Member("unsigned", "max_gs_threads",
               comment="Maximum Geometry Shader threads"),

        Member("unsigned", "max_wm_threads",
               comment=dedent("""\
               Theoretical maximum number of Pixel Shader threads.

               PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
               automatically scale pixel shader thread count, based on a single value
               programmed into 3DSTATE_PS.

               To calculate the maximum number of threads for Gfx8 beyond (which have
               multiple Pixel Shader Dispatchers):

               - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
               - Usually there's only one PSD per subslice, so use the number of
                 subslices for number of PSDs.
               - For max_wm_threads, the total should be PSD threads * #PSDs.""")),

        Member("unsigned", "max_threads_per_psd"),

        Member("unsigned", "max_cs_threads",
               comment=dedent("""\
               Maximum Compute Shader threads.

               Thread count * number of EUs per subslice""")),

        Member("unsigned", "max_cs_workgroup_threads", compiler_field=True,
               comment=dedent("""\
               Maximum number of threads per workgroup supported by the GPGPU_WALKER or
               COMPUTE_WALKER command.

               This may be smaller than max_cs_threads as it takes into account added
               restrictions on the GPGPU/COMPUTE_WALKER commands.  While max_cs_threads
               expresses the total parallelism of the GPU, this expresses the maximum
               number of threads we can dispatch in a single workgroup.""")),


        Member("unsigned", "max_scratch_ids", array="MESA_SHADER_STAGES", compiler_field=True,
               comment=dedent("""\
               The maximum number of potential scratch ids. Due to hardware
               implementation details, the range of scratch ids may be larger than the
               number of subslices.""")),

        Member("uint32_t", "max_scratch_size_per_thread", compiler_field=True),

        Member("intel_device_info_urb_desc", "urb"),
        Member("unsigned", "max_constant_urb_size_kb"),
        Member("unsigned", "mesh_max_constant_urb_size_kb"),
        Member("unsigned", "engine_class_prefetch", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "engine_class_supported_count", array="INTEL_ENGINE_CLASS_INVALID"),
        Member("unsigned", "mem_alignment"),
        Member("uint64_t", "timestamp_frequency"),
        Member("uint64_t", "aperture_bytes"),
        Member("uint64_t", "gtt_size"),
        Member("int", "simulator_id"),
        Member("char", "name", array="INTEL_DEVICE_MAX_NAME_SIZE"),
        Member("bool", "no_hw"),
        Member("bool", "probe_forced", comment="Device needed INTEL_FORCE_PROBE"),
        Member("intel_device_info_mem_desc", "mem"),
        Member("intel_device_info_pat_desc", "pat"),
        Member("intel_cooperative_matrix_configuration",
               "cooperative_matrix_configurations", array=4)]
       )
