/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "compiler/nir/nir.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)

/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)

/* Semi-arbitrary limit for spill slot allocation */
#define AGX_NUM_MODELED_REGS (2048)

/* Limit on number of sources for non-phi instructions */
#define AGX_MAX_NORMAL_SOURCES (16)

enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_UNDEF = 5,
};

enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };

static inline unsigned
agx_size_align_16(enum agx_size size)
{
   switch (size) {
   case AGX_SIZE_16:
      return 1;
   case AGX_SIZE_32:
      return 2;
   case AGX_SIZE_64:
      return 4;
   }

   unreachable("Invalid size");
}

/* Keep synced with hash_index */
typedef struct {
   /* Sufficient for as many SSA values, immediates, and uniforms as we need. */
   uint32_t value;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis.
    */
   bool kill : 1;

   /* Cache hints */
   bool cache   : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Register class */
   bool memory : 1;

   unsigned channels_m1     : 3;
   enum agx_size size       : 2;
   enum agx_index_type type : 3;
   unsigned padding         : 18;
} agx_index;
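
/* Note: the bit-fields above sum to 6 + 3 + 2 + 3 + 18 = 32 bits, so an
 * agx_index packs into 64 bits alongside the 32-bit value.
 */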

static inline unsigned
agx_channels(agx_index idx)
{
   return idx.channels_m1 + 1;
}

static inline unsigned
agx_index_size_16(agx_index idx)
{
   return agx_size_align_16(idx.size) * agx_channels(idx);
}

static inline agx_index
agx_get_vec_index(unsigned value, enum agx_size size, unsigned channels)
{
   return (agx_index){
      .value = value,
      .channels_m1 = channels - 1,
      .size = size,
      .type = AGX_INDEX_NORMAL,
   };
}
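
/* Illustrative example: a 3-channel 32-bit vector spans
 * 3 * agx_size_align_16(AGX_SIZE_32) = 6 half-word units:
 *
 *    agx_index v = agx_get_vec_index(5, AGX_SIZE_32, 3);
 *    assert(agx_index_size_16(v) == 6);
 */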

static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
   return agx_get_vec_index(value, size, 1);
}

static inline agx_index
agx_immediate(uint32_t imm)
{
   assert(imm < (1 << 16) && "overflowed immediate");

   return (agx_index){
      .value = imm,
      .size = AGX_SIZE_16,
      .type = AGX_INDEX_IMMEDIATE,
   };
}

static inline agx_index
agx_immediate_f(float f)
{
   assert(agx_minifloat_exact(f));
   return agx_immediate(agx_minifloat_encode(f));
}

/* In half-words: r0l is 0, r0h is 1, r1l is 2, r1h is 3, and so on */
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_REGS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}
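
/* Illustrative examples of the half-word numbering:
 *
 *    agx_register(0, AGX_SIZE_16)  ->  r0l
 *    agx_register(1, AGX_SIZE_16)  ->  r0h
 *    agx_register(2, AGX_SIZE_32)  ->  r1 (halves r1l:r1h)
 *    agx_register(4, AGX_SIZE_64)  ->  r2:r3
 */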

static inline agx_index
agx_memory_register(uint32_t imm, enum agx_size size)
{
   return (agx_index){
      .value = imm,
      .memory = true,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_register_like(uint32_t imm, agx_index like)
{
   return (agx_index){
      .value = imm,
      .memory = like.memory,
      .channels_m1 = like.channels_m1,
      .size = like.size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_undef(enum agx_size size)
{
   return (agx_index){
      .size = size,
      .type = AGX_INDEX_UNDEF,
   };
}

/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_UNIFORMS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_UNIFORM,
   };
}

static inline agx_index
agx_null(void)
{
   return (agx_index){.type = AGX_INDEX_NULL};
}

static inline agx_index
agx_zero(void)
{
   return agx_immediate(0);
}

/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
 * = exponent = 0, sign bit set */

static inline agx_index
agx_negzero(void)
{
   return agx_immediate(0x80);
}

static inline agx_index
agx_abs(agx_index idx)
{
   idx.abs = true;
   idx.neg = false;
   return idx;
}

static inline agx_index
agx_neg(agx_index idx)
{
   idx.neg ^= true;
   return idx;
}

/* Replaces an index, preserving any modifiers */

static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   return replacement;
}

static inline bool
agx_is_null(agx_index idx)
{
   return idx.type == AGX_INDEX_NULL;
}

/* Compares equivalence as references */

static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS_UNIFORM = 1,
   AGX_LOD_MODE_LOD_MIN_UNIFORM = 2,
   AGX_LOD_MODE_LOD_GRAD = 4,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN_UNIFORM = 9,
   AGX_LOD_MODE_LOD_GRAD_MIN = 12,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN = 13,
};

/* Forward declare for branch target */
struct agx_block;

/* Keep synced with hash_instr */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources of the instruction */
   agx_index *src;

   /* The destinations of the instruction */
   agx_index *dest;

   enum agx_opcode op;

   uint8_t nr_dests;
   uint8_t nr_srcs;

   /* TODO: More efficient */
   union {
      enum agx_icond icond;
      enum agx_fcond fcond;
   };

   union {
      uint64_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      uint16_t pixel_offset;
      uint16_t zs;
      int16_t stack_size;
      enum agx_sr sr;
      enum agx_round round;
      enum agx_atomic_opc atomic_opc;
      enum agx_lod_mode lod_mode;
      enum agx_simd_op simd_op;
      struct agx_block *target;

      /* As a special case to workaround ordering issues when translating phis,
       * if nr_srcs == 0 and the opcode is PHI, points to the NIR phi.
       */
      nir_phi_instr *phi;
   };

   /* For local access */
   enum agx_format format;

   /* Number of nested control flow layers to jump by. TODO: Optimize */
   uint32_t nest;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim       : 4;
   bool offset            : 1;
   bool shadow            : 1;
   bool query_lod         : 1;
   enum agx_gather gather : 3;

   /* TODO: Handle tilebuffer ops more efficiently */
   bool explicit_coords : 1;

   /* TODO: Handle iter ops more efficiently */
   enum agx_interpolation interpolation : 2;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;

   unsigned padding : 8;
} agx_instr;

static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
   I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}

struct agx_block;

typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* For blocks already visited during register assignment, the mapping of
    * live-out registers to SSA names at the end of the block. This is dense,
    * unlike its inverse.
    */
   uint32_t *reg_to_ssa_out[2];

   /* Is this block a loop header? If not, all of its predecessors precede it in
    * source order.
    */
   bool loop_header;

   /* Offset of the block in the emitted binary */
   off_t offset, last_offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;

typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   bool is_preamble;
   unsigned scratch_size;

   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Number of blocks; block indices are strictly below this */
   unsigned num_blocks;

   /* For creating temporaries */
   unsigned alloc;

   /* Does the shader statically use scratch memory? */
   bool any_scratch;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Do we need r0h zero throughout the program to handle quad-divergent
    * shuffle?
    */
   bool any_quad_divergent_shuffle;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* Total nesting across all loops, to determine if we need push_exec */
   unsigned total_nesting;

   /* Whether the loop being emitted has used any `continue` jumps */
   bool loop_continues;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* During instruction selection, preloaded values, or null if the register
    * has not been preloaded.
    */
   agx_index preloaded[AGX_NUM_REGS];

   /* Beginning of our stack allocation used for spilling, below that is
    * NIR-level scratch.
    */
   unsigned spill_base;

   /* Beginning of stack allocation used for parallel copy lowering */
   bool has_spill_pcopy_reserved;
   unsigned spill_pcopy_base;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned max_reg;
} agx_context;

static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}

static inline agx_index
agx_vec_temp(agx_context *ctx, enum agx_size size, unsigned channels)
{
   return agx_get_vec_index(ctx->alloc++, size, channels);
}

static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
   return agx_get_index(ctx->alloc++, size);
}

static inline agx_index
agx_temp_like(agx_context *ctx, agx_index idx)
{
   idx.value = ctx->alloc++;
   return idx;
}

static inline enum agx_size
agx_size_for_bits(unsigned bits)
{
   switch (bits) {
   case 1:
   case 8:
   case 16:
      return AGX_SIZE_16;
   case 32:
      return AGX_SIZE_32;
   case 64:
      return AGX_SIZE_64;
   default:
      unreachable("Invalid bitsize");
   }
}

static inline agx_index
agx_def_index(nir_def *ssa)
{
   return agx_get_vec_index(ssa->index, agx_size_for_bits(ssa->bit_size),
                            ssa->num_components);
}

static inline agx_index
agx_src_index(nir_src *src)
{
   return agx_def_index(src->ssa);
}

static inline agx_index
agx_vec_for_def(agx_context *ctx, nir_def *def)
{
   return agx_vec_temp(ctx, agx_size_for_bits(def->bit_size),
                       def->num_components);
}

static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_def(ctx, &instr->def);
}

static inline unsigned
agx_num_predecessors(agx_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, agx_block *);
}

static inline unsigned
agx_num_successors(agx_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   return (block->successors[0] ? 1 : 0) + (block->successors[1] ? 1 : 0);
}

static inline agx_block *
agx_start_block(agx_context *ctx)
{
   agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_predecessors(first) == 0);
   return first;
}

static inline agx_block *
agx_end_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_successors(last) == 0);
   return last;
}

void agx_block_add_successor(agx_block *block, agx_block *successor);

/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v)                                              \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_safe(ctx, v)                                         \
   list_for_each_entry_safe(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v)                                          \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v)                                   \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v)                               \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v)                                   \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v)                              \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v)                          \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from)                        \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions,    \
                                link)

#define agx_foreach_instr_global(ctx, v)                                       \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v)                                   \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v)                                  \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v)                              \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts */

#define agx_foreach_successor(blk, v)                                          \
   agx_block *v;                                                               \
   agx_block **_v;                                                             \
   for (_v = (agx_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)

#define agx_foreach_predecessor(blk, v)                                        \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)
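
/* Illustrative example: count the CFG edges incident to a block:
 *
 *    unsigned edges = 0;
 *
 *    agx_foreach_successor(block, succ)
 *       edges++;
 *
 *    agx_foreach_predecessor(block, pred)
 *       edges++;
 */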

#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_src_rev(ins, v)                                            \
   for (signed v = ins->nr_srcs - 1; v >= 0; --v)

#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define agx_foreach_dest_rev(ins, v)                                           \
   for (signed v = ins->nr_dests - 1; v >= 0; --v)

#define agx_foreach_ssa_src(ins, v)                                            \
   agx_foreach_src(ins, v)                                                     \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_src_rev(ins, v)                                        \
   agx_foreach_src_rev(ins, v)                                                 \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest(ins, v)                                           \
   agx_foreach_dest(ins, v)                                                    \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest_rev(ins, v)                                       \
   agx_foreach_dest_rev(ins, v)                                                \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

/* Phis only come at the start of a block (after else instructions), so we stop
 * as soon as we hit a non-phi.
 */
#define agx_foreach_phi_in_block(block, v)                                     \
   agx_foreach_instr_in_block(block, v)                                        \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

#define agx_foreach_phi_in_block_safe(block, v)                                \
   agx_foreach_instr_in_block_safe(block, v)                                   \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
   unsigned index = 0;

   agx_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}
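
/* For example, to rewrite the value flowing into the phis of succ along the
 * edge from pred (a sketch; `replacement` is hypothetical):
 *
 *    unsigned s = agx_predecessor_index(succ, pred);
 *
 *    agx_foreach_phi_in_block(succ, phi)
 *       agx_replace_src(phi, s, replacement);
 */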

static inline agx_block *
agx_prev_block(agx_block *block)
{
   return list_last_entry(&(block->link), agx_block, link);
}

static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}

static inline agx_instr *
agx_first_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_first_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_last_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_last_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}

static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}

static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}

#define agx_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w)        u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)         u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w)        u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)         u_worklist_pop_tail(w, agx_block, index)

/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;

static inline bool
agx_cursors_equal(agx_cursor a, agx_cursor b)
{
   if (a.option != b.option)
      return false;

   if (a.option == agx_cursor_after_block)
      return a.block == b.block;
   else
      return a.instr == b.instr;
}

static inline agx_cursor
agx_after_block(agx_block *block)
{
   return (agx_cursor){
      .option = agx_cursor_after_block,
      .block = block,
   };
}

static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_before_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_after_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
   agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
   assert(I != NULL);

   return agx_before_instr(I);
}

static inline agx_cursor
agx_before_block(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return agx_after_block(block);
   else
      return agx_before_nonempty_block(block);
}

static inline bool
instr_after_logical_end(const agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_POP_EXEC:
   case AGX_OPCODE_BREAK:
   case AGX_OPCODE_IF_ICMP:
   case AGX_OPCODE_WHILE_ICMP:
   case AGX_OPCODE_IF_FCMP:
   case AGX_OPCODE_WHILE_FCMP:
   case AGX_OPCODE_STOP:
   case AGX_OPCODE_EXPORT:
      return true;
   default:
      return false;
   }
}

/*
 * Get a cursor inserting at the logical end of the block. In particular, this
 * is before branches or control flow instructions, which occur after the
 * logical end but before the physical end.
 */
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
   /* Search for the first instruction that's not past the logical end */
   agx_foreach_instr_in_block_rev(block, I) {
      if (!instr_after_logical_end(I))
         return agx_after_instr(I);
   }

   /* If we got here, the block is either empty or entirely control flow */
   return agx_before_block(block);
}

/* Get a cursor at the start of a function, after any preloads */
static inline agx_cursor
agx_before_function(agx_context *ctx)
{
   agx_block *block = agx_start_block(ctx);

   agx_foreach_instr_in_block(block, I) {
      if (I->op != AGX_OPCODE_PRELOAD)
         return agx_before_instr(I);
   }

   /* The whole block is preloads, so insert at the end */
   return agx_after_block(block);
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;

static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
   return (agx_builder){
      .shader = ctx,
      .cursor = cursor,
   };
}
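
/* Illustrative sketch, assuming the generated builder helpers from
 * agx_opcodes (e.g. agx_mov_imm_to): materialize a 32-bit constant at the
 * logical end of a block:
 *
 *    agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
 *    agx_index tmp = agx_temp(ctx, AGX_SIZE_32);
 *    agx_mov_imm_to(&b, tmp, 42);
 */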

/* Insert an instruction at the cursor and move the cursor */

static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
   switch (cursor->option) {
   case agx_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case agx_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;

   case agx_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}

bool agx_instr_accepts_uniform(enum agx_opcode op, unsigned src_index,
                               unsigned value, enum agx_size size);

/* Routines defined for AGX IR */
void agx_print_index(agx_index index, bool is_float, FILE *fp);
void agx_print_instr(const agx_instr *I, FILE *fp);
void agx_print_block(const agx_block *block, FILE *fp);
void agx_print_shader(const agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_divergent_shuffle(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_opt_compact_constants(agx_context *ctx);
void agx_opt_promote_constants(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);
void agx_spill(agx_context *ctx, unsigned k);
void agx_repair_ssa(agx_context *ctx);
void agx_reindex_ssa(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_opt_break_if(agx_context *ctx);
void agx_opt_jmp_none(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

enum agx_size agx_split_width(const agx_instr *I);
bool agx_allows_16bit_immediate(agx_instr *I);

static inline bool
agx_is_float_src(const agx_instr *I, unsigned s)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool fcmp = (I->op == AGX_OPCODE_FCMPSEL || I->op == AGX_OPCODE_FCMP);

   /* fcmp takes its first two sources as floats but returns an integer */
   return info.is_float || (s < 2 && fcmp);
}

struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Destination is memory */
   bool dest_mem;

   /* Source of the copy */
   agx_index src;

   /* Whether the copy has been handled. Callers must initialize this to
    * false. */
   bool done;
};

void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
                              unsigned n);
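
/* Illustrative example: swap 32-bit r0 and r1 in parallel; the lowering
 * breaks the cycle with a swap or a temporary as needed:
 *
 *    struct agx_copy copies[2] = {
 *       {.dest = 0, .src = agx_register(2, AGX_SIZE_32)},
 *       {.dest = 2, .src = agx_register(0, AGX_SIZE_32)},
 *    };
 *
 *    agx_emit_parallel_copies(b, copies, 2);
 */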

void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
bool agx_nir_lower_address(nir_shader *shader);
bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);

struct agx_cycle_estimate {
   /* ALU throughput */
   unsigned alu;

   /* Floating point and SCIB (select, conditional, integer, and boolean)
    * throughput.
    */
   unsigned f_scib;

   /* IC (Integer and complex) throughput */
   unsigned ic;
};

struct agx_cycle_estimate agx_estimate_cycles(agx_context *ctx);

extern int agx_compiler_debug;

#ifdef __cplusplus
} /* extern C */
#endif
