#!/usr/bin/python3
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Generate intrinsics code."""

from collections import OrderedDict

import asm_defs
import json
import os
import re
import sys

# C-level intrinsic calling convention:
# 1. All arguments are passed using the natural data types:
#  - int8_t passed as one byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
#  - int32_t passed as 4 bytes argument (on the stack in IA32 mode, in GP register in x86-64 mode)
#  - int64_t is passed as 8 byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
#  - float is passed as float (on the stack in IA32 mode, in XMM register in x86-64 mode)
#  - double is passed as double (on the stack in IA32 mode, in XMM register in x86-64 mode)
#  - vector formats are passed as pointers to 128bit data structure
# 2. Return values.
#  - Values are returned as std::tuple.  This means that on IA32 it's always returned on stack.

INDENT = '  '
AUTOGEN = """\
// This file automatically generated by gen_intrinsics.py
// DO NOT EDIT!
"""


class VecFormat(object):

  def __init__(self, num_elements, element_size, is_unsigned, is_float, index,
               c_type):
    self.num_elements = num_elements
    self.element_size = element_size
    self.is_unsigned = is_unsigned
    self.is_float = is_float
    self.index = index
    self.c_type = c_type


# Vector format defined as:
#  vector_size, element_size, is_unsigned, is_float, index, ir_format, c_type
# TODO(olonho): make flat numbering after removing legacy macro compat.
_VECTOR_FORMATS = {
    'U8x8': VecFormat(8, 1, True, False, 1, 'uint8_t'),
    'U16x4': VecFormat(4, 2, True, False, 2, 'uint16_t'),
    'U32x2': VecFormat(2, 4, True, False, 3, 'uint32_t'),
    'U64x1': VecFormat(1, 8, True, False, 4, 'uint64_t'),
    'U8x16': VecFormat(16, 1, True, False, 5, 'uint8_t'),
    'U16x8': VecFormat(8, 2, True, False, 6, 'uint16_t'),
    'U32x4': VecFormat(4, 4, True, False, 7, 'uint32_t'),
    'U64x2': VecFormat(2, 8, True, False, 8, 'uint64_t'),
    'I8x8': VecFormat(8, 1, False, False, 9, 'int8_t'),
    'I16x4': VecFormat(4, 2, False, False, 10, 'int16_t'),
    'I32x2': VecFormat(2, 4, False, False, 11, 'int32_t'),
    'I64x1': VecFormat(1, 8, False, False, 12, 'int64_t'),
    'I8x16': VecFormat(16, 1, False, False, 13, 'int8_t'),
    'I16x8': VecFormat(8, 2, False, False, 14, 'int16_t'),
    'I32x4': VecFormat(4, 4, False, False, 15, 'int32_t'),
    'I64x2': VecFormat(2, 8, False, False, 16, 'int64_t'),
    'U8x1': VecFormat(1, 1, True, False, 17, 'uint8_t'),
    'I8x1': VecFormat(1, 1, False, False, 18, 'int8_t'),
    'U16x1': VecFormat(1, 2, True, False, 19, 'uint16_t'),
    'I16x1': VecFormat(1, 2, False, False, 20, 'int16_t'),
    'U32x1': VecFormat(1, 4, True, False, 21, 'uint32_t'),
    'I32x1': VecFormat(1, 4, False, False, 22, 'int32_t'),
    # These vector formats can never intersect with above, so can reuse index.
    'F32x1': VecFormat(1, 4, False, True, 1, 'Float32'),
    'F32x2': VecFormat(2, 4, False, True, 2, 'Float32'),
    'F32x4': VecFormat(4, 4, False, True, 3, 'Float32'),
    'F64x1': VecFormat(1, 8, False, True, 4, 'Float64'),
    'F64x2': VecFormat(2, 8, False, True, 5, 'Float64'),
    # Those vector formats can never intersect with above, so can reuse index.
    'U8x4': VecFormat(4, 1, True, False, 1, 'uint8_t'),
    'U16x2': VecFormat(2, 2, True, False, 2, 'uint16_t'),
    'I8x4': VecFormat(4, 1, False, False, 3, 'int8_t'),
    'I16x2': VecFormat(2, 2, False, False, 4, 'int16_t'),
}


class VecSize(object):

  def __init__(self, num_elements, index):
    self.num_elements = num_elements
    self.index = index


_VECTOR_SIZES = {'X64': VecSize(64, 1), 'X128': VecSize(128, 2)}

_ROUNDING_MODES = ['FE_TONEAREST', 'FE_DOWNWARD', 'FE_UPWARD', 'FE_TOWARDZERO', 'FE_TIESAWAY']


def _is_imm_type(arg_type):
  return 'imm' in arg_type


def _is_template_type(arg_type):
  if not arg_type.startswith('Type'):
    return False
  assert isinstance(int(arg_type[4:]), int)
  return True


def _get_imm_c_type(arg_type):
  return {
      'imm8' : 'int8_t',
      'uimm8' : 'uint8_t',
      'uimm16' : 'uint16_t',
      'uimm32' : 'uint32_t',
  }[arg_type]


def _get_c_type(arg_type):
  if (arg_type in ('Float16', 'Float32', 'Float64', 'int8_t', 'uint8_t', 'int16_t',
                  'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t',
                  'volatile uint8_t*', 'volatile uint32_t*') or
      _is_template_type(arg_type)):
    return arg_type
  if arg_type in ('fp_flags', 'fp_control', 'int', 'flag', 'flags', 'vec32'):
    return 'uint32_t'
  if _is_imm_type(arg_type):
    return _get_imm_c_type(arg_type)
  if arg_type == 'vec':
    return 'SIMD128Register'
  if arg_type in _ROUNDING_MODES:
    return 'int'
  raise Exception('Type %s not supported' % (arg_type))


def _get_semantic_player_type(arg_type, type_map):
  if type_map is not None and arg_type in type_map:
    return type_map[arg_type]
  if arg_type in ('Float16', 'Float32', 'Float64', 'vec'):
    return 'SimdRegister'
  if _is_imm_type(arg_type):
    return _get_imm_c_type(arg_type)
  return 'Register'


def _gen_scalar_intr_decl(f, name, intr):
  ins = intr.get('in')
  outs = intr.get('out')
  params = [_get_c_type(op) for op in ins]
  if len(outs) > 0:
    retval = 'std::tuple<' + ', '.join(_get_c_type(out) for out in outs) + '>'
  else:
    retval = 'void'
  comment = intr.get('comment')
  if comment:
    print('// %s.' % (comment), file=f)
  if intr.get('precise_nans', False):
    print('template <bool precise_nan_operations_handling, '
          'enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible>',
          file=f)
  print('%s %s(%s);' % (retval, name, ', '.join(params)), file=f)


def _gen_template_intr_decl(f, name, intr):
  ins = intr.get('in')
  outs = intr.get('out')
  params = [_get_c_type(op) for op in ins]
  if len(outs) > 0:
    retval = 'std::tuple<' + ', '.join(_get_c_type(out) for out in outs) + '>'
  else:
    retval = 'void'
  comment = intr.get('comment')
  if comment:
    print('// %s.' % (comment), file=f)
  print('template <%s>' % _get_template_arguments(
      intr.get('variants'), intr.get('precise_nans', False)), file=f)
  print('%s %s(%s);' % (retval, name, ', '.join(params)), file=f)


def _get_template_arguments(
    variants,
    precise_nans = False,
    extra = ['enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible']):
  template = None
  for variant in variants:
    counter = -1
    def get_counter():
      nonlocal counter
      counter += 1
      return counter
    new_template = ', '.join(
      (["bool kPreciseNaNOperationsHandling"] if precise_nans else []) +
      ['bool kBool%s' % get_counter() if param.strip() in ('true', 'false') else
       'uint32_t kInt%s' % get_counter() if param.strip() in _ROUNDING_MODES else
       'typename Type%d' % get_counter() if re.search('[_a-zA-Z]', param) else
       'int kInt%s' % get_counter()
       for param in variant.split(',')] + extra)
    assert template is None or template == new_template
    template = new_template
  return template


def _is_vector_class(intr):
  return intr.get('class') in ('vector_4', 'vector_8', 'vector_16',
                               'vector_8/16', 'vector_8/16/single',
                               'vector_8/single', 'vector_16/single')


def _is_simd128_conversion_required(t, type_map=None):
  return (_get_semantic_player_type(t, type_map) == 'SimdRegister' and
          _get_c_type(t) != 'SIMD128Register')


def _get_semantics_player_hook_result(intr):
  outs = intr['out']
  if len(outs) == 0:
    return 'void'
  elif len(outs) == 1:
    # No tuple for single result.
    return _get_semantic_player_type(outs[0], intr.get('sem-player-types'))
  return 'std::tuple<' + ', '.join(
      _get_semantic_player_type(out, intr.get('sem-player-types'))
      for out in outs) + '>'


def _get_semantics_player_hook_proto_components(name, intr):
  ins = intr['in']

  args = []
  if _is_vector_class(intr):
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      args = ["uint8_t size"]
    else:
      args = ["uint8_t elem_size", "uint8_t elem_num"]
      if (_is_signed(intr) and _is_unsigned(intr)):
        args += ['bool is_signed']

  args += [
      '%s arg%d' % (
          _get_semantic_player_type(op, intr.get('sem-player-types')), num)
      for num, op in enumerate(ins)
  ]

  result = _get_semantics_player_hook_result(intr)

  return result, name, ', '.join(args)


def _get_semantics_player_hook_proto(name, intr):
  result, name, args = _get_semantics_player_hook_proto_components(name, intr)
  if intr.get('class') == 'template':
    return 'template<%s>\n%s %s(%s)' % (
      _get_template_arguments(intr.get('variants'), False, []), result, name, args)
  return '%s %s(%s)' % (result, name, args)


def _get_interpreter_hook_call_expr(name, intr, desc=None):
  ins = intr['in']
  outs = intr['out']

  call_params = []
  for num, op in enumerate(ins):
    arg = 'arg%d' % (num)
    semantic_player_type = _get_semantic_player_type(
        op, intr.get('sem-player-types'))
    if semantic_player_type == 'FpRegister':
      call_params.append('FPRegToFloat<%s>(%s)' % (op, arg))
    elif semantic_player_type == 'SimdRegister':
      call_params.append(_get_cast_from_simd128(arg, op, ptr_bits=64))
    elif '*' in _get_c_type(op):
      call_params.append('berberis::bit_cast<%s>(%s)' % (_get_c_type(op), arg))
    else:
      call_params.append('GPRRegToInteger<%s>(%s)' % (_get_c_type(op), arg))

  call_expr = 'intrinsics::%s%s(%s)' % (
      name, _get_desc_specializations(intr, desc).replace(
          'Float', 'intrinsics::Float'), ', '.join(call_params))

  if len(outs) == 1:
    # Unwrap tuple for single result.
    call_expr = 'std::get<0>(%s)' % call_expr
    if 'sem-player-types' in intr:
      out_type = _get_semantic_player_type(outs[0], intr.get('sem-player-types'))
      if out_type == "FpRegister":
        call_expr = 'FloatToFPReg(%s)' % call_expr
      elif out_type != "SimdRegister":
        assert out_type == "Register"
        assert not _is_simd128_conversion_required(
          outs[0], intr.get('sem-player-types'))
        call_expr = 'IntegerToGPRReg(%s)' % call_expr
    else:
      # Currently this kind of mismatch can only happen for single result, so we
      # can keep simple code here for now.
      if _is_simd128_conversion_required(outs[0]):
        out_type = _get_c_type(outs[0])
        if out_type in ('Float16', 'Float32', 'Float64'):
          call_expr = 'FloatToFPReg(%s)' % call_expr
        else:
          raise Exception('Type %s is not supported' % (out_type))
  else:
    if any(_is_simd128_conversion_required(out) for out in outs):
      raise Exception(
          'Unsupported SIMD128Register conversion with multiple results')

  return call_expr


def _get_interpreter_hook_return_stmt(name, intr, desc=None):
  return 'return ' + _get_interpreter_hook_call_expr(name, intr, desc) + ';'

def _get_unused(intr):
  call_expr = 'UNUSED(%s);' % ', '.join('arg%d' % (num) for num, _ in enumerate(intr['in']))
  return call_expr

def _get_placeholder_return_stmt(intr, f):
  print(INDENT + _get_unused(intr), file=f)
  outs = intr['out']
  if outs:
    print(INDENT + 'return {};', file=f)

def _get_semantics_player_hook_raw_vector_body(name, intr, get_return_stmt):
  outs = intr['out']
  if (len(outs) == 0):
    raise Exception('No result raw vector intrinsic is not supported')
  reg_class = intr.get('class')
  yield 'switch (size) {'
  for fmt, desc in _VECTOR_SIZES.items():
    if _check_reg_class_size(reg_class, desc.num_elements / 8):
      yield INDENT + 'case %s:' % desc.num_elements
      yield 2 * INDENT + get_return_stmt(name, intr, desc)
  yield INDENT + 'default:'
  yield 2 * INDENT + 'LOG_ALWAYS_FATAL("Unsupported size");'
  yield '}'


def _is_signed(intr):
  return any(v.startswith("signed") for v in intr['variants'])


def _is_unsigned(intr):
  return any(v.startswith("unsigned") for v in intr['variants'])


def _get_vector_format_init_expr(intr):
  variants = intr.get('variants')

  if ('Float16' in variants or 'Float32' in variants or 'Float64' in variants):
    return 'intrinsics::GetVectorFormatFP(elem_size, elem_num)'

  assert _is_signed(intr) or _is_unsigned(intr), "Unexpected intrinsic class"
  if _is_signed(intr) and _is_unsigned(intr):
    signed_arg = ', is_signed'
  else:
    signed_arg = ', true' if _is_signed(intr) else ', false'
  return 'intrinsics::GetVectorFormatInt(elem_size, elem_num%s)' % signed_arg


def _get_semantics_player_hook_vector_body(name, intr, get_return_stmt):
  outs = intr['out']
  if (len(outs) == 0):
    raise Exception('No result vector intrinsic is not supported')
  reg_class = intr.get('class')
  yield 'auto format = %s;' % _get_vector_format_init_expr(intr)
  yield 'switch (format) {'
  for variant in intr.get('variants'):
    for fmt, desc in _VECTOR_FORMATS.items():
      if (_check_reg_class_size(reg_class,
                                desc.element_size * desc.num_elements) and
          _check_typed_variant(variant, desc)):
        yield INDENT + 'case intrinsics::kVector%s:' % fmt
        yield 2 * INDENT + get_return_stmt(name, intr, desc)
      elif (reg_class in ('vector_8/single', 'vector_8/16/single', 'vector_16/single') and
            desc.num_elements == 1 and
          _check_typed_variant(variant, desc)):
        assert desc.element_size <= 8, "Unexpected element size"
        yield INDENT + 'case intrinsics::kVector%s:' % fmt
        yield 2 * INDENT + get_return_stmt(name, intr, desc)
  yield INDENT + 'default:'
  yield 2 * INDENT + 'LOG_ALWAYS_FATAL("Unsupported format");'
  yield '}'


# Syntax sugar heavily used in tests.
def _get_interpreter_hook_vector_body(name, intr):
  return _get_semantics_player_hook_vector_body(
      name, intr, _get_interpreter_hook_return_stmt)


def _gen_interpreter_hook(f, name, intr, option):
  print('%s const {' % (_get_semantics_player_hook_proto(name, intr)), file=f)

  if _is_vector_class(intr):
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      lines = _get_semantics_player_hook_raw_vector_body(
          name,
          intr,
          _get_interpreter_hook_return_stmt)
    else:
      lines = _get_interpreter_hook_vector_body(name, intr)

    lines = [INDENT + l for l in lines]
    print('\n'.join(lines), file=f)
  else:
    # TODO(b/363057506): Add float support and clean up the logic here.
    arm64_allowlist = ['AmoAdd', 'AmoAnd', 'AmoMax', 'AmoMin', 'AmoOr', 'AmoSwap', 'AmoXor', 'Bclr',
                       'Bclri', 'Bext', 'Bexti', 'Binv', 'Binvi', 'Bset', 'Bseti', 'Div', 'Max',
                       'Min', 'Rem', 'Rev8', 'Rol', 'Ror', 'Sext', 'Sh1add', 'Sh1adduw', 'Sh2add',
                       'Sh2adduw', 'Sh3add', 'Sh3adduw', 'Zext', 'UnboxNan']
    if (option == 'arm64') and (name not in arm64_allowlist):
      _get_placeholder_return_stmt(intr, f)
    else:
      print(INDENT + _get_interpreter_hook_return_stmt(name, intr), file=f)

  print('}\n', file=f)


def _get_translator_hook_call_expr(name, intr, desc = None):
  desc_spec = _get_desc_specializations(intr, desc).replace(
      'Float', 'intrinsics::Float')
  args = [('arg%d' % n) for n, _ in enumerate(intr['in'])]
  template_params = ['&intrinsics::' + name + desc_spec]
  template_params += [_get_semantics_player_hook_result(intr)]
  return 'CallIntrinsic<%s>(%s)' % (', '.join(template_params), ', '.join(args))


def _get_translator_hook_return_stmt(name, intr, desc=None):
  return 'return ' + _get_translator_hook_call_expr(name, intr, desc) + ';'


def _gen_translator_hook(f, name, intr):
  print('%s {' % (_get_semantics_player_hook_proto(name, intr)), file=f)

  if _is_vector_class(intr):
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      lines = _get_semantics_player_hook_raw_vector_body(
          name,
          intr,
          _get_translator_hook_return_stmt)
    else:
      lines = _get_semantics_player_hook_vector_body(
          name,
          intr,
          _get_translator_hook_return_stmt)
    lines = [INDENT + l for l in lines]
    print('\n'.join(lines), file=f)
  else:
    print(INDENT + _get_translator_hook_return_stmt(name, intr), file=f)

  print('}\n', file=f)


def _gen_mock_semantics_listener_hook(f, name, intr):
  result, name, args = _get_semantics_player_hook_proto_components(name, intr)
  if intr.get('class') == 'template':
    print('template<%s>\n%s %s(%s) {\n  return %s(%s);\n}' % (
      _get_template_arguments(intr.get('variants'), False, []),
      result,
      name,
      args,
      name,
      ', '.join([
        'intrinsics::kEnumFromTemplateType<%s>' % arg if arg.startswith('Type') else arg
        for arg in _get_template_spec_arguments(intr.get('variants'))] +
      [('arg%d' % n) for n, _ in enumerate(intr['in'])])), file=f)
    args = ', '.join([
      '%s %s' % (
          {
              'kBoo': 'bool',
              'kInt': 'int',
              'Type': 'intrinsics::EnumFromTemplateType'
          }[argument[0:4]],
          argument)
      for argument in _get_template_spec_arguments(intr.get('variants'))] + [args])
  print('MOCK_METHOD((%s), %s, (%s));' % (result, name, args), file=f)


def _check_signed_variant(variant, desc):
  if variant == 'signed':
    return True
  if variant == 'signed_32':
    return desc.element_size == 4
  if variant == 'signed_64':
    return desc.element_size == 8
  if variant == 'signed_16/32':
    return desc.element_size in (2, 4)
  if variant == 'signed_8/16/32':
    return desc.element_size in (1, 2, 4)
  if variant == 'signed_16/32/64':
    return desc.element_size in (2, 4, 8)
  if variant == 'signed_8/16/32/64':
    return desc.element_size in (1, 2, 4, 8)
  if variant == 'signed_32/64':
    return desc.element_size in (4, 8)
  return False


def _check_unsigned_variant(variant, desc):
  if variant == 'unsigned':
    return True
  if variant == 'unsigned_8':
    return desc.element_size == 1
  if variant == 'unsigned_16':
    return desc.element_size == 2
  if variant == 'unsigned_32':
    return desc.element_size == 4
  if variant == 'unsigned_64':
    return desc.element_size == 8
  if variant == 'unsigned_8/16':
    return desc.element_size in (1, 2)
  if variant == 'unsigned_8/16/32':
    return desc.element_size in (1, 2, 4)
  if variant == 'unsigned_16/32/64':
    return desc.element_size in (2, 4, 8)
  if variant == 'unsigned_8/16/32/64':
    return desc.element_size in (1, 2, 4, 8)
  if variant == 'unsigned_32/64':
    return desc.element_size in (4, 8)
  return False


def _check_reg_class_size(reg_class, size):
  # Small vectors are separate namespace.
  if size == 4 and reg_class == 'vector_4':
    return True
  if size == 8 and reg_class in ('vector_8', 'vector_8/16', 'vector_8/16/single',
                                 'vector_8/single'):
    return True
  if size == 16 and reg_class in ('vector_16', 'vector_8/16', 'vector_8/16/single',
                                  'vector_16/single'):
    return True
  return False


def _check_typed_variant(variant, desc):
  if desc.is_unsigned and not desc.is_float:
    return _check_unsigned_variant(variant, desc)
  if not desc.is_unsigned and not desc.is_float:
    return _check_signed_variant(variant, desc)
  if desc.is_float:
    if desc.element_size == 2:
      return variant == 'Float16'
    if desc.element_size == 4:
      return variant == 'Float32'
    if desc.element_size == 8:
      return variant == 'Float64'
  return False


def _get_formats_with_descriptions(intr):
  reg_class = intr.get('class')
  for variant in intr.get('variants'):
    found_fmt = False
    for fmt, desc in _VECTOR_FORMATS.items():
      if (_check_reg_class_size(reg_class,
                                desc.element_size * desc.num_elements) and
          _check_typed_variant(variant, desc) and
          (reg_class != 'vector_4' or desc.element_size < 4)):
        found_fmt = True
        yield fmt, desc

    if variant == 'raw':
      for fmt, desc in _VECTOR_SIZES.items():
        if _check_reg_class_size(reg_class, desc.num_elements / 8):
          found_fmt = True
          yield fmt, desc

    assert found_fmt, 'Couldn\'t expand %s' % reg_class


def _get_result_type(outs):
  result_type = 'void'
  return_stmt = ''
  if len(outs) >= 1:
    result_type = ('std::tuple<' +
                   ', '.join(_get_c_type(out) for out in outs) + '>')
    return_stmt = 'return '
  return result_type, return_stmt


def _get_in_params(params):
  for param_index, param in enumerate(params):
    yield _get_c_type(param), 'in%d' % (param_index)


def _get_out_params(params):
  for param_index, param in enumerate(params):
    yield _get_c_type(param), 'out%d' % (param_index)


def _get_cast_from_simd128(var, target_type, ptr_bits):
  if ('*' in target_type):
    return 'berberis::bit_cast<%s>(%s.Get<uint%d_t>(0))' % (_get_c_type(target_type), var,
                                                  ptr_bits)

  c_type = _get_c_type(target_type)
  if c_type in ('Float16', 'Float32', 'Float64'):
    return 'FPRegToFloat<intrinsics::%s>(%s)' % (c_type, var)

  cast_map = {
      'int8_t': '.Get<int8_t>(0)',
      'uint8_t': '.Get<uint8_t>(0)',
      'int16_t': '.Get<int16_t>(0)',
      'uint16_t': '.Get<uint16_t>(0)',
      'int32_t': '.Get<int32_t>(0)',
      'uint32_t': '.Get<uint32_t>(0)',
      'int64_t': '.Get<int64_t>(0)',
      'uint64_t': '.Get<uint64_t>(0)',
      'SIMD128Register': ''
  }
  return '%s%s' % (var, cast_map[c_type])


def _get_desc_specializations(intr, desc=None):
  if intr.get('class') == 'template':
    spec = _get_template_spec_arguments(intr.get('variants'))
  elif hasattr(desc, 'c_type'):
    spec = [desc.c_type, str(desc.num_elements)]
  elif hasattr(desc, 'num_elements'):
    spec = [str(desc.num_elements)]
  else:
    spec = []
  if intr.get('precise_nans', False):
    spec = ['config::kPreciseNaNOperationsHandling'] + spec
  if not len(spec):
    return ''
  return '<%s>' % ', '.join(spec)


def _get_template_spec_arguments(variants):
  spec = None
  for variant in variants:
    counter = -1
    def get_counter():
      nonlocal counter
      counter += 1
      return counter
    new_spec = [
      'kBool%s' % get_counter() if param.strip() in ('true', 'false') else
      'kInt%s' % get_counter() if param.strip() in _ROUNDING_MODES else
      'Type%d' % get_counter() if re.search('[_a-zA-Z]', param) else
      'kInt%s' % get_counter()
      for param in variant.split(',')]
    assert spec is None or spec == new_spec
    spec = new_spec
  return spec


def _intr_has_side_effects(intr, fmt=None):
  ins = intr.get('in')
  outs = intr.get('out')
  # If we have 'has_side_effects' mark in JSON file then we use it "as is".
  if 'has_side_effects' in intr:
    return intr.get('has_side_effects')
  # Otherwise we mark all floating-point related intrinsics as "volatile".
  # TODO(b/68857496): move that information in HIR/LIR and stop doing that.
  if 'Float16' in ins or 'Float32' in ins or 'Float64' in ins:
    return True
  if 'Float16' in outs or  'Float32' in outs or 'Float64' in outs:
    return True
  if fmt is not None and fmt.startswith('F'):
    return True
  return False


def _gen_intrinsics_inl_h(f, intrs):
  print(AUTOGEN, file=f)
  for name, intr in intrs:
    if intr.get('class') == 'scalar':
      _gen_scalar_intr_decl(f, name, intr)
    elif intr.get('class') == 'template':
      _gen_template_intr_decl(f, name, intr)


def _gen_semantic_player_types(intrs):
  for name, intr in intrs:
    if intr.get('class') == 'template':
      map = None
      for variant in intr.get('variants'):
        counter = -1
        def get_counter():
          nonlocal counter
          counter += 1
          return counter
        new_map = {
          'Float16': 'FpRegister',
          'Float32': 'FpRegister',
          'Float64': 'FpRegister',
        }
        for type in filter(
              lambda param: param.strip() not in ('true', 'false') and
                            re.search('[_a-zA-Z]', param),
            variant.split(',')):
          new_map['Type%d' % get_counter()] = (
              'FpRegister' if type.strip() in ('Float16', 'Float32', 'Float64') else
              _get_semantic_player_type(type, None))
        assert map is None or map == new_map
        map = new_map
      intr['sem-player-types'] = map


def _gen_interpreter_intrinsics_hooks_impl_inl_h(f, intrs, option):
  print(AUTOGEN, file=f)
  for name, intr in intrs:
    _gen_interpreter_hook(f, name, intr, option)


def _gen_translator_intrinsics_hooks_impl_inl_h(f, intrs):
  print(AUTOGEN, file=f)
  for name, intr in intrs:
    _gen_translator_hook(f, name, intr)


def _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(f, intrs):
  print(AUTOGEN, file=f)
  for name, intr in intrs:
    _gen_mock_semantics_listener_hook(f, name, intr)


def _get_reg_operand_info(arg, info_prefix=None):
  need_tmp = arg['class'] in ('EAX', 'EDX', 'CL', 'ECX')
  if info_prefix is None:
    class_info = 'void'
  else:
    class_info = '%s::%s' % (info_prefix, arg['class'])
  if arg['class'] == 'Imm8':
    return 'ImmArg<%d, int8_t, %s>' % (arg['ir_arg'], class_info)
  if info_prefix is None:
    using_info = 'void'
  else:
    using_info = '%s::%s' % (info_prefix, {
        'def': 'Def',
        'def_early_clobber': 'DefEarlyClobber',
        'use': 'Use',
        'use_def': 'UseDef'
    }[arg['usage']])
  if arg['usage'] == 'use':
    if need_tmp:
      return 'InTmpArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
    return 'InArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
  if arg['usage'] in ('def', 'def_early_clobber'):
    assert 'ir_arg' not in arg
    if 'ir_res' in arg:
      if need_tmp:
        return 'OutTmpArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
      return 'OutArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
    return 'TmpArg<%s, %s>' % (class_info, using_info)
  if arg['usage'] == 'use_def':
    if 'ir_res' in arg:
      if need_tmp:
        return 'InOutTmpArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
                                                class_info, using_info)
      return 'InOutArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
                                           class_info, using_info)
    return 'InTmpArg<%s, %s, %s>' % (arg['ir_arg'], class_info, using_info)
  assert False, 'unknown operand usage %s' % (arg['usage'])


def _gen_make_intrinsics(f, intrs, archs):
  print("""%s
template <typename MacroAssembler,
          typename Callback,
          typename... Args>
void ProcessAllBindings([[maybe_unused]] Callback callback,
                        [[maybe_unused]] Args&&... args) {""" % AUTOGEN,
    file=f)
  for line in _gen_c_intrinsics_generator(
          intrs, _is_interpreter_compatible_assembler, False): # False for gen_builder
      print(line, file=f)
  print('}', file=f)

def _gen_opcode_generators_f(f, intrs):
  for line in _gen_opcode_generators(intrs):
    print(line, file=f)

def _gen_opcode_generators(intrs):
  opcode_generators = {}
  for name, intr in intrs:
    if 'asm' not in intr:
      continue
    if 'variants' in intr:
      variants = _get_formats_with_descriptions(intr)
      variants = sorted(variants, key=lambda variant: variant[1].index)
      # Collect intr_asms for all variants of intrinsic.
      # Note: not all variants are guaranteed to have an asm variant!
      # If that happens the list of intr_asms for that variant will be empty.
      variants = [[
          intr_asm for intr_asm in _gen_sorted_asms(intr)
          if fmt in intr_asm['variants']
      ] for fmt, _ in variants]
      # Print intrinsic generator
      for intr_asms in variants:
        if len(intr_asms) > 0:
          for intr_asm in intr_asms:
            if not _is_translator_compatible_assembler(intr_asm):
              continue
            for line in _gen_opcode_generator(intr_asm, opcode_generators):
              yield line
    else:
      for intr_asm in _gen_sorted_asms(intr):
        if not _is_translator_compatible_assembler(intr_asm):
          continue
        for line in _gen_opcode_generator(intr_asm, opcode_generators):
          yield line

def _gen_opcode_generator(asm, opcode_generators):
  name = asm['name']
  num_mem_args = sum(1 for arg in asm['args'] if arg.get('class').startswith("Mem") and arg.get('usage') == 'def_early_clobber')
  opcode = 'Undefined' if num_mem_args > 2 else (asm_defs.get_mem_macro_name(asm, '').replace("Mem", "MemBaseDisp")) if num_mem_args > 0 else name

  if name not in opcode_generators:
    opcode_generators[name] = True
    yield """
// TODO(b/260725458): Pass lambda as template argument after C++20 becomes available.
class GetOpcode%s {
 public:
  template <typename Opcode>
  constexpr auto operator()() {
    return Opcode::kMachineOp%s;
  }
};""" % (name, opcode)

def _gen_process_bindings(f, intrs, archs):
  print('%s' % AUTOGEN, file=f)
  _gen_opcode_generators_f(f, intrs)
  print("""
template <auto kFunc,
          typename MacroAssembler,
          typename Result,
          typename Callback,
          typename... Args>
Result ProcessBindings(Callback callback, Result def_result, Args&&... args) {""",
    file=f)
  for line in _gen_c_intrinsics_generator(
          intrs, _is_translator_compatible_assembler, True): # True for gen_builder
      print(line, file=f)
  print("""  }
  return std::forward<Result>(def_result);
}""", file=f)


def _gen_c_intrinsics_generator(intrs, check_compatible_assembler, gen_builder):
  string_labels = {}
  mnemo_idx = [0]
  for name, intr in intrs:
    ins = intr.get('in')
    outs = intr.get('out')
    params = _get_in_params(ins)
    formal_args = ', '.join('%s %s' % (type, param) for type, param in params)
    result_type, _ = _get_result_type(outs)
    if 'asm' not in intr:
      continue
    if 'variants' in intr:
      variants = _get_formats_with_descriptions(intr)
      # Sort by index, to keep order close to what _gen_intrs_enum produces.
      variants = sorted(variants, key=lambda variant: variant[1].index)
      # Collect intr_asms for all versions of intrinsic.
      # Note: not all variants are guaranteed to have asm version!
      # If that happens list of intr_asms for that variant would be empty.
      variants = [(desc, [
          intr_asm for intr_asm in _gen_sorted_asms(intr)
          if fmt in intr_asm['variants']
      ]) for fmt, desc in variants]
      # Print intrinsic generator
      for desc, intr_asms in variants:
        if len(intr_asms) > 0:
          if 'raw' in intr['variants']:
            spec = '%d' % (desc.num_elements)
          else:
            spec = '%s, %d' % (desc.c_type, desc.num_elements)
          for intr_asm in intr_asms:
            for line in _gen_c_intrinsic('%s<%s>' % (name, spec),
                                         intr,
                                         intr_asm,
                                         string_labels,
                                         mnemo_idx,
                                         check_compatible_assembler,
                                         gen_builder):
              yield line
    else:
      for intr_asm in _gen_sorted_asms(intr):
        for line in _gen_c_intrinsic(name,
                                     intr,
                                     intr_asm,
                                     string_labels,
                                     mnemo_idx,
                                     check_compatible_assembler,
                                     gen_builder):
          yield line


def _gen_sorted_asms(intr):
  return sorted(intr['asm'],
    key = lambda intr:
        intr.get('nan', '') +
      _KNOWN_FEATURES_KEYS.get(
        intr.get('feature', ''), intr.get('feature', '')), reverse = True)

_KNOWN_FEATURES_KEYS = {
  'LZCNT': '001',
  'BMI': '002',
  'BMI2': '003',
  'SSE': '010',
  'SSE2': '011',
  'SSE3': '012',
  'SSSE3': '013',
  'SSE4a': '014',
  'SSE4_1': '015',
  'SSE4_2': '016',
  'AVX': '017',
  'AVX2': '018',
  'FMA': '019',
  'FMA4': '020',
  'CustomCapability': '021'
}


def _gen_c_intrinsic(name,
                     intr,
                     asm,
                     string_labels,
                     mnemo_idx,
                     check_compatible_assembler,
                     gen_builder):
  if not check_compatible_assembler(asm):
    return

  cpuid_restriction = 'intrinsics::bindings::NoCPUIDRestriction'
  if 'feature' in asm:
    if asm['feature'] == 'AuthenticAMD':
      cpuid_restriction = 'intrinsics::bindings::IsAuthenticAMD'
    else:
      cpuid_restriction = 'intrinsics::bindings::Has%s' % asm['feature']

  nan_restriction = 'intrinsics::bindings::NoNansOperation'
  if 'nan' in asm:
    nan_restriction = 'intrinsics::bindings::%sNanOperationsHandling' % asm['nan']
    template_arg = 'true' if asm['nan'] == "Precise" else "false"
    if '<' in name:
      template_pos = name.index('<')
      name = name[0:template_pos+1] + template_arg + ", " + name[template_pos+1:]
    else:
      name += '<' + template_arg + '>'

  if name not in string_labels:
    name_label = 'kName%d' % len(string_labels)
    string_labels[name] = name_label
    if check_compatible_assembler == _is_translator_compatible_assembler:
      yield ' %s if constexpr (std::is_same_v<FunctionCompareTag<kFunc>,' % (
        '' if name_label == 'kName0' else ' } else'
      )
      yield '                                      FunctionCompareTag<%s>>) {' % name
    yield '    static constexpr const char %s[] = "%s";' % (
        name_label, name)
  else:
    name_label = string_labels[name]

  mnemo = asm['mnemo']
  mnemo_label = 'kMnemo%d' % mnemo_idx[0]
  mnemo_idx[0] += 1
  yield '    static constexpr const char %s[] = "%s";' % (
      mnemo_label, mnemo)

  restriction = [cpuid_restriction, nan_restriction]

  if check_compatible_assembler == _is_translator_compatible_assembler:
    yield '    if (auto result = callback('
  else:
    yield '    callback('
  yield '          intrinsics::bindings::AsmCallInfo<'
  yield '              %s>(),' % (
    ',\n              '.join(
        [name_label,
         _get_asm_reference(asm),
         mnemo_label,
         _get_builder_reference(intr, asm) if gen_builder else 'void',
         cpuid_restriction,
         nan_restriction,
         'true' if _intr_has_side_effects(intr) else 'false',
         _get_c_type_tuple(intr['in']),
         _get_c_type_tuple(intr['out'])] +
        [_get_reg_operand_info(arg, 'intrinsics::bindings')
         for arg in asm['args']]))
  if check_compatible_assembler == _is_translator_compatible_assembler:
    yield '          std::forward<Args>(args)...); result.has_value()) {'
    yield '      return *std::move(result);'
    yield '    }'
  else:
    yield '          std::forward<Args>(args)...);'


def _get_c_type_tuple(arguments):
    return 'std::tuple<%s>' % ', '.join(
        _get_c_type(argument) for argument in arguments).replace(
            'Float', 'intrinsics::Float')


def _get_asm_type(asm, prefix=''):
  args = filter(
    lambda arg: not asm_defs.is_implicit_reg(arg['class']), asm['args'])
  return ', '.join(_get_asm_operand_type(arg, prefix) for arg in args)


def _get_asm_operand_type(arg, prefix=''):
  cls = arg.get('class')
  if asm_defs.is_x87reg(cls):
    return prefix + 'X87Register'
  if asm_defs.is_greg(cls):
    return prefix + 'Register'
  if asm_defs.is_xreg(cls):
    return prefix + 'XMMRegister'
  if asm_defs.is_mem_op(cls):
    return 'const ' + prefix + 'Operand&'
  if asm_defs.is_imm(cls):
    if cls == 'Imm2':
      return 'int8_t'
    return 'int' + cls[3:] + '_t'
  assert False


def _get_asm_reference(asm):
  # Because of misfeature of Itanium C++ ABI we couldn't just use MacroAssembler
  # to static cast these references if we want to use them as template argument:
  # https://ibob.bg/blog/2018/08/18/a-bug-in-the-cpp-standard/

  # Thankfully there are usually no need to use the same trick for MacroInstructions
  # since we may always rename these, except when immediates are involved.

  # But for assembler we need to use actual type from where these
  # instructions come from!
  #
  # E.g. LZCNT have to be processed like this:
  #   static_cast<void (Assembler_common_x86::*)(
  #     typename Assembler_common_x86::Register,
  #     typename Assembler_common_x86::Register)>(
  #       &Assembler_common_x86::Lzcntl)
  assembler = 'std::tuple_element_t<%s, MacroAssembler>' % asm['macroassembler']
  return 'static_cast<void (%s::*)(%s)>(%s&%s::%s%s)' % (
      assembler,
      _get_asm_type(asm, 'typename %s::' % assembler),
      '\n                  ',
      assembler,
      'template ' if '<' in asm['asm'] else '',
      asm['asm'])

def _get_builder_reference(intr, asm):
  return 'GetOpcode%s' % (asm['name'])

def _load_intrs_def_files(intrs_def_files):
  result = {}
  for intrs_def in intrs_def_files:
    with open(intrs_def) as intrs:
      result.update(json.load(intrs))
  result.pop('License', None)
  return result


def _load_intrs_arch_def(intrs_defs):
  json_data = []
  for intrs_def in intrs_defs:
    with open(intrs_def) as intrs:
      json_array = json.load(intrs)
      while isinstance(len(json_array) > 0 and json_array[0], str):
        json_array.pop(0)
      json_data.extend(json_array)
  return json_data


def _load_macro_def(intrs, arch_intrs, insns_def, macroassembler):
  arch, insns = asm_defs.load_asm_defs(insns_def)
  for insn in insns:
    insn['macroassembler'] = macroassembler
  insns_map = dict((insn['name'], insn) for insn in insns)
  unprocessed_intrs = []
  for arch_intr in arch_intrs:
    if arch_intr['insn'] in insns_map:
      insn = insns_map[arch_intr['insn']]
      _add_asm_insn(intrs, arch_intr, insn)
    else:
      unprocessed_intrs.append(arch_intr)
  return arch, unprocessed_intrs


def _is_interpreter_compatible_assembler(intr_asm):
  if intr_asm.get('usage', '') == 'inline-only':
    return False
  return True


def _is_translator_compatible_assembler(intr_asm):
  if intr_asm.get('usage', '') == 'no-inline':
    return False
  return True



def _add_asm_insn(intrs, arch_intr, insn):
  name = ','.join(name_part.strip() for name_part in arch_intr['name'].split(','))
  # Sanity checks: MacroInstruction could implement few different intrinsics but
  # number of arguments in arch intrinsic and arch-independent intrinsic
  # should match.
  #
  # Note: we allow combining intrinsics with variants and intrinsics without
  # variants (e.g. AbsF32 is combined with VectorAbsoluteFP for F32x2 and F32x4),
  # but don't allow macroinstructions which would handle different set of
  # variants for different intrinsics.

  assert 'variants' not in insn or insn['variants'] == arch_intr['variants']
  assert 'feature' not in insn or insn['feature'] == arch_intr['feature']
  assert 'nan' not in insn or insn['nan'] == arch_intr['nan']
  assert 'usage' not in insn or insn['usage'] == arch_intr['usage']
  assert len(intrs[name]['in']) == len(arch_intr['in'])
  assert len(intrs[name]['out']) == len(arch_intr['out'])

  if 'variants' in arch_intr:
    insn['variants'] = arch_intr['variants']
  if 'feature' in arch_intr:
    insn['feature'] = arch_intr['feature']
  if 'nan' in arch_intr:
    insn['nan'] = arch_intr['nan']
  if 'usage' in arch_intr:
    insn['usage'] = arch_intr['usage']

  for count, in_arg in enumerate(arch_intr['in']):
    # Sanity check: each in argument should only be used once - but if two
    # different intrinsics use them same macroinstruction it could be already
    # defined... yet it must be defined identically.
    assert ('ir_arg' not in insn['args'][in_arg] or
            insn['args'][in_arg]['ir_arg'] == count)
    insn['args'][in_arg]['ir_arg'] = count

  for count, out_arg in enumerate(arch_intr['out']):
    # Sanity check: each out argument should only be used once, too.
    assert ('ir_res' not in insn['args'][out_arg] or
            insn['args'][out_arg]['ir_res'] == count)
    insn['args'][out_arg]['ir_res'] = count

  # Note: one intrinsic could have more than one implementation (e.g.
  # SSE2 vs SSE4.2).
  if 'asm' not in intrs[name]:
    intrs[name]['asm'] = []
  intrs[name]['asm'].append(insn)


def _open_asm_def_files(def_files, arch_def_files, asm_def_files, need_archs=True):
  intrs = _load_intrs_def_files(def_files)
  expanded_intrs = _expand_template_intrinsics(intrs)
  arch_intrs = _load_intrs_arch_def(arch_def_files)
  archs = []
  macro_assemblers = 0
  for macro_def in asm_def_files:
    arch, arch_intrs = _load_macro_def(expanded_intrs, arch_intrs, macro_def, macro_assemblers)
    macro_assemblers += 1
  # Make sure that all intrinsics were found during processing of arch_intrs.
  assert arch_intrs == []
  if need_archs:
    return archs, sorted(intrs.items()), sorted(expanded_intrs.items())
  else:
    return sorted(intrs.items())


def _expand_template_intrinsics(intrs):
  expanded_intrs = {}
  for name, intr in intrs.items():
    if intr.get('class') != 'template':
      expanded_intrs[name] = intr
    else:
     for variant in intr.get('variants'):
       types = {}
       params = [param.strip() for param in variant.split(',')]
       for param in params:
         if param in ('true', 'false'):
           continue
         if re.search('[_a-zA-Z]', param):
           types['Type'+str(len(types))] = param
       new_intr = intr.copy()
       del new_intr['variants']
       new_intr['in'] = [types.get(param, param) for param in new_intr.get('in')]
       new_intr['out'] = [types.get(param, param) for param in new_intr.get('out')]
       expanded_intrs[name+'<'+','.join(params)+'>'] = new_intr
  return expanded_intrs


def main(argv):
  # Usage:
  #   gen_intrinsics.py --public_headers <intrinsics-inl.h>
  #                                      <intrinsics_process_bindings-inl.h>
  #                                      <interpreter_intrinsics_hooks-inl.h>
  #                                      <translator_intrinsics_hooks-inl.h>
  #                                      <mock_semantics_listener_intrinsics_hooks-inl.h>
  #                                      <riscv64_to_x86_64/intrinsic_def.json",
  #                                      ...
  #                                      <riscv64_to_x86_64/machine_ir_intrinsic_binding.json>,
  #                                      ...
  #                                      <riscv64_to_x86_64/macro_def.json>,
  #                                      ...
  #   gen_intrinsics.py --text_asm_intrinsics_bindings <make_intrinsics-inl.h>
  #                                                    <riscv64_to_x86_64/intrinsic_def.json",
  #                                                    ...
  #                                                    <riscv64_to_x86_64/machine_ir_intrinsic_binding.json>,
  #                                                    ...
  #                                                    <riscv64_to_x86_64/macro_def.json>,
  #                                                    ...

  def open_out_file(name):
    try:
      os.makedirs(os.path.dirname(name))
    except:
      pass
    return open(name, 'w')

  # Temporary special case for riscv64 to arm64.
  # TODO(b/362520361): generalize and combine with the below.
  option = argv[1]
  if option == 'arm64':
    mode = argv[2]
    out_files_end = 5
    def_files_end = out_files_end
    while argv[def_files_end].endswith('intrinsic_def.json'):
      def_files_end += 1
      if (def_files_end == len(argv)):
        break
    intrs = sorted(_load_intrs_def_files(argv[out_files_end:def_files_end]).items())
    _gen_intrinsics_inl_h(open_out_file(argv[3]), intrs)
    _gen_semantic_player_types(intrs)
    _gen_interpreter_intrinsics_hooks_impl_inl_h(open_out_file(argv[4]), intrs, option)
    return 0

  mode = argv[1]
  if mode in ('--text_asm_intrinsics_bindings', '--public_headers'):
    out_files_end = 3 if mode == '--text_asm_intrinsics_bindings' else 7
    def_files_end = out_files_end
    while argv[def_files_end].endswith('intrinsic_def.json'):
      def_files_end += 1
    arch_def_files_end = def_files_end
    while argv[arch_def_files_end].endswith('machine_ir_intrinsic_binding.json'):
      arch_def_files_end += 1
    archs, intrs, expanded_intrs = _open_asm_def_files(
      argv[out_files_end:def_files_end],
      argv[def_files_end:arch_def_files_end],
      argv[arch_def_files_end:],
      True)
    if mode == '--text_asm_intrinsics_bindings':
      _gen_make_intrinsics(open_out_file(argv[2]), expanded_intrs, archs)
    else:
      _gen_intrinsics_inl_h(open_out_file(argv[2]), intrs)
      _gen_process_bindings(open_out_file(argv[3]), expanded_intrs, archs)
      _gen_semantic_player_types(intrs)
      _gen_interpreter_intrinsics_hooks_impl_inl_h(open_out_file(argv[4]), intrs, '')
      _gen_translator_intrinsics_hooks_impl_inl_h(
          open_out_file(argv[5]), intrs)
      _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(
          open_out_file(argv[6]), intrs)
  else:
    assert False, 'unknown option %s' % (mode)

  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))
