/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art HIDDEN {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
using helpers::XRegisterFrom;

#define __ GetVIXLAssembler()->

// Returns whether the value of the constant can be directly encoded into the instruction as
// immediate.
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}

// Returns
//  - constant location - if 'constant' is an actual constant and its value can be
//    encoded into the instruction.
//  - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant);
  }

  return Location::RequiresRegister();
}

void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = GetVecGoverningPReg(instruction);
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  const VRegister dst = VRegisterFrom(locations->Out());
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }
  ValidateVectorLength(instruction);

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnB(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnB(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnB(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnB(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));

      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
          VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::GenerateIntegerVecComparison(
    const PRegisterWithLaneSize& pd,
    const PRegisterZ& pg,
    const ZRegister& zn,
    const ZRegister& zm,
    IfCondition cond) {
  switch (cond) {
    case kCondEQ:
      __ Cmpeq(pd, pg, zn, zm);
      return;
    case kCondNE:
      __ Cmpne(pd, pg, zn, zm);
      return;
    case kCondLT:
      __ Cmplt(pd, pg, zn, zm);
      return;
    case kCondLE:
      __ Cmple(pd, pg, zn, zm);
      return;
    case kCondGT:
      __ Cmpgt(pd, pg, zn, zm);
      return;
    case kCondGE:
      __ Cmpge(pd, pg, zn, zm);
      return;
    case kCondB:
      __ Cmplo(pd, pg, zn, zm);
      return;
    case kCondBE:
      __ Cmpls(pd, pg, zn, zm);
      return;
    case kCondA:
      __ Cmphi(pd, pg, zn, zm);
      return;
    case kCondAE:
      __ Cmphs(pd, pg, zn, zm);
      return;
  }
  LOG(FATAL) << "Condition '" << enum_cast<uint32_t>(cond) << "' not supported: ";
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::HandleVecCondition(HVecCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::HandleVecCondition(HVecCondition* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister left = ZRegisterFrom(locations->InAt(0));
  const ZRegister right = ZRegisterFrom(locations->InAt(1));
  const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing();
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  HVecOperation* a = instruction->InputAt(0)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(1)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  ValidateVectorLength(instruction);

  // TODO: Support other types, e.g: boolean, float and double.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      GenerateIntegerVecComparison(output_p_reg.VnB(),
                                   p_reg,
                                   left.VnB(),
                                   right.VnB(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      GenerateIntegerVecComparison(output_p_reg.VnH(),
                                   p_reg,
                                   left.VnH(),
                                   right.VnH(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kInt32:
      GenerateIntegerVecComparison(output_p_reg.VnS(),
                                   p_reg,
                                   left.VnS(),
                                   right.VnS(),
                                   instruction->GetCondition());
      break;
    case DataType::Type::kInt64:
      GenerateIntegerVecComparison(output_p_reg.VnD(),
                                   p_reg,
                                   left.VnD(),
                                   right.VnD(),
                                   instruction->GetCondition());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

#define FOR_EACH_VEC_CONDITION_INSTRUCTION(M) \
  M(VecEqual)                                 \
  M(VecNotEqual)                              \
  M(VecLessThan)                              \
  M(VecLessThanOrEqual)                       \
  M(VecGreaterThan)                           \
  M(VecGreaterThanOrEqual)                    \
  M(VecBelow)                                 \
  M(VecBelowOrEqual)                          \
  M(VecAbove)                                 \
  M(VecAboveOrEqual)
#define DEFINE_VEC_CONDITION_VISITORS(Name)                                                     \
void LocationsBuilderARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); }         \
void InstructionCodeGeneratorARM64Sve::Visit##Name(H##Name* comp) { HandleVecCondition(comp); }
FOR_EACH_VEC_CONDITION_INSTRUCTION(DEFINE_VEC_CONDITION_VISITORS)
#undef DEFINE_VEC_CONDITION_VISITORS
#undef FOR_EACH_VEC_CONDITION_INSTRUCTION

void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsVecPredSetOperation());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) {
  DCHECK(instruction->IsPredicated());

  const PRegister input_p_reg = GetVecPredSetFixedOutPReg(
      instruction->InputAt(0)->AsVecPredSetOperation());
  const PRegister control_p_reg = GetVecGoverningPReg(instruction);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB());
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to workaround absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and predicate inputs locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing it
  // to the RA (through GetVecPredSetFixedOutPReg()).
  //
  // To make the RA happy Location::NoLocation() was used for all the vector instructions
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in register allocator.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  // Current implementation of predicated loop execution only supports kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);

  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(output_p_reg.VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(output_p_reg.VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(output_p_reg.VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(output_p_reg.VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  HInstruction *input = instruction->InputAt(0);
  const PRegister output_p_reg = GetVecPredSetFixedOutPReg(input->AsVecPredSetOperation());
  __ Ptest(output_p_reg, output_p_reg.VnB());
  __ Cset(reg, ARM64PCondition(instruction->GetPCondKind()));
}

Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
      // when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to restore D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art
