//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H #include "GCNRegPressure.h" #include "llvm/ADT/MapVector.h" #include "llvm/CodeGen/MachineScheduler.h" namespace llvm { class SIMachineFunctionInfo; class SIRegisterInfo; class GCNSubtarget; class GCNSchedStage; enum class GCNSchedStageID : unsigned { OccInitialSchedule = 0, UnclusteredHighRPReschedule = 1, ClusteredLowOccupancyReschedule = 2, PreRARematerialize = 3, ILPInitialSchedule = 4 }; #ifndef NDEBUG raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); #endif /// This is a minimal scheduler strategy. The main difference between this /// and the GenericScheduler is that GCNSchedStrategy uses different /// heuristics to determine excess/critical pressure sets. class GCNSchedStrategy : public GenericScheduler { protected: SUnit *pickNodeBidirectional(bool &IsTopNode); void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand); void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure); std::vector Pressure; std::vector MaxPressure; unsigned SGPRExcessLimit; unsigned VGPRExcessLimit; unsigned TargetOccupancy; MachineFunction *MF; // Scheduling stages for this strategy. SmallVector SchedStages; // Pointer to the current SchedStageID. SmallVectorImpl::iterator CurrentStage = nullptr; public: // schedule() have seen register pressure over the critical limits and had to // track register pressure for actual scheduling heuristics. bool HasHighPressure; // Schedule known to have excess register pressure. Be more conservative in // increasing ILP and preserving VGPRs. bool KnownExcessRP = false; // An error margin is necessary because of poor performance of the generic RP // tracker and can be adjusted up for tuning heuristics to try and more // aggressively reduce register pressure. unsigned ErrorMargin = 3; // Bias for SGPR limits under a high register pressure. const unsigned HighRPSGPRBias = 7; // Bias for VGPR limits under a high register pressure. const unsigned HighRPVGPRBias = 7; unsigned SGPRCriticalLimit; unsigned VGPRCriticalLimit; unsigned SGPRLimitBias = 0; unsigned VGPRLimitBias = 0; GCNSchedStrategy(const MachineSchedContext *C); SUnit *pickNode(bool &IsTopNode) override; void initialize(ScheduleDAGMI *DAG) override; unsigned getTargetOccupancy() { return TargetOccupancy; } void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; } GCNSchedStageID getCurrentStage(); // Advances stage. Returns true if there are remaining stages. bool advanceStage(); bool hasNextStage() const; GCNSchedStageID getNextStage() const; }; /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e. /// maximum number of waves per simd). class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy { public: GCNMaxOccupancySchedStrategy(const MachineSchedContext *C); }; /// The goal of this scheduling strategy is to maximize ILP for a single wave /// (i.e. latency hiding). class GCNMaxILPSchedStrategy final : public GCNSchedStrategy { protected: bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override; public: GCNMaxILPSchedStrategy(const MachineSchedContext *C); }; class ScheduleMetrics { unsigned ScheduleLength; unsigned BubbleCycles; public: ScheduleMetrics() {} ScheduleMetrics(unsigned L, unsigned BC) : ScheduleLength(L), BubbleCycles(BC) {} unsigned getLength() const { return ScheduleLength; } unsigned getBubbles() const { return BubbleCycles; } unsigned getMetric() const { unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength; // Metric is zero if the amount of bubbles is less than 1% which is too // small. So, return 1. return Metric ? Metric : 1; } static const unsigned ScaleFactor; }; inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) { dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/" << Sm.getLength() << " ]\n"; return OS; } class GCNScheduleDAGMILive final : public ScheduleDAGMILive { friend class GCNSchedStage; friend class OccInitialScheduleStage; friend class UnclusteredHighRPStage; friend class ClusteredLowOccStage; friend class PreRARematStage; friend class ILPInitialScheduleStage; const GCNSubtarget &ST; SIMachineFunctionInfo &MFI; // Occupancy target at the beginning of function scheduling cycle. unsigned StartingOccupancy; // Minimal real occupancy recorder for the function. unsigned MinOccupancy; // Vector of regions recorder for later rescheduling SmallVector, 32> Regions; // Records if a region is not yet scheduled, or schedule has been reverted, // or we generally desire to reschedule it. BitVector RescheduleRegions; // Record regions with high register pressure. BitVector RegionsWithHighRP; // Record regions with excess register pressure over the physical register // limit. Register pressure in these regions usually will result in spilling. BitVector RegionsWithExcessRP; // Regions that has the same occupancy as the latest MinOccupancy BitVector RegionsWithMinOcc; // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT). BitVector RegionsWithIGLPInstrs; // Region live-in cache. SmallVector LiveIns; // Region pressure cache. SmallVector Pressure; // Temporary basic block live-in cache. DenseMap MBBLiveIns; DenseMap BBLiveInMap; DenseMap getBBLiveInMap() const; // Return current region pressure. GCNRegPressure getRealRegPressure(unsigned RegionIdx) const; // Compute and cache live-ins and pressure for all regions in block. void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB); // Update region boundaries when removing MI or inserting NewMI before MI. void updateRegionBoundaries( SmallVectorImpl> &RegionBoundaries, MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing = false); void runSchedStages(); std::unique_ptr createSchedStage(GCNSchedStageID SchedStageID); public: GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S); void schedule() override; void finalizeSchedule() override; }; // GCNSchedStrategy applies multiple scheduling stages to a function. class GCNSchedStage { protected: GCNScheduleDAGMILive &DAG; GCNSchedStrategy &S; MachineFunction &MF; SIMachineFunctionInfo &MFI; const GCNSubtarget &ST; const GCNSchedStageID StageID; // The current block being scheduled. MachineBasicBlock *CurrentMBB = nullptr; // Current region index. unsigned RegionIdx = 0; // Record the original order of instructions before scheduling. std::vector Unsched; // RP before scheduling the current region. GCNRegPressure PressureBefore; // RP after scheduling the current region. GCNRegPressure PressureAfter; std::vector> SavedMutations; GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG); public: // Initialize state for a scheduling stage. Returns false if the current stage // should be skipped. virtual bool initGCNSchedStage(); // Finalize state after finishing a scheduling pass on the function. virtual void finalizeGCNSchedStage(); // Setup for scheduling a region. Returns false if the current region should // be skipped. virtual bool initGCNRegion(); // Track whether a new region is also a new MBB. void setupNewBlock(); // Finalize state after scheudling a region. void finalizeGCNRegion(); // Check result of scheduling. void checkScheduling(); // computes the given schedule virtual execution time in clocks ScheduleMetrics getScheduleMetrics(const std::vector &InputSchedule); ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG); unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap &ReadyCycles, const TargetSchedModel &SM); // Returns true if scheduling should be reverted. virtual bool shouldRevertScheduling(unsigned WavesAfter); // Returns true if current region has known excess pressure. bool isRegionWithExcessRP() const { return DAG.RegionsWithExcessRP[RegionIdx]; } // Returns true if the new schedule may result in more spilling. bool mayCauseSpilling(unsigned WavesAfter); // Attempt to revert scheduling for this region. void revertScheduling(); void advanceRegion() { RegionIdx++; } virtual ~GCNSchedStage() = default; }; class OccInitialScheduleStage : public GCNSchedStage { public: bool shouldRevertScheduling(unsigned WavesAfter) override; OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; class UnclusteredHighRPStage : public GCNSchedStage { private: // Save the initial occupancy before starting this stage. unsigned InitialOccupancy; public: bool initGCNSchedStage() override; void finalizeGCNSchedStage() override; bool initGCNRegion() override; bool shouldRevertScheduling(unsigned WavesAfter) override; UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; // Retry function scheduling if we found resulting occupancy and it is // lower than used for other scheduling passes. This will give more freedom // to schedule low register pressure blocks. class ClusteredLowOccStage : public GCNSchedStage { public: bool initGCNSchedStage() override; bool initGCNRegion() override; bool shouldRevertScheduling(unsigned WavesAfter) override; ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; class PreRARematStage : public GCNSchedStage { private: // Each region at MinOccupancy will have their own list of trivially // rematerializable instructions we can remat to reduce RP. The list maps an // instruction to the position we should remat before, usually the MI using // the rematerializable instruction. MapVector> RematerializableInsts; // Map a trivially remateriazable def to a list of regions at MinOccupancy // that has the defined reg as a live-in. DenseMap> RematDefToLiveInRegions; // Collect all trivially rematerializable VGPR instructions with a single def // and single use outside the defining block into RematerializableInsts. void collectRematerializableInstructions(); bool isTriviallyReMaterializable(const MachineInstr &MI); // TODO: Should also attempt to reduce RP of SGPRs and AGPRs // Attempt to reduce RP of VGPR by sinking trivially rematerializable // instructions. Returns true if we were able to sink instruction(s). bool sinkTriviallyRematInsts(const GCNSubtarget &ST, const TargetInstrInfo *TII); public: bool initGCNSchedStage() override; bool initGCNRegion() override; bool shouldRevertScheduling(unsigned WavesAfter) override; PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; class ILPInitialScheduleStage : public GCNSchedStage { public: bool shouldRevertScheduling(unsigned WavesAfter) override; ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; class GCNPostScheduleDAGMILive final : public ScheduleDAGMI { private: std::vector> SavedMutations; bool HasIGLPInstrs = false; public: void schedule() override; void finalizeSchedule() override; GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S, bool RemoveKillFlags); }; } // End namespace llvm #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H