@@ -4019,16 +4019,17 @@ InstructionSelector::ComplexRendererFns
4019
4019
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs (MachineOperand &Root) const {
4020
4020
Register Src = Root.getReg ();
4021
4021
unsigned Mods = SISrcMods::OP_SEL_1;
4022
- unsigned ModOpcode;
4023
4022
SmallVector<Register, 8 > EltsF32;
4024
4023
4025
4024
if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef (Src))) {
4025
+ assert (BV->getNumSources () > 0 );
4026
+ // Based on first element decide which mod we match, neg or abs
4027
+ MachineInstr *ElF32 = MRI->getVRegDef (BV->getSourceReg (0 ));
4028
+ unsigned ModOpcode = (ElF32->getOpcode () == AMDGPU::G_FNEG)
4029
+ ? AMDGPU::G_FNEG
4030
+ : AMDGPU::G_FABS;
4026
4031
for (unsigned i = 0 ; i < BV->getNumSources (); ++i) {
4027
- MachineInstr *ElF32 = MRI->getVRegDef (BV->getSourceReg (i));
4028
- // Based on first element decide which mod we match, neg or abs
4029
- if (EltsF32.empty ())
4030
- ModOpcode = (ElF32->getOpcode () == AMDGPU::G_FNEG) ? AMDGPU::G_FNEG
4031
- : AMDGPU::G_FABS;
4032
+ ElF32 = MRI->getVRegDef (BV->getSourceReg (i));
4032
4033
if (ElF32->getOpcode () != ModOpcode)
4033
4034
break ;
4034
4035
EltsF32.push_back (ElF32->getOperand (1 ).getReg ());
@@ -4077,29 +4078,29 @@ AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
4077
4078
unsigned Mods = SISrcMods::OP_SEL_1;
4078
4079
SmallVector<Register, 8 > EltsV2F16;
4079
4080
4080
- if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef (Src)))
4081
- if (CV->getNumSources () > 0 ) {
4082
- MachineInstr *ElV2F16 = MRI->getVRegDef (CV->getSourceReg (0 ));
4083
- // Based on first element decide which mod we match, neg or abs
4084
- unsigned ModOpcode = (ElV2F16->getOpcode () == AMDGPU::G_FNEG)
4085
- ? AMDGPU::G_FNEG
4086
- : AMDGPU::G_FABS;
4087
-
4088
- for (unsigned i = 0 ; i < CV->getNumSources (); ++i) {
4089
- ElV2F16 = MRI->getVRegDef (CV->getSourceReg (i));
4090
- if (ElV2F16->getOpcode () != ModOpcode)
4091
- break ;
4092
- EltsV2F16.push_back (ElV2F16->getOperand (1 ).getReg ());
4093
- }
4081
+ if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef (Src))) {
4082
+ assert (CV->getNumSources () > 0 );
4083
+ MachineInstr *ElV2F16 = MRI->getVRegDef (CV->getSourceReg (0 ));
4084
+ // Based on first element decide which mod we match, neg or abs
4085
+ unsigned ModOpcode = (ElV2F16->getOpcode () == AMDGPU::G_FNEG)
4086
+ ? AMDGPU::G_FNEG
4087
+ : AMDGPU::G_FABS;
4094
4088
4095
- // All elements had ModOpcode modifier
4096
- if (CV->getNumSources () == EltsV2F16.size ()) {
4097
- MachineIRBuilder B (*Root.getParent ());
4098
- selectWMMAModsNegAbs (ModOpcode, Mods, EltsV2F16, Src, Root.getParent (),
4099
- *MRI);
4100
- }
4089
+ for (unsigned i = 0 ; i < CV->getNumSources (); ++i) {
4090
+ ElV2F16 = MRI->getVRegDef (CV->getSourceReg (i));
4091
+ if (ElV2F16->getOpcode () != ModOpcode)
4092
+ break ;
4093
+ EltsV2F16.push_back (ElV2F16->getOperand (1 ).getReg ());
4101
4094
}
4102
4095
4096
+ // All elements had ModOpcode modifier
4097
+ if (CV->getNumSources () == EltsV2F16.size ()) {
4098
+ MachineIRBuilder B (*Root.getParent ());
4099
+ selectWMMAModsNegAbs (ModOpcode, Mods, EltsV2F16, Src, Root.getParent (),
4100
+ *MRI);
4101
+ }
4102
+ }
4103
+
4103
4104
return {{[=](MachineInstrBuilder &MIB) { MIB.addReg (Src); },
4104
4105
[=](MachineInstrBuilder &MIB) { MIB.addImm (Mods); }}};
4105
4106
}
0 commit comments