@@ -7940,3 +7940,143 @@ unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
     return 0;
   }
 }
+
+bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+                                 Register &SrcReg2, int64_t &CmpMask,
+                                 int64_t &CmpValue) const {
+  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
+    return false;
+
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::S_CMP_EQ_U32:
+  case AMDGPU::S_CMP_EQ_I32:
+  case AMDGPU::S_CMP_LG_U32:
+  case AMDGPU::S_CMP_LG_I32:
+  case AMDGPU::S_CMP_LT_U32:
+  case AMDGPU::S_CMP_LT_I32:
+  case AMDGPU::S_CMP_GT_U32:
+  case AMDGPU::S_CMP_GT_I32:
+  case AMDGPU::S_CMP_LE_U32:
+  case AMDGPU::S_CMP_LE_I32:
+  case AMDGPU::S_CMP_GE_U32:
+  case AMDGPU::S_CMP_GE_I32:
+  case AMDGPU::S_CMP_EQ_U64:
+  case AMDGPU::S_CMP_LG_U64:
+    SrcReg = MI.getOperand(0).getReg();
+    if (MI.getOperand(1).isReg()) {
+      if (MI.getOperand(1).getSubReg())
+        return false;
+      SrcReg2 = MI.getOperand(1).getReg();
+      CmpValue = 0;
+    } else if (MI.getOperand(1).isImm()) {
+      SrcReg2 = Register();
+      CmpValue = MI.getOperand(1).getImm();
+    } else {
+      return false;
+    }
+    CmpMask = ~0;
+    return true;
+  case AMDGPU::S_CMPK_EQ_U32:
+  case AMDGPU::S_CMPK_EQ_I32:
+  case AMDGPU::S_CMPK_LG_U32:
+  case AMDGPU::S_CMPK_LG_I32:
+  case AMDGPU::S_CMPK_LT_U32:
+  case AMDGPU::S_CMPK_LT_I32:
+  case AMDGPU::S_CMPK_GT_U32:
+  case AMDGPU::S_CMPK_GT_I32:
+  case AMDGPU::S_CMPK_LE_U32:
+  case AMDGPU::S_CMPK_LE_I32:
+  case AMDGPU::S_CMPK_GE_U32:
+  case AMDGPU::S_CMPK_GE_I32:
+    SrcReg = MI.getOperand(0).getReg();
+    SrcReg2 = Register();
+    CmpValue = MI.getOperand(1).getImm();
+    CmpMask = ~0;
+    return true;
+  }
+
+  return false;
+}
+
+bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+                                       Register SrcReg2, int64_t CmpMask,
+                                       int64_t CmpValue,
+                                       const MachineRegisterInfo *MRI) const {
+  if (SrcReg2 || SrcReg.isPhysical())
+    return false;
+
+  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+                               this](int64_t ExpectedValue) -> bool {
+    // s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
+    // s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
+    // s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
+    // s_cmp_ge_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
+    // s_cmp_eq_u64 (s_and_b64 $src, 1), 1 => s_and_b64 $src, 1
+    // s_cmp_lg_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
+    // s_cmp_lg_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
+    // s_cmp_gt_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
+    // s_cmp_gt_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
+    // s_cmp_lg_u64 (s_and_b64 $src, 1), 0 => s_and_b64 $src, 1
+
+    // TODO: Fold this into s_bitcmp* if result of an AND is unused.
+    // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
+    // process any power of 2.
+
+    if (CmpValue != ExpectedValue)
+      return false;
+
+    MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
+    if (!Def || Def->getParent() != CmpInstr.getParent())
+      return false;
+
+    if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
+        Def->getOpcode() != AMDGPU::S_AND_B64)
+      return false;
+
+    if ((!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 1) &&
+        (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1))
+      return false;
+
+    for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
+         I != E; ++I) {
+      if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
+          I->killsRegister(AMDGPU::SCC, &RI))
+        return false;
+    }
+
+    MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC);
+    SccDef->setIsDead(false);
+    CmpInstr.eraseFromParent();
+
+    return true;
+  };
+
+  switch (CmpInstr.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::S_CMP_EQ_U32:
+  case AMDGPU::S_CMP_EQ_I32:
+  case AMDGPU::S_CMP_GE_U32:
+  case AMDGPU::S_CMP_GE_I32:
+  case AMDGPU::S_CMP_EQ_U64:
+  case AMDGPU::S_CMPK_EQ_U32:
+  case AMDGPU::S_CMPK_EQ_I32:
+  case AMDGPU::S_CMPK_GE_U32:
+  case AMDGPU::S_CMPK_GE_I32:
+    return optimizeCmpAnd(1);
+  case AMDGPU::S_CMP_LG_U32:
+  case AMDGPU::S_CMP_LG_I32:
+  case AMDGPU::S_CMP_GT_U32:
+  case AMDGPU::S_CMP_GT_I32:
+  case AMDGPU::S_CMP_LG_U64:
+  case AMDGPU::S_CMPK_LG_U32:
+  case AMDGPU::S_CMPK_LG_I32:
+  case AMDGPU::S_CMPK_GT_U32:
+  case AMDGPU::S_CMPK_GT_I32:
+    return optimizeCmpAnd(0);
+  }
+
+  return false;
+}