Skip to content

Commit 8a3537b

Browse files
authored
[llvm][ScheduleDAG] SUnit::biasCriticalPath() does not find the critical path consistently (#93001)
Patch co-authored by AtariDreams ([email protected]). Fixes #38037. [AMDGPU] Update test results to fix build (#92982)
1 parent a282463 commit 8a3537b

File tree

7 files changed

+2115
-2114
lines changed

7 files changed

+2115
-2114
lines changed

llvm/lib/CodeGen/ScheduleDAG.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -331,8 +331,10 @@ void SUnit::biasCriticalPath() {
331331
unsigned MaxDepth = BestI->getSUnit()->getDepth();
332332
for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E;
333333
++I) {
334-
if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth)
334+
if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) {
335+
MaxDepth = I->getSUnit()->getDepth();
335336
BestI = I;
337+
}
336338
}
337339
if (BestI != Preds.begin())
338340
std::swap(*Preds.begin(), *BestI);

llvm/test/CodeGen/AMDGPU/fp_to_sint.ll

Lines changed: 196 additions & 199 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fp_to_uint.ll

Lines changed: 196 additions & 199 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.exp.ll

Lines changed: 788 additions & 804 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/llvm.exp10.ll

Lines changed: 788 additions & 804 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/shl.ll

Lines changed: 109 additions & 107 deletions
Original file line number | Diff line number | Diff line change
@@ -795,17 +795,17 @@ define amdgpu_kernel void @shl_i64(ptr addrspace(1) %out, ptr addrspace(1) %in)
795795
; EG-NEXT: ALU clause starting at 8:
796796
; EG-NEXT: MOV * T0.X, KC0[2].Z,
797797
; EG-NEXT: ALU clause starting at 9:
798-
; EG-NEXT: AND_INT T1.Y, T0.Z, literal.x,
799-
; EG-NEXT: LSHR T1.Z, T0.Y, 1,
798+
; EG-NEXT: LSHR T1.Y, T0.Y, 1,
799+
; EG-NEXT: NOT_INT T1.Z, T0.Z,
800800
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
801-
; EG-NEXT: NOT_INT * T1.W, T0.Z,
801+
; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
802802
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
803-
; EG-NEXT: BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS,
804-
; EG-NEXT: LSHL T0.W, T0.X, PV.Y,
803+
; EG-NEXT: LSHL T2.Z, T0.X, PS,
804+
; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
805805
; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x,
806806
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
807-
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
808-
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
807+
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
808+
; EG-NEXT: CNDE_INT T0.X, T1.W, T2.Z, 0.0,
809809
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
810810
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
811811
%b_ptr = getelementptr i64, ptr addrspace(1) %in, i64 1
@@ -858,8 +858,8 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
858858
; EG: ; %bb.0:
859859
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
860860
; EG-NEXT: TEX 1 @6
861-
; EG-NEXT: ALU 22, @11, KC0[CB0:0-32], KC1[]
862-
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
861+
; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[]
862+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
863863
; EG-NEXT: CF_END
864864
; EG-NEXT: PAD
865865
; EG-NEXT: Fetch clause starting at 6:
@@ -868,27 +868,28 @@ define amdgpu_kernel void @shl_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in
868868
; EG-NEXT: ALU clause starting at 10:
869869
; EG-NEXT: MOV * T0.X, KC0[2].Z,
870870
; EG-NEXT: ALU clause starting at 11:
871-
; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x,
871+
; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x,
872+
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
873+
; EG-NEXT: LSHL T2.X, T0.Z, PV.W,
874+
; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x, BS:VEC_120/SCL_212
872875
; EG-NEXT: LSHR T2.Z, T0.W, 1,
873-
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
876+
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1, BS:VEC_102/SCL_221
874877
; EG-NEXT: NOT_INT * T1.W, T1.Z,
878+
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
879+
; EG-NEXT: BIT_ALIGN_INT T3.X, PV.Z, PV.W, PS,
880+
; EG-NEXT: LSHR T2.Y, T0.Y, 1,
881+
; EG-NEXT: NOT_INT T0.Z, T1.X,
882+
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
883+
; EG-NEXT: AND_INT * T1.W, T1.X, literal.x,
875884
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
876-
; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS,
877-
; EG-NEXT: LSHL * T1.W, T0.Z, PV.Y,
878-
; EG-NEXT: AND_INT T2.X, T1.Z, literal.x,
879-
; EG-NEXT: AND_INT T1.Y, T1.X, literal.y,
880-
; EG-NEXT: LSHR T0.Z, T0.Y, 1,
881-
; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
882-
; EG-NEXT: NOT_INT * T3.W, T1.X,
883-
; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
884-
; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
885-
; EG-NEXT: LSHL T0.Z, T0.X, PV.Y,
886-
; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
887-
; EG-NEXT: CNDE_INT * T3.W, PV.X, T0.W, T1.W,
885+
; EG-NEXT: LSHL T0.Y, T0.X, PS, BS:VEC_120/SCL_212
886+
; EG-NEXT: AND_INT T1.Z, T1.X, literal.x, BS:VEC_201
887+
; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Y, PV.W, PV.Z,
888+
; EG-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, T2.X,
888889
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
889-
; EG-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
890-
; EG-NEXT: CNDE_INT * T3.Z, T2.X, T1.W, 0.0,
891-
; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0,
890+
; EG-NEXT: CNDE_INT T2.Y, PV.Z, PV.W, PV.Y,
891+
; EG-NEXT: CNDE_INT * T2.Z, T1.Y, T2.X, 0.0,
892+
; EG-NEXT: CNDE_INT T2.X, T1.Z, T0.Y, 0.0,
892893
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
893894
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
894895
%b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %in, i64 1
@@ -955,65 +956,66 @@ define amdgpu_kernel void @shl_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in
955956
; EG: ; %bb.0:
956957
; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[]
957958
; EG-NEXT: TEX 3 @6
958-
; EG-NEXT: ALU 47, @15, KC0[CB0:0-32], KC1[]
959-
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
960-
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1
959+
; EG-NEXT: ALU 48, @15, KC0[CB0:0-32], KC1[]
960+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 0
961+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T1.X, 1
961962
; EG-NEXT: CF_END
962963
; EG-NEXT: Fetch clause starting at 6:
963-
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1
964-
; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1
965-
; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 32, #1
966-
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1
964+
; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 32, #1
965+
; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 48, #1
966+
; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 16, #1
967+
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
967968
; EG-NEXT: ALU clause starting at 14:
968969
; EG-NEXT: MOV * T0.X, KC0[2].Z,
969970
; EG-NEXT: ALU clause starting at 15:
970-
; EG-NEXT: AND_INT T4.Z, T1.Z, literal.x,
971-
; EG-NEXT: LSHR T1.W, T0.W, 1,
972-
; EG-NEXT: NOT_INT * T3.W, T1.Z,
971+
; EG-NEXT: AND_INT * T1.W, T1.Z, literal.x,
973972
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
974-
; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1,
975-
; EG-NEXT: AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201
976-
; EG-NEXT: LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212
977-
; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221
978-
; EG-NEXT: NOT_INT * T2.W, T3.Z,
979-
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
980-
; EG-NEXT: BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS,
981-
; EG-NEXT: LSHL T2.Z, T2.Z, PV.Y,
982-
; EG-NEXT: BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W,
983-
; EG-NEXT: LSHL * T1.W, T0.Z, T4.Z,
973+
; EG-NEXT: LSHL * T1.W, T0.Z, PV.W,
984974
; EG-NEXT: AND_INT T4.X, T1.Z, literal.x,
985-
; EG-NEXT: AND_INT T1.Y, T1.X, literal.y,
986-
; EG-NEXT: LSHR T0.Z, T0.Y, 1,
987-
; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
988-
; EG-NEXT: NOT_INT * T3.W, T1.X,
975+
; EG-NEXT: LSHR T1.Y, T3.W, 1,
976+
; EG-NEXT: NOT_INT T4.Z, T2.Z, BS:VEC_201
977+
; EG-NEXT: BIT_ALIGN_INT T2.W, T3.W, T3.Z, 1,
978+
; EG-NEXT: AND_INT * T3.W, T2.Z, literal.y,
989979
; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
990-
; EG-NEXT: AND_INT T5.X, T3.Z, literal.x,
991-
; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
992-
; EG-NEXT: LSHL T0.Z, T0.X, PV.Y,
993-
; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
994-
; EG-NEXT: CNDE_INT * T4.W, PV.X, T0.W, T1.W,
980+
; EG-NEXT: LSHL T5.X, T3.Z, PS,
981+
; EG-NEXT: AND_INT T2.Y, T2.Z, literal.x, BS:VEC_120/SCL_212
982+
; EG-NEXT: BIT_ALIGN_INT T2.Z, PV.Y, PV.W, PV.Z,
983+
; EG-NEXT: LSHR T2.W, T3.Y, 1,
984+
; EG-NEXT: NOT_INT * T3.W, T2.X,
985+
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
986+
; EG-NEXT: BIT_ALIGN_INT T6.X, T3.Y, T3.X, 1,
987+
; EG-NEXT: AND_INT T1.Y, T2.X, literal.x,
988+
; EG-NEXT: LSHR T3.Z, T0.W, 1,
989+
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
990+
; EG-NEXT: NOT_INT * T4.W, T1.Z,
991+
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
992+
; EG-NEXT: BIT_ALIGN_INT T7.X, PV.Z, PV.W, PS,
993+
; EG-NEXT: LSHL T1.Y, T3.X, PV.Y, BS:VEC_120/SCL_212
994+
; EG-NEXT: AND_INT T0.Z, T2.X, literal.x, BS:VEC_201
995+
; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, PV.X, T3.W,
996+
; EG-NEXT: CNDE_INT * T3.W, T2.Y, T2.Z, T5.X,
995997
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
996-
; EG-NEXT: AND_INT T0.X, T3.X, literal.x,
997-
; EG-NEXT: CNDE_INT T4.Y, PV.W, PV.Y, PV.Z,
998-
; EG-NEXT: LSHR T1.Z, T2.Y, 1,
999-
; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1,
1000-
; EG-NEXT: NOT_INT * T3.W, T3.X,
998+
; EG-NEXT: LSHR T2.X, T0.Y, 1,
999+
; EG-NEXT: CNDE_INT T3.Y, PV.Z, PV.W, PV.Y,
1000+
; EG-NEXT: NOT_INT T1.Z, T1.X,
1001+
; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
1002+
; EG-NEXT: AND_INT * T2.W, T1.X, literal.x,
10011003
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
1002-
; EG-NEXT: BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS,
1003-
; EG-NEXT: LSHL T0.Y, T2.X, PV.X,
1004-
; EG-NEXT: CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212
1005-
; EG-NEXT: AND_INT * T0.W, T3.X, literal.x, BS:VEC_201
1004+
; EG-NEXT: LSHL T0.X, T0.X, PS,
1005+
; EG-NEXT: AND_INT T0.Y, T1.X, literal.x, BS:VEC_120/SCL_212
1006+
; EG-NEXT: CNDE_INT T3.Z, T2.Y, T5.X, 0.0, BS:VEC_021/SCL_122
1007+
; EG-NEXT: BIT_ALIGN_INT * T0.W, PV.X, PV.W, PV.Z,
10061008
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1007-
; EG-NEXT: CNDE_INT * T1.W, T5.X, T3.Y, T2.Z,
1008-
; EG-NEXT: CNDE_INT T4.X, T2.W, T0.Z, 0.0,
1009-
; EG-NEXT: CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212
1010-
; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x,
1009+
; EG-NEXT: CNDE_INT * T2.W, T4.X, T7.X, T1.W,
1010+
; EG-NEXT: CNDE_INT T3.X, T0.Z, T1.Y, 0.0,
1011+
; EG-NEXT: CNDE_INT T2.Y, T0.Y, T0.W, T0.X,
1012+
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
10111013
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
1012-
; EG-NEXT: LSHR T0.X, PV.W, literal.x,
1013-
; EG-NEXT: CNDE_INT T1.Z, T5.X, T2.Z, 0.0,
1014-
; EG-NEXT: CNDE_INT * T1.X, T0.W, T0.Y, 0.0,
1014+
; EG-NEXT: LSHR T1.X, PV.W, literal.x,
1015+
; EG-NEXT: CNDE_INT T2.Z, T4.X, T1.W, 0.0,
1016+
; EG-NEXT: CNDE_INT * T2.X, T0.Y, T0.X, 0.0,
10151017
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1016-
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
1018+
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
10171019
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
10181020
%b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %in, i64 1
10191021
%a = load <4 x i64>, ptr addrspace(1) %in
@@ -1172,17 +1174,17 @@ define amdgpu_kernel void @s_shl_constant_i64(ptr addrspace(1) %out, i64 %a) {
11721174
; EG-NEXT: CF_END
11731175
; EG-NEXT: PAD
11741176
; EG-NEXT: ALU clause starting at 4:
1175-
; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
1176-
; EG-NEXT: MOV T0.W, literal.y,
1177-
; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
1178-
; EG-NEXT: 31(4.344025e-44), -1(nan)
1179-
; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
1180-
; EG-NEXT: LSHL T0.W, literal.y, PV.Z,
1177+
; EG-NEXT: MOV T0.Z, literal.x,
1178+
; EG-NEXT: NOT_INT T0.W, KC0[2].W,
1179+
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
1180+
; EG-NEXT: -1(nan), 31(4.344025e-44)
1181+
; EG-NEXT: LSHL T1.Z, literal.x, PS,
1182+
; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
11811183
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
1182-
; EG-NEXT: 32767(4.591635e-41), -1(nan)
1184+
; EG-NEXT: -1(nan), 32767(4.591635e-41)
11831185
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1184-
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
1185-
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
1186+
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
1187+
; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
11861188
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
11871189
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
11881190
%shl = shl i64 281474976710655, %a
@@ -1423,15 +1425,15 @@ define amdgpu_kernel void @s_shl_inline_imm_64_i64(ptr addrspace(1) %out, ptr ad
14231425
; EG-NEXT: CF_END
14241426
; EG-NEXT: PAD
14251427
; EG-NEXT: ALU clause starting at 4:
1426-
; EG-NEXT: NOT_INT T0.W, KC0[2].W,
1427-
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x,
1428+
; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
1429+
; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
14281430
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
1429-
; EG-NEXT: LSHL T0.Z, literal.x, PS,
1430-
; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
1431-
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
1432-
; EG-NEXT: 64(8.968310e-44), 32(4.484155e-44)
1433-
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
1434-
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0,
1431+
; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
1432+
; EG-NEXT: AND_INT T1.W, KC0[2].W, literal.x,
1433+
; EG-NEXT: LSHL * T0.W, literal.y, PV.W,
1434+
; EG-NEXT: 32(4.484155e-44), 64(8.968310e-44)
1435+
; EG-NEXT: CNDE_INT * T0.Y, PV.W, PV.Z, PS,
1436+
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
14351437
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
14361438
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
14371439
%shl = shl i64 64, %a
@@ -1903,16 +1905,16 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(ptr addrspace(1) %out, p
19031905
; EG-NEXT: CF_END
19041906
; EG-NEXT: PAD
19051907
; EG-NEXT: ALU clause starting at 4:
1906-
; EG-NEXT: NOT_INT T0.W, KC0[2].W,
1907-
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x,
1908+
; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
1909+
; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
19081910
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
1909-
; EG-NEXT: LSHL T0.Z, literal.x, PS,
1910-
; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W,
1911-
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
1912-
; EG-NEXT: 1082130432(4.000000e+00), 541065216(1.626303e-19)
1913-
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1914-
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
1915-
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0,
1911+
; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS,
1912+
; EG-NEXT: AND_INT T1.W, KC0[2].W, literal.y,
1913+
; EG-NEXT: LSHL * T0.W, literal.z, PV.W,
1914+
; EG-NEXT: 541065216(1.626303e-19), 32(4.484155e-44)
1915+
; EG-NEXT: 1082130432(4.000000e+00), 0(0.000000e+00)
1916+
; EG-NEXT: CNDE_INT * T0.Y, PV.W, PV.Z, PS,
1917+
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
19161918
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
19171919
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
19181920
%shl = shl i64 1082130432, %a
@@ -1959,17 +1961,17 @@ define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(ptr addrspace(1) %ou
19591961
; EG-NEXT: CF_END
19601962
; EG-NEXT: PAD
19611963
; EG-NEXT: ALU clause starting at 4:
1962-
; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
1963-
; EG-NEXT: MOV T0.W, literal.y,
1964-
; EG-NEXT: NOT_INT * T1.W, KC0[2].W,
1965-
; EG-NEXT: 31(4.344025e-44), -532676608(-5.534023e+19)
1966-
; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS,
1967-
; EG-NEXT: LSHL T0.W, literal.y, PV.Z,
1964+
; EG-NEXT: MOV T0.Z, literal.x,
1965+
; EG-NEXT: NOT_INT T0.W, KC0[2].W,
1966+
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y,
1967+
; EG-NEXT: -532676608(-5.534023e+19), 31(4.344025e-44)
1968+
; EG-NEXT: LSHL T1.Z, literal.x, PS,
1969+
; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.Z, PV.W,
19681970
; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z,
1969-
; EG-NEXT: 2147483647(nan), -1065353216(-4.000000e+00)
1971+
; EG-NEXT: -1065353216(-4.000000e+00), 2147483647(nan)
19701972
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
1971-
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W,
1972-
; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0,
1973+
; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z,
1974+
; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0,
19731975
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
19741976
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
19751977
%shl = shl i64 -1065353216, %a
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
; RUN: llc < %s -mtriple=x86_64-apple-darwin8 -misched-print-dags -o - 2>&1 > /dev/null | FileCheck %s
2+
; REQUIRES: asserts
3+
4+
@sc = common global i8 0
5+
@uc = common global i8 0
6+
@ui = common global i32 0
7+
8+
; Regression Test for PR92368.
9+
;
10+
; CHECK: SU(8): CMP8rr %4:gr8, %3:gr8, implicit-def $eflags
11+
; CHECK: Predecessors:
12+
; CHECK-NEXT: SU(6): Data Latency=0 Reg=%4
13+
; CHECK-NEXT: SU(7): Out Latency=0
14+
; CHECK-NEXT: SU(5): Out Latency=0
15+
; CHECK-NEXT: SU(3): Data Latency=4 Reg=%3
16+
define void @misched_bug() nounwind {
17+
entry:
18+
%v0 = load i8, ptr @sc, align 1
19+
%v1 = zext i8 %v0 to i32
20+
%v2 = load i8, ptr @uc, align 1
21+
%v3 = zext i8 %v2 to i32
22+
%v4 = trunc i32 %v3 to i8
23+
%v5 = trunc i32 %v1 to i8
24+
%pair74 = cmpxchg ptr @sc, i8 %v4, i8 %v5 monotonic monotonic
25+
%v6 = extractvalue { i8, i1 } %pair74, 0
26+
%v7 = icmp eq i8 %v6, %v4
27+
%v8 = zext i1 %v7 to i8
28+
%v9 = zext i8 %v8 to i32
29+
store i32 %v9, ptr @ui, align 4
30+
br label %return
31+
32+
return: ; preds = %entry
33+
ret void
34+
}
35+

0 commit comments

Comments
 (0)