Skip to content

Commit 08c0745

Browse files
committed
[VPlan] Preserve IR flags when widening casts
We have `nneg` for both `zext` and `uitofp`. Fixes #114856
1 parent e4e19c1 commit 08c0745

File tree

10 files changed

+24
-3
lines changed

10 files changed

+24
-3
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,8 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
14841484
Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
14851485
State.set(this, Cast);
14861486
State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
1487+
if (auto *CastOp = dyn_cast<Instruction>(Cast))
1488+
setFlags(CastOp);
14871489
}
14881490

14891491
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,

llvm/test/CodeGen/MIR/AMDGPU/long-branch-reg-all-sgpr-used.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
; CHECK-NEXT: fp64-fp16-output-denormals: true
3939
; CHECK-NEXT: highBitsOf32BitAddress: 0
4040
; CHECK-NEXT: occupancy: 5
41+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
4142
; CHECK-NEXT: scavengeFI: '%stack.0'
4243
; CHECK-NEXT: vgprForAGPRCopy: ''
4344
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
@@ -304,6 +305,7 @@
304305
; CHECK-NEXT: fp64-fp16-output-denormals: true
305306
; CHECK-NEXT: highBitsOf32BitAddress: 0
306307
; CHECK-NEXT: occupancy: 5
308+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
307309
; CHECK-NEXT: scavengeFI: '%stack.0'
308310
; CHECK-NEXT: vgprForAGPRCopy: ''
309311
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-after-pei.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
; AFTER-PEI-NEXT: fp64-fp16-output-denormals: true
3939
; AFTER-PEI-NEXT: highBitsOf32BitAddress: 0
4040
; AFTER-PEI-NEXT: occupancy: 5
41+
; AFTER-PEI-NEXT: numPhysicalVGPRSpillLanes: 0
4142
; AFTER-PEI-NEXT: scavengeFI: '%stack.3'
4243
; AFTER-PEI-NEXT: vgprForAGPRCopy: ''
4344
; AFTER-PEI-NEXT: sgprForEXECCopy: ''

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg-debug.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
; CHECK-NEXT: fp64-fp16-output-denormals: true
4040
; CHECK-NEXT: BitsOf32BitAddress: 0
4141
; CHECK-NEXT: occupancy: 8
42+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
4243
; CHECK-NEXT: vgprForAGPRCopy: ''
4344
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
4445
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-long-branch-reg.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
; CHECK-NEXT: fp64-fp16-output-denormals: true
4040
; CHECK-NEXT: BitsOf32BitAddress: 0
4141
; CHECK-NEXT: occupancy: 8
42+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
4243
; CHECK-NEXT: vgprForAGPRCopy: ''
4344
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
4445
; CHECK-NEXT: longBranchReservedReg: '$sgpr2_sgpr3'

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
# FULL-NEXT: fp64-fp16-output-denormals: true
4949
# FULL-NEXT: highBitsOf32BitAddress: 0
5050
# FULL-NEXT: occupancy: 8
51+
# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
5152
# FULL-NEXT: vgprForAGPRCopy: ''
5253
# FULL-NEXT: sgprForEXECCopy: ''
5354
# FULL-NEXT: longBranchReservedReg: ''
@@ -81,6 +82,7 @@
8182
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
8283
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
8384
# SIMPLE-NEXT: occupancy: 8
85+
# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
8486
# SIMPLE-NEXT: body:
8587
name: kernel0
8688
machineFunctionInfo:
@@ -152,6 +154,7 @@ body: |
152154
# FULL-NEXT: fp64-fp16-output-denormals: true
153155
# FULL-NEXT: highBitsOf32BitAddress: 0
154156
# FULL-NEXT: occupancy: 8
157+
# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
155158
# FULL-NEXT: vgprForAGPRCopy: ''
156159
# FULL-NEXT: sgprForEXECCopy: ''
157160
# FULL-NEXT: longBranchReservedReg: ''
@@ -174,6 +177,7 @@ body: |
174177
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
175178
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
176179
# SIMPLE-NEXT: occupancy: 8
180+
# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
177181
# SIMPLE-NEXT: body:
178182

179183
name: no_mfi
@@ -227,6 +231,7 @@ body: |
227231
# FULL-NEXT: fp64-fp16-output-denormals: true
228232
# FULL-NEXT: highBitsOf32BitAddress: 0
229233
# FULL-NEXT: occupancy: 8
234+
# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
230235
# FULL-NEXT: vgprForAGPRCopy: ''
231236
# FULL-NEXT: sgprForEXECCopy: ''
232237
# FULL-NEXT: longBranchReservedReg: ''
@@ -249,6 +254,7 @@ body: |
249254
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
250255
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
251256
# SIMPLE-NEXT: occupancy: 8
257+
# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
252258
# SIMPLE-NEXT: body:
253259

254260
name: empty_mfi
@@ -303,6 +309,7 @@ body: |
303309
# FULL-NEXT: fp64-fp16-output-denormals: true
304310
# FULL-NEXT: highBitsOf32BitAddress: 0
305311
# FULL-NEXT: occupancy: 8
312+
# FULL-NEXT: numPhysicalVGPRSpillLanes: 0
306313
# FULL-NEXT: vgprForAGPRCopy: ''
307314
# FULL-NEXT: sgprForEXECCopy: ''
308315
# FULL-NEXT: longBranchReservedReg: ''
@@ -326,6 +333,7 @@ body: |
326333
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
327334
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
328335
# SIMPLE-NEXT: occupancy: 8
336+
# SIMPLE-NEXT: numPhysicalVGPRSpillLanes: 0
329337
# SIMPLE-NEXT: body:
330338

331339
name: empty_mfi_entry_func

llvm/test/CodeGen/MIR/AMDGPU/machine-function-info.ll

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@
4848
; CHECK-NEXT: fp64-fp16-output-denormals: true
4949
; CHECK-NEXT: highBitsOf32BitAddress: 0
5050
; CHECK-NEXT: occupancy: 8
51+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
5152
; CHECK-NEXT: vgprForAGPRCopy: ''
5253
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
5354
; CHECK-NEXT: longBranchReservedReg: ''
54-
; CHECK-NEXT: hasInitWholeWave: false
55+
; CHECK-NEXT: hasInitWholeWave: false
5556
; CHECK-NEXT: body:
5657
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
5758
%gep = getelementptr inbounds [512 x float], ptr addrspace(3) @lds, i32 0, i32 %arg0
@@ -94,6 +95,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
9495
; CHECK-NEXT: fp64-fp16-output-denormals: true
9596
; CHECK-NEXT: highBitsOf32BitAddress: 0
9697
; CHECK-NEXT: occupancy: 10
98+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
9799
; CHECK-NEXT: vgprForAGPRCopy: ''
98100
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
99101
; CHECK-NEXT: longBranchReservedReg: ''
@@ -164,6 +166,7 @@ define amdgpu_ps void @gds_size_shader(i32 %arg0, i32 inreg %arg1) #5 {
164166
; CHECK-NEXT: fp64-fp16-output-denormals: true
165167
; CHECK-NEXT: highBitsOf32BitAddress: 0
166168
; CHECK-NEXT: occupancy: 8
169+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
167170
; CHECK-NEXT: vgprForAGPRCopy: ''
168171
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
169172
; CHECK-NEXT: longBranchReservedReg: ''
@@ -216,6 +219,7 @@ define void @function() {
216219
; CHECK-NEXT: fp64-fp16-output-denormals: true
217220
; CHECK-NEXT: highBitsOf32BitAddress: 0
218221
; CHECK-NEXT: occupancy: 8
222+
; CHECK-NEXT: numPhysicalVGPRSpillLanes: 0
219223
; CHECK-NEXT: vgprForAGPRCopy: ''
220224
; CHECK-NEXT: sgprForEXECCopy: '$sgpr100_sgpr101'
221225
; CHECK-NEXT: longBranchReservedReg: ''

llvm/test/Transforms/LoopVectorize/X86/uitofp-preserve-nneg.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ define dso_local void @uitofp_preserve_nneg(ptr nocapture noundef writeonly %res
2323
; CHECK: vector.body:
2424
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2525
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
26-
; CHECK-NEXT: [[TMP0:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x float>
26+
; CHECK-NEXT: [[TMP0:%.*]] = uitofp nneg <4 x i32> [[VEC_IND]] to <4 x float>
2727
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[BROADCAST_SPLAT]], <4 x float> [[TMP0]], <4 x float> [[BROADCAST_SPLAT3]])
2828
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[RESULT:%.*]], i64 [[INDEX]]
2929
; CHECK-NEXT: store <4 x float> [[TMP1]], ptr [[TMP2]], align 4

llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ define void @test3(float %0) {
109109
; CHECK: for.body:
110110
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4
111111
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]]
112-
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> <i1 true, i1 true>, i64 0)
112+
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0)
113113
; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2)
114114
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0)
115115
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>

llvm/test/tools/llvm-reduce/mir/preserve-machine-function-info-amdgpu.mir

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,12 @@
5252
# RESULT-NEXT: fp64-fp16-output-denormals: false
5353
# RESULT-NEXT: highBitsOf32BitAddress: 4276993775
5454
# RESULT-NEXT: occupancy: 8
55+
# RESULT-NEXT: numPhysicalVGPRSpillLanes: 0
5556
# RESULT-NEXT: wwmReservedRegs:
5657
# RESULT-NEXT: - '$vgpr2'
5758
# RESULT-NEXT: - '$vgpr3'
5859
# RESULT-NEXT: vgprForAGPRCopy: '$vgpr33'
60+
# RESULT-NEXT: body:
5961

6062
# RESULT: S_NOP 0, implicit $sgpr48_sgpr49_sgpr50_sgpr51
6163
# RESULT: S_NOP 0, implicit $vgpr33

0 commit comments

Comments
 (0)