Skip to content

Commit 648f4d0

Browse files
authored
[ScalarizeMaskedMemIntr] Pre-commit tests for splat optimizations (#104527)
Commit tests that track the current behavior when the mask argument to an llvm.masked.load or llvm.masked.store is a splat of a non-constant value (that is, the pass does nothing special for this case).
1 parent 293aa56 commit 648f4d0

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-load.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,41 @@ define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
5858
ret <2 x i64> %ret
5959
}
6060

61+
; To be fixed: If the mask is the splat/broadcast of a non-constant value, use a
62+
; vector load
63+
; Masked load of <2 x i64> from %p whose mask is the scalar i1 %mask broadcast
; to both lanes. The CHECK lines record the current output: the splat mask is
; bitcast to i2 and each lane gets its own bit test, branch, and scalar i64
; load, with %passthrough supplying the lanes that are not loaded.
define <2 x i64> @scalarize_v2i64_splat_mask(ptr %p, i1 %mask, <2 x i64> %passthrough) {
64+
; CHECK-LABEL: @scalarize_v2i64_splat_mask(
65+
; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK:%.*]], i32 0
66+
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
67+
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK_SPLAT]] to i2
68+
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
69+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
70+
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
71+
; CHECK: cond.load:
72+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 0
73+
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
74+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[PASSTHROUGH:%.*]], i64 [[TMP4]], i64 0
75+
; CHECK-NEXT: br label [[ELSE]]
76+
; CHECK: else:
77+
; CHECK-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i64> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHROUGH]], [[TMP0:%.*]] ]
78+
; CHECK-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
79+
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
80+
; CHECK-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
81+
; CHECK: cond.load1:
82+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[P]], i32 1
83+
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
84+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[RES_PHI_ELSE]], i64 [[TMP9]], i64 1
85+
; CHECK-NEXT: br label [[ELSE2]]
86+
; CHECK: else2:
87+
; CHECK-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
88+
; CHECK-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
89+
;
90+
; Place %mask in lane 0; lane 1 is still poison at this point.
%mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
91+
; An all-zero shuffle mask selects lane 0 for every result lane, producing the splat.
%mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
92+
; Intrinsic under test. Operands: pointer, alignment (i32 8; the expected
; scalar loads above are `align 8`), mask, passthrough.
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> %mask.splat, <2 x i64> %passthrough)
93+
ret <2 x i64> %ret
94+
}
95+
6196
; This uses a byte-sized but non-power-of-2 element size. This used to crash due to a bad alignment calculation.
6297
define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
6398
; CHECK-LABEL: @scalarize_v2i24(

llvm/test/Transforms/ScalarizeMaskedMemIntrin/X86/expand-masked-store.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,37 @@ define void @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %data) {
5656
ret void
5757
}
5858

59+
; To be fixed: If the mask is the splat/broadcast of a non-constant value, use a
60+
; vector store
61+
; Masked store of <2 x i64> %data to %p whose mask is the scalar i1 %mask
; broadcast to both lanes. The CHECK lines record the current output: the splat
; mask is bitcast to i2 and each lane gets its own bit test, branch, and scalar
; i64 store of the element extracted from %data.
define void @scalarize_v2i64_splat_mask(ptr %p, <2 x i64> %data, i1 %mask) {
62+
; CHECK-LABEL: @scalarize_v2i64_splat_mask(
63+
; CHECK-NEXT: [[MASK_VEC:%.*]] = insertelement <2 x i1> poison, i1 [[MASK:%.*]], i32 0
64+
; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <2 x i1> [[MASK_VEC]], <2 x i1> poison, <2 x i32> zeroinitializer
65+
; CHECK-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK_SPLAT]] to i2
66+
; CHECK-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
67+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
68+
; CHECK-NEXT: br i1 [[TMP2]], label [[COND_STORE:%.*]], label [[ELSE:%.*]]
69+
; CHECK: cond.store:
70+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[DATA:%.*]], i64 0
71+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 0
72+
; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8
73+
; CHECK-NEXT: br label [[ELSE]]
74+
; CHECK: else:
75+
; CHECK-NEXT: [[TMP5:%.*]] = and i2 [[SCALAR_MASK]], -2
76+
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i2 [[TMP5]], 0
77+
; CHECK-NEXT: br i1 [[TMP6]], label [[COND_STORE1:%.*]], label [[ELSE2:%.*]]
78+
; CHECK: cond.store1:
79+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[DATA]], i64 1
80+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[P]], i32 1
81+
; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP8]], align 8
82+
; CHECK-NEXT: br label [[ELSE2]]
83+
; CHECK: else2:
84+
; CHECK-NEXT: ret void
85+
;
86+
; Place %mask in lane 0; lane 1 is still poison at this point.
%mask.vec = insertelement <2 x i1> poison, i1 %mask, i32 0
87+
; An all-zero shuffle mask selects lane 0 for every result lane, producing the splat.
%mask.splat = shufflevector <2 x i1> %mask.vec, <2 x i1> poison, <2 x i32> zeroinitializer
88+
; Intrinsic under test. Operands: data, pointer, alignment (i32 8; the expected
; scalar stores above are `align 8`), mask.
call void @llvm.masked.store.v2i64.p0(<2 x i64> %data, ptr %p, i32 8, <2 x i1> %mask.splat)
89+
ret void
90+
}
91+
5992
declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)

0 commit comments

Comments
 (0)