Commit ad8a2e4

[SLP][NFC] Add freeze instruction tests for upcoming support (#102215)
1 parent 01b488f commit ad8a2e4
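
These are NFC, test-only additions: the new @addsub_freeze and @fmuladd_2f64_freeze functions record SLPVectorizer's current, fully scalar output for IR containing freeze, so that a follow-up patch adding freeze support will surface as a plain CHECK-line diff. The remaining hunks appear to be mechanical [[TMP...]] renumbering from regenerating the CHECK lines.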

File tree: 2 files changed, +132 −20 lines

llvm/test/Transforms/SLPVectorizer/X86/addsub.ll

Lines changed: 101 additions & 20 deletions
@@ -65,6 +65,87 @@ entry:
   ret void
 }
 
+define void @addsub_freeze() #0 {
+; CHECK-LABEL: @addsub_freeze(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @b, align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @d, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr @e, align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[ADD]], [[ADD1]]
+; CHECK-NEXT:    [[FREEZE_ADD2:%.*]] = freeze i32 [[ADD2]]
+; CHECK-NEXT:    store i32 [[FREEZE_ADD2]], ptr @a, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[ADD3]], [[ADD4]]
+; CHECK-NEXT:    [[FREEZE_SUB:%.*]] = freeze i32 [[SUB]]
+; CHECK-NEXT:    store i32 [[FREEZE_SUB]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT:    [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[ADD6]]
+; CHECK-NEXT:    [[FREEZE_ADD7:%.*]] = freeze i32 [[ADD7]]
+; CHECK-NEXT:    store i32 [[FREEZE_ADD7]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[SUB10:%.*]] = sub nsw i32 [[ADD8]], [[ADD9]]
+; CHECK-NEXT:    [[FREEZE_SUB10:%.*]] = freeze i32 [[SUB10]]
+; CHECK-NEXT:    store i32 [[FREEZE_SUB10]], ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = load i32, ptr @b, align 4
+  %1 = load i32, ptr @c, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, ptr @d, align 4
+  %3 = load i32, ptr @e, align 4
+  %add1 = add nsw i32 %2, %3
+  %add2 = add nsw i32 %add, %add1
+  %freeze.add2 = freeze i32 %add2
+  store i32 %freeze.add2, ptr @a, align 4
+  %4 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 1), align 4
+  %5 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 1), align 4
+  %add3 = add nsw i32 %4, %5
+  %6 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 1), align 4
+  %7 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 1), align 4
+  %add4 = add nsw i32 %6, %7
+  %sub = sub nsw i32 %add3, %add4
+  %freeze.sub = freeze i32 %sub
+  store i32 %freeze.sub, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 1), align 4
+  %8 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 2), align 4
+  %9 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 2), align 4
+  %add5 = add nsw i32 %8, %9
+  %10 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 2), align 4
+  %11 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 2), align 4
+  %add6 = add nsw i32 %10, %11
+  %add7 = add nsw i32 %add5, %add6
+  %freeze.add7 = freeze i32 %add7
+  store i32 %freeze.add7, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 2), align 4
+  %12 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @b, i32 0, i64 3), align 4
+  %13 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @c, i32 0, i64 3), align 4
+  %add8 = add nsw i32 %12, %13
+  %14 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @d, i32 0, i64 3), align 4
+  %15 = load i32, ptr getelementptr inbounds ([4 x i32], ptr @e, i32 0, i64 3), align 4
+  %add9 = add nsw i32 %14, %15
+  %sub10 = sub nsw i32 %add8, %add9
+  %freeze.sub10 = freeze i32 %sub10
+  store i32 %freeze.sub10, ptr getelementptr inbounds ([4 x i32], ptr @a, i32 0, i64 3), align 4
+  ret void
+}
+
 ; Function Attrs: nounwind uwtable
 define void @subadd() #0 {
 ; CHECK-LABEL: @subadd(
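
The CHECK lines above confirm @addsub_freeze currently stays fully scalar. Once SLP learns to look through freeze, this test would plausibly vectorize the same way the neighboring @addsub test already does, with a single vector freeze on the blended result. A minimal illustrative sketch under that assumption (all value names are hypothetical, not taken from any actual output):

; Hypothetical vectorized form of @addsub_freeze (illustrative only):
define void @addsub_freeze_sketch() {
entry:
  %vb = load <4 x i32>, ptr @b, align 4
  %vc = load <4 x i32>, ptr @c, align 4
  %sum0 = add nsw <4 x i32> %vb, %vc
  %vd = load <4 x i32>, ptr @d, align 4
  %ve = load <4 x i32>, ptr @e, align 4
  %sum1 = add nsw <4 x i32> %vd, %ve
  %vadd = add nsw <4 x i32> %sum0, %sum1      ; feeds result lanes 0 and 2
  %vsub = sub nsw <4 x i32> %sum0, %sum1      ; feeds result lanes 1 and 3
  %blend = shufflevector <4 x i32> %vadd, <4 x i32> %vsub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %fr = freeze <4 x i32> %blend               ; one vector freeze replaces four scalar freezes
  store <4 x i32> %fr, ptr @a, align 4
  ret void
}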
@@ -301,14 +382,14 @@ define void @reorder_alt_subTree() #0 {
 
 define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %d) {
 ; CHECK-LABEL: @reorder_alt_rightsubTree(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[D:%.*]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fsub <2 x double> [[TMP7]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP7]], [[TMP2]]
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x double> [[TMP10]], ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[D:%.*]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, ptr [[B:%.*]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[C:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %1 = load double, ptr %a
@@ -332,20 +413,20 @@ define void @reorder_alt_rightsubTree(ptr nocapture %c, ptr noalias nocapture re
 
 define void @vec_shuff_reorder() #0 {
 ; CHECK-LABEL: @vec_shuff_reorder(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @fa, align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr @fb, align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr @fa, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr @fb, align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fb, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr getelementptr inbounds ([4 x float], ptr @fa, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd <4 x float> [[TMP10]], [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = fsub <4 x float> [[TMP10]], [[TMP14]]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x float> [[TMP15]], <4 x float> [[TMP16]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    store <4 x float> [[TMP17]], ptr @fc, align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fsub <4 x float> [[TMP7]], [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr @fc, align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = load float, ptr @fb, align 4

llvm/test/Transforms/SLPVectorizer/X86/fmuladd.ll

Lines changed: 31 additions & 0 deletions
@@ -46,6 +46,37 @@ define void @fmuladd_2f64() #0 {
   ret void
 }
 
+define void @fmuladd_2f64_freeze() #0 {
+; CHECK-LABEL: @fmuladd_2f64_freeze(
+; CHECK-NEXT:    [[A0:%.*]] = load double, ptr @srcA64, align 8
+; CHECK-NEXT:    [[A1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[B0:%.*]] = load double, ptr @srcB64, align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, ptr @srcC64, align 8
+; CHECK-NEXT:    [[C1:%.*]] = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[FMULADD0:%.*]] = call double @llvm.fmuladd.f64(double [[A0]], double [[B0]], double [[C0]])
+; CHECK-NEXT:    [[FMULADD1:%.*]] = call double @llvm.fmuladd.f64(double [[A1]], double [[B1]], double [[C1]])
+; CHECK-NEXT:    [[FREEZE0:%.*]] = freeze double [[FMULADD0]]
+; CHECK-NEXT:    [[FREEZE1:%.*]] = freeze double [[FMULADD1]]
+; CHECK-NEXT:    store double [[FREEZE0]], ptr @dst64, align 8
+; CHECK-NEXT:    store double [[FREEZE1]], ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT:    ret void
+;
+  %a0 = load double, ptr @srcA64, align 8
+  %a1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcA64, i32 0, i64 1), align 8
+  %b0 = load double, ptr @srcB64, align 8
+  %b1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcB64, i32 0, i64 1), align 8
+  %c0 = load double, ptr @srcC64, align 8
+  %c1 = load double, ptr getelementptr inbounds ([8 x double], ptr @srcC64, i32 0, i64 1), align 8
+  %fmuladd0 = call double @llvm.fmuladd.f64(double %a0, double %b0, double %c0)
+  %fmuladd1 = call double @llvm.fmuladd.f64(double %a1, double %b1, double %c1)
+  %freeze0 = freeze double %fmuladd0
+  %freeze1 = freeze double %fmuladd1
+  store double %freeze0, ptr @dst64, align 8
+  store double %freeze1, ptr getelementptr inbounds ([8 x double], ptr @dst64, i32 0, i64 1), align 8
+  ret void
+}
+
 define void @fmuladd_4f64() #0 {
 ; SSE-LABEL: @fmuladd_4f64(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr @srcA64, align 8
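
Assuming SLP gains freeze support, the @fmuladd_2f64_freeze scalar sequence above would be expected to vectorize much like the neighboring @fmuladd_2f64 test, with the two scalar freezes merged into one vector freeze. A rough sketch under that assumption (names illustrative, not actual vectorizer output):

; Hypothetical vectorized form of @fmuladd_2f64_freeze (illustrative only):
  %va = load <2 x double>, ptr @srcA64, align 8
  %vb = load <2 x double>, ptr @srcB64, align 8
  %vc = load <2 x double>, ptr @srcC64, align 8
  %vfma = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
  %vfr = freeze <2 x double> %vfma    ; a single vector freeze replaces two scalar freezes
  store <2 x double> %vfr, ptr @dst64, align 8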
