@@ -57,6 +57,86 @@ loop.exit:
57
57
ret void
58
58
}
59
59
60
; Variant with getelementptr nusw. The vectorizer must drop the 'nusw' flag
; from the getelementptr (and 'nuw nsw' from the feeding 'sub') when the
; access becomes an unconditional masked load, since the scalar form was only
; executed under the %i23 guard and would otherwise yield poison at iv == 0.
define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input,
                                  ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nusw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; Poison-generating flags must not appear on the next two instructions:
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Guarded path: iv >= 1 here, so iv - 1 and input[iv - 1] are in bounds.
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nusw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nusw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
99
+
100
; Variant with getelementptr nuw. As with the nusw variant, the vectorizer
; must drop the 'nuw' flag from the getelementptr (and 'nuw nsw' from the
; feeding 'sub') when the guarded scalar load becomes an unconditional
; masked load, to avoid introducing poison at the iv == 0 lane.
define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input,
                                 ptr %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_gep_nuw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
; Poison-generating flags must not appear on the next two instructions:
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP8]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Guarded path: iv >= 1 here, so iv - 1 and input[iv - 1] are in bounds.
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr nuw float, ptr %input, i64 %i27
  %i30 = load float, ptr %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr nuw float, ptr %output, i64 %iv
  store float %i34, ptr %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
139
+
60
140
; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
define void @drop_nonpred_scalar_nuw_nsw (ptr noalias nocapture readonly %input ,
0 commit comments