@@ -100,17 +100,41 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
100
100
define void @store_i64 (ptr nocapture %0 , i32 %1 , i32 %2 ) {
101
101
; SSE-LABEL: @store_i64(
102
102
; SSE-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
103
- ; SSE-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
104
- ; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
105
- ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
106
- ; SSE-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
107
- ; SSE-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], <i64 15, i64 15, i64 15, i64 15>
108
- ; SSE-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
109
- ; SSE-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
110
- ; SSE-NEXT: [[TMP12:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
111
- ; SSE-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
112
- ; SSE-NEXT: [[TMP14:%.*]] = zext <4 x i32> [[TMP13]] to <4 x i64>
113
- ; SSE-NEXT: store <4 x i64> [[TMP14]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
103
+ ; SSE-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
104
+ ; SSE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
105
+ ; SSE-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
106
+ ; SSE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
107
+ ; SSE-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
108
+ ; SSE-NEXT: [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
109
+ ; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
110
+ ; SSE-NEXT: store i64 [[TMP11]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
111
+ ; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
112
+ ; SSE-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
113
+ ; SSE-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
114
+ ; SSE-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
115
+ ; SSE-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
116
+ ; SSE-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
117
+ ; SSE-NEXT: [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
118
+ ; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
119
+ ; SSE-NEXT: store i64 [[TMP19]], ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
120
+ ; SSE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
121
+ ; SSE-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
122
+ ; SSE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
123
+ ; SSE-NEXT: [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
124
+ ; SSE-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
125
+ ; SSE-NEXT: [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
126
+ ; SSE-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
127
+ ; SSE-NEXT: [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
128
+ ; SSE-NEXT: store i64 [[TMP27]], ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
129
+ ; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
130
+ ; SSE-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
131
+ ; SSE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
132
+ ; SSE-NEXT: [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
133
+ ; SSE-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
134
+ ; SSE-NEXT: [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
135
+ ; SSE-NEXT: [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
136
+ ; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
137
+ ; SSE-NEXT: store i64 [[TMP35]], ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
114
138
; SSE-NEXT: ret void
115
139
;
116
140
; AVX-LABEL: @store_i64(
0 commit comments