@@ -195,9 +195,9 @@ define void @test_2xi64(ptr noalias %data, ptr noalias %factor) {
195
195
; VF2-NEXT: [[WIDE_LOAD:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
196
196
; VF2-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
197
197
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
198
- ; VF2-NEXT: [[TMP23:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
199
- ; VF2-NEXT: [[TMP24:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[TMP23]]
200
- ; VF2-NEXT: store <2 x i64> [[TMP24]], ptr [[TMP7]], align 8
198
+ ; VF2-NEXT: [[STRIDED_VEC1:%.*]] = load <2 x i64>, ptr [[TMP7]], align 8
199
+ ; VF2-NEXT: [[TMP8:%.*]] = mul <2 x i64> [[WIDE_LOAD]], [[STRIDED_VEC1]]
200
+ ; VF2-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8
201
201
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
202
202
; VF2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
203
203
; VF2-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -1099,30 +1099,28 @@ define void @test_2xi64_sub_of_wide_loads_ops_swapped(ptr noalias %data, ptr noa
1099
1099
; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
1100
1100
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
1101
1101
; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
1102
- ; VF2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 8
1103
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
1104
- ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
1105
- ; VF2-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
1106
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
1107
- ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
1102
+ ; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
1103
+ ; VF2-NEXT: [[BROADCAST_SPLAT4:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
1108
1104
; VF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
1109
1105
; VF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
1110
1106
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 2
1111
- ; VF2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
1112
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0
1113
- ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
1114
- ; VF2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
1115
- ; VF2-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i64 0
1116
- ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT5]], <2 x i64> poison, <2 x i32> zeroinitializer
1107
+ ; VF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = load <2 x i64>, ptr [[TMP8]], align 8
1108
+ ; VF2-NEXT: [[BROADCAST_SPLAT6:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
1117
1109
; VF2-NEXT: [[TMP12:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
1118
1110
; VF2-NEXT: [[TMP13:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT4]], [[BROADCAST_SPLAT6]]
1119
1111
; VF2-NEXT: [[TMP19:%.*]] = shl nsw i64 [[TMP0]], 1
1120
1112
; VF2-NEXT: [[TMP20:%.*]] = shl nsw i64 [[TMP1]], 1
1121
1113
; VF2-NEXT: [[DATA_0:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP19]]
1122
1114
; VF2-NEXT: [[DATA_1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP20]]
1123
- ; VF2-NEXT: store <2 x i64> [[TMP12]], ptr [[DATA_0]], align 8
1124
- ; VF2-NEXT: store <2 x i64> [[TMP13]], ptr [[DATA_1]], align 8
1125
- ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 2
1115
+ ; VF2-NEXT: [[TMP14:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]]
1116
+ ; VF2-NEXT: [[TMP15:%.*]] = sub <2 x i64> [[BROADCAST_SPLAT6]], [[BROADCAST_SPLAT4]]
1117
+ ; VF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1118
+ ; VF2-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i64> [[TMP16]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1119
+ ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC]], ptr [[DATA_0]], align 8
1120
+ ; VF2-NEXT: [[TMP17:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1121
+ ; VF2-NEXT: [[INTERLEAVED_VEC4:%.*]] = shufflevector <4 x i64> [[TMP17]], <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
1122
+ ; VF2-NEXT: store <4 x i64> [[INTERLEAVED_VEC4]], ptr [[DATA_1]], align 8
1123
+ ; VF2-NEXT: [[IV_NEXT]] = add nuw i64 [[INDEX]], 4
1126
1124
; VF2-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 100
1127
1125
; VF2-NEXT: br i1 [[EC]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1128
1126
; VF2: [[MIDDLE_BLOCK]]:
0 commit comments