@@ -1016,22 +1016,10 @@ define i32 @smax_intrinsic_rdx_v8i32(i32* %p0) {
 ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 5
 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 6
 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 7
-; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[P0]], align 4
-; CHECK-NEXT: [[T1:%.*]] = load i32, i32* [[P1]], align 4
-; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[P2]], align 4
-; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[P3]], align 4
-; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[P4]], align 4
-; CHECK-NEXT: [[T5:%.*]] = load i32, i32* [[P5]], align 4
-; CHECK-NEXT: [[T6:%.*]] = load i32, i32* [[P6]], align 4
-; CHECK-NEXT: [[T7:%.*]] = load i32, i32* [[P7]], align 4
-; CHECK-NEXT: [[M10:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T1]], i32 [[T0]])
-; CHECK-NEXT: [[M32:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T3]], i32 [[T2]])
-; CHECK-NEXT: [[M54:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T5]], i32 [[T4]])
-; CHECK-NEXT: [[M76:%.*]] = tail call i32 @llvm.smax.i32(i32 [[T7]], i32 [[T6]])
-; CHECK-NEXT: [[M3210:%.*]] = tail call i32 @llvm.smax.i32(i32 [[M32]], i32 [[M10]])
-; CHECK-NEXT: [[M7654:%.*]] = tail call i32 @llvm.smax.i32(i32 [[M76]], i32 [[M54]])
-; CHECK-NEXT: [[M:%.*]] = tail call i32 @llvm.smax.i32(i32 [[M7654]], i32 [[M3210]])
-; CHECK-NEXT: ret i32 [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <8 x i32>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
 ;
 %p1 = getelementptr inbounds i32, i32* %p0, i64 1
 %p2 = getelementptr inbounds i32, i32* %p0, i64 2
@@ -1067,22 +1055,10 @@ define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
 ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 5
 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 6
 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 7
-; CHECK-NEXT: [[T0:%.*]] = load i16, i16* [[P0]], align 4
-; CHECK-NEXT: [[T1:%.*]] = load i16, i16* [[P1]], align 4
-; CHECK-NEXT: [[T2:%.*]] = load i16, i16* [[P2]], align 4
-; CHECK-NEXT: [[T3:%.*]] = load i16, i16* [[P3]], align 4
-; CHECK-NEXT: [[T4:%.*]] = load i16, i16* [[P4]], align 4
-; CHECK-NEXT: [[T5:%.*]] = load i16, i16* [[P5]], align 4
-; CHECK-NEXT: [[T6:%.*]] = load i16, i16* [[P6]], align 4
-; CHECK-NEXT: [[T7:%.*]] = load i16, i16* [[P7]], align 4
-; CHECK-NEXT: [[M10:%.*]] = tail call i16 @llvm.smin.i16(i16 [[T1]], i16 [[T0]])
-; CHECK-NEXT: [[M32:%.*]] = tail call i16 @llvm.smin.i16(i16 [[T3]], i16 [[T2]])
-; CHECK-NEXT: [[M54:%.*]] = tail call i16 @llvm.smin.i16(i16 [[T5]], i16 [[T4]])
-; CHECK-NEXT: [[M76:%.*]] = tail call i16 @llvm.smin.i16(i16 [[T7]], i16 [[T6]])
-; CHECK-NEXT: [[M3210:%.*]] = tail call i16 @llvm.smin.i16(i16 [[M32]], i16 [[M10]])
-; CHECK-NEXT: [[M7654:%.*]] = tail call i16 @llvm.smin.i16(i16 [[M76]], i16 [[M54]])
-; CHECK-NEXT: [[M:%.*]] = tail call i16 @llvm.smin.i16(i16 [[M7654]], i16 [[M3210]])
-; CHECK-NEXT: ret i16 [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP2]])
+; CHECK-NEXT: ret i16 [[TMP3]]
 ;
 %p1 = getelementptr inbounds i16, i16* %p0, i64 1
 %p2 = getelementptr inbounds i16, i16* %p0, i64 2
@@ -1110,18 +1086,27 @@ define i16 @smin_intrinsic_rdx_v8i16(i16* %p0) {
 }
 
 define i64 @umax_intrinsic_rdx_v4i64(i64* %p0) {
-; CHECK-LABEL: @umax_intrinsic_rdx_v4i64(
-; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
-; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
-; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
-; CHECK-NEXT: [[T0:%.*]] = load i64, i64* [[P0]], align 4
-; CHECK-NEXT: [[T1:%.*]] = load i64, i64* [[P1]], align 4
-; CHECK-NEXT: [[T2:%.*]] = load i64, i64* [[P2]], align 4
-; CHECK-NEXT: [[T3:%.*]] = load i64, i64* [[P3]], align 4
-; CHECK-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
-; CHECK-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
-; CHECK-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
-; CHECK-NEXT: ret i64 [[M]]
+; DEFAULT-LABEL: @umax_intrinsic_rdx_v4i64(
+; DEFAULT-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
+; DEFAULT-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
+; DEFAULT-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
+; DEFAULT-NEXT: [[T0:%.*]] = load i64, i64* [[P0]], align 4
+; DEFAULT-NEXT: [[T1:%.*]] = load i64, i64* [[P1]], align 4
+; DEFAULT-NEXT: [[T2:%.*]] = load i64, i64* [[P2]], align 4
+; DEFAULT-NEXT: [[T3:%.*]] = load i64, i64* [[P3]], align 4
+; DEFAULT-NEXT: [[M10:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T1]], i64 [[T0]])
+; DEFAULT-NEXT: [[M32:%.*]] = tail call i64 @llvm.umax.i64(i64 [[T3]], i64 [[T2]])
+; DEFAULT-NEXT: [[M:%.*]] = tail call i64 @llvm.umax.i64(i64 [[M32]], i64 [[M10]])
+; DEFAULT-NEXT: ret i64 [[M]]
+;
+; THRESH-LABEL: @umax_intrinsic_rdx_v4i64(
+; THRESH-NEXT: [[P1:%.*]] = getelementptr inbounds i64, i64* [[P0:%.*]], i64 1
+; THRESH-NEXT: [[P2:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 2
+; THRESH-NEXT: [[P3:%.*]] = getelementptr inbounds i64, i64* [[P0]], i64 3
+; THRESH-NEXT: [[TMP1:%.*]] = bitcast i64* [[P0]] to <4 x i64>*
+; THRESH-NEXT: [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 4
+; THRESH-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP2]])
+; THRESH-NEXT: ret i64 [[TMP3]]
 ;
 %p1 = getelementptr inbounds i64, i64* %p0, i64 1
 %p2 = getelementptr inbounds i64, i64* %p0, i64 2
@@ -1153,38 +1138,10 @@ define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) {
 ; CHECK-NEXT: [[PD:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 13
 ; CHECK-NEXT: [[PE:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 14
 ; CHECK-NEXT: [[PF:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 15
-; CHECK-NEXT: [[T0:%.*]] = load i8, i8* [[P0]], align 4
-; CHECK-NEXT: [[T1:%.*]] = load i8, i8* [[P1]], align 4
-; CHECK-NEXT: [[T2:%.*]] = load i8, i8* [[P2]], align 4
-; CHECK-NEXT: [[T3:%.*]] = load i8, i8* [[P3]], align 4
-; CHECK-NEXT: [[T4:%.*]] = load i8, i8* [[P4]], align 4
-; CHECK-NEXT: [[T5:%.*]] = load i8, i8* [[P5]], align 4
-; CHECK-NEXT: [[T6:%.*]] = load i8, i8* [[P6]], align 4
-; CHECK-NEXT: [[T7:%.*]] = load i8, i8* [[P7]], align 4
-; CHECK-NEXT: [[T8:%.*]] = load i8, i8* [[P8]], align 4
-; CHECK-NEXT: [[T9:%.*]] = load i8, i8* [[P9]], align 4
-; CHECK-NEXT: [[TA:%.*]] = load i8, i8* [[PA]], align 4
-; CHECK-NEXT: [[TB:%.*]] = load i8, i8* [[PB]], align 4
-; CHECK-NEXT: [[TC:%.*]] = load i8, i8* [[PC]], align 4
-; CHECK-NEXT: [[TD:%.*]] = load i8, i8* [[PD]], align 4
-; CHECK-NEXT: [[TE:%.*]] = load i8, i8* [[PE]], align 4
-; CHECK-NEXT: [[TF:%.*]] = load i8, i8* [[PF]], align 4
-; CHECK-NEXT: [[M10:%.*]] = tail call i8 @llvm.umin.i8(i8 [[T1]], i8 [[T0]])
-; CHECK-NEXT: [[M32:%.*]] = tail call i8 @llvm.umin.i8(i8 [[T3]], i8 [[T2]])
-; CHECK-NEXT: [[M54:%.*]] = tail call i8 @llvm.umin.i8(i8 [[T5]], i8 [[T4]])
-; CHECK-NEXT: [[M76:%.*]] = tail call i8 @llvm.umin.i8(i8 [[T7]], i8 [[T6]])
-; CHECK-NEXT: [[M98:%.*]] = tail call i8 @llvm.umin.i8(i8 [[T9]], i8 [[T8]])
-; CHECK-NEXT: [[MBA:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TB]], i8 [[TA]])
-; CHECK-NEXT: [[MDC:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TD]], i8 [[TC]])
-; CHECK-NEXT: [[MFE:%.*]] = tail call i8 @llvm.umin.i8(i8 [[TF]], i8 [[TE]])
-; CHECK-NEXT: [[M3210:%.*]] = tail call i8 @llvm.umin.i8(i8 [[M32]], i8 [[M10]])
-; CHECK-NEXT: [[M7654:%.*]] = tail call i8 @llvm.umin.i8(i8 [[M76]], i8 [[M54]])
-; CHECK-NEXT: [[MDC98:%.*]] = tail call i8 @llvm.umin.i8(i8 [[MDC]], i8 [[M98]])
-; CHECK-NEXT: [[MFEBA:%.*]] = tail call i8 @llvm.umin.i8(i8 [[MFE]], i8 [[MBA]])
-; CHECK-NEXT: [[ML:%.*]] = tail call i8 @llvm.umin.i8(i8 [[M3210]], i8 [[M7654]])
-; CHECK-NEXT: [[MH:%.*]] = tail call i8 @llvm.umin.i8(i8 [[MFEBA]], i8 [[MDC98]])
-; CHECK-NEXT: [[M:%.*]] = tail call i8 @llvm.umin.i8(i8 [[MH]], i8 [[ML]])
-; CHECK-NEXT: ret i8 [[M]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <16 x i8>*
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP3]]
 ;
 %p1 = getelementptr inbounds i8, i8* %p0, i64 1
 %p2 = getelementptr inbounds i8, i8* %p0, i64 2