1
- // RUN: %clang_cc1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
1
+ // RUN: %clang_cc1 -O0 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s
2
+ // RUN: %clang_cc1 -O1 -fenable-matrix -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,OPT %s
2
3
3
4
// Clang matrix-extension type: 5 x 5 matrix of double (25 elements).
typedef double dx5x5_t __attribute__((matrix_type (5 , 5 )));
4
5
// Clang matrix-extension type: float matrix declared as matrix_type(2, 3),
// i.e. 6 elements; the expected IR in this file handles it as <6 x float>.
typedef float fx2x3_t __attribute__((matrix_type (2 , 3 )));
@@ -506,7 +507,7 @@ void multiply_matrix_matrix_double(dx5x5_t b, dx5x5_t c) {
506
507
// CHECK-NEXT: [[RES:%.*]] = call <25 x double> @llvm.matrix.multiply.v25f64.v25f64.v25f64(<25 x double> [[B]], <25 x double> [[C]], i32 5, i32 5, i32 5)
507
508
// CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [25 x double]* %a to <25 x double>*
508
509
// CHECK-NEXT: store <25 x double> [[RES]], <25 x double>* [[A_ADDR]], align 8
509
- // CHECK-NEXT: ret void
510
+ // CHECK: ret void
510
511
//
511
512
512
513
dx5x5_t a ;
@@ -531,7 +532,7 @@ typedef int ix9x9_t __attribute__((matrix_type(9, 9)));
531
532
// CHECK-NEXT: [[RES:%.*]] = call <81 x i32> @llvm.matrix.multiply.v81i32.v27i32.v27i32(<27 x i32> [[B]], <27 x i32> [[C]], i32 9, i32 3, i32 9)
532
533
// CHECK-NEXT: [[A_ADDR:%.*]] = bitcast [81 x i32]* %a to <81 x i32>*
533
534
// CHECK-NEXT: store <81 x i32> [[RES]], <81 x i32>* [[A_ADDR]], align 4
534
- // CHECK-NEXT: ret void
535
+ // CHECK: ret void
535
536
//
536
537
void multiply_matrix_matrix_int (ix9x3_t b , ix3x9_t c ) {
537
538
ix9x9_t a ;
@@ -874,6 +875,8 @@ void insert_float_matrix_idx_i_u_float(fx2x3_t b, float e, int j, unsigned k) {
874
875
// CHECK-NEXT: [[K_EXT:%.*]] = zext i32 [[K]] to i64
875
876
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
876
877
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
878
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
879
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
877
880
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
878
881
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
879
882
// CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
@@ -890,6 +893,8 @@ void insert_float_matrix_idx_s_ull_float(fx2x3_t b, float e, short j, unsigned l
890
893
// CHECK-NEXT: [[K:%.*]] = load i64, i64* %k.addr, align 8
891
894
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K]], 2
892
895
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
896
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
897
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
893
898
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
894
899
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
895
900
// CHECK-NEXT: store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
@@ -907,6 +912,8 @@ void insert_int_idx_expr(ix9x3_t a, int i) {
907
912
// CHECK-NEXT: [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
908
913
// CHECK-NEXT: [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
909
914
// CHECK-NEXT: [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
915
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
916
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
910
917
// CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
911
918
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
912
919
// CHECK-NEXT: store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -980,9 +987,11 @@ int extract_int(ix9x3_t c, unsigned long j) {
980
987
// CHECK-LABEL: @extract_int(
981
988
// CHECK: [[J1:%.*]] = load i64, i64* %j.addr, align 8
982
989
// CHECK-NEXT: [[J2:%.*]] = load i64, i64* %j.addr, align 8
983
- // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
984
990
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J2]], 9
985
991
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
992
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
993
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
994
+ // CHECK-NEXT: [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
986
995
// CHECK-NEXT: [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
987
996
// CHECK-NEXT: ret i32 [[MATEXT]]
988
997
@@ -995,13 +1004,15 @@ double test_extract_matrix_pointer1(dx3x2_t **ptr, unsigned j) {
995
1004
// CHECK-LABEL: @test_extract_matrix_pointer1(
996
1005
// CHECK: [[J:%.*]] = load i32, i32* %j.addr, align 4
997
1006
// CHECK-NEXT: [[J_EXT:%.*]] = zext i32 [[J]] to i64
1007
+ // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1008
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
1009
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
998
1010
// CHECK-NEXT: [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
999
1011
// CHECK-NEXT: [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
1000
1012
// CHECK-NEXT: [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
1001
1013
// CHECK-NEXT: [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
1002
1014
// CHECK-NEXT: [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
1003
1015
// CHECK-NEXT: [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
1004
- // CHECK-NEXT: [[IDX:%.*]] = add i64 3, [[J_EXT]]
1005
1016
// CHECK-NEXT: [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
1006
1017
// CHECK-NEXT: ret double [[MATEXT]]
1007
1018
@@ -1027,13 +1038,17 @@ void insert_extract(dx5x5_t a, fx3x3_t b, unsigned long j, short k) {
1027
1038
// CHECK-LABEL: @insert_extract(
1028
1039
// CHECK: [[K:%.*]] = load i16, i16* %k.addr, align 2
1029
1040
// CHECK-NEXT: [[K_EXT:%.*]] = sext i16 [[K]] to i64
1030
- // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
1031
1041
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
1032
1042
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], 0
1033
- // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX]]
1043
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
1044
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1045
+ // CHECK-NEXT: [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
1046
+ // CHECK-NEXT: [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
1034
1047
// CHECK-NEXT: [[J:%.*]] = load i64, i64* %j.addr, align 8
1035
1048
// CHECK-NEXT: [[IDX3:%.*]] = mul i64 [[J]], 3
1036
1049
// CHECK-NEXT: [[IDX4:%.*]] = add i64 [[IDX3]], 2
1050
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
1051
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1037
1052
// CHECK-NEXT: [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
1038
1053
// CHECK-NEXT: [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
1039
1054
// CHECK-NEXT: store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
@@ -1068,9 +1083,13 @@ void insert_compound_stmt_field(struct Foo *a, float f, unsigned i, unsigned j)
1068
1083
// CHECK-NEXT: [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
1069
1084
// CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
1070
1085
// CHECK-NEXT: [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
1086
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1087
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1071
1088
// CHECK-NEXT: [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1072
1089
// CHECK-NEXT: [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
1073
1090
// CHECK-NEXT: [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
1091
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
1092
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1074
1093
// CHECK-NEXT: [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
1075
1094
// CHECK-NEXT: [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
1076
1095
// CHECK-NEXT: store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
@@ -1085,23 +1104,29 @@ void matrix_as_idx(ix9x3_t a, int i, int j, dx5x5_t b) {
1085
1104
// CHECK-NEXT: [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
1086
1105
// CHECK-NEXT: [[J1:%.*]] = load i32, i32* %j.addr, align 4
1087
1106
// CHECK-NEXT: [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
1088
- // CHECK-NEXT: [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
1089
1107
// CHECK-NEXT: [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
1090
1108
// CHECK-NEXT: [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
1109
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
1110
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1111
+ // CHECK-NEXT: [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
1091
1112
// CHECK-NEXT: [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
1092
1113
// CHECK-NEXT: [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
1093
1114
// CHECK-NEXT: [[J2:%.*]] = load i32, i32* %j.addr, align 4
1094
1115
// CHECK-NEXT: [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
1095
1116
// CHECK-NEXT: [[I2:%.*]] = load i32, i32* %i.addr, align 4
1096
1117
// CHECK-NEXT: [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
1097
- // CHECK-NEXT: [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
1098
1118
// CHECK-NEXT: [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
1099
1119
// CHECK-NEXT: [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
1120
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
1121
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1122
+ // CHECK-NEXT: [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
1100
1123
// CHECK-NEXT: [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
1101
1124
// CHECK-NEXT: [[MI3:%.*]] = add nsw i32 [[MI2]], 2
1102
1125
// CHECK-NEXT: [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
1103
1126
// CHECK-NEXT: [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
1104
1127
// CHECK-NEXT: [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
1128
+ // OPT-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
1129
+ // OPT-NEXT: call void @llvm.assume(i1 [[CMP]])
1105
1130
// CHECK-NEXT: [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
1106
1131
// CHECK-NEXT: [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
1107
1132
// CHECK-NEXT: store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
0 commit comments