@@ -46,113 +46,110 @@ define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) l
46
46
; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW1:%.*]] = getelementptr i8, i8* [[TMP12]], i64 48
47
47
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[TMP12]], i64 16
48
48
; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL2:%.*]] = bitcast i8* [[TMP14]] to i16*
49
- ; CHECK-NEXT: [[TMP15:%.*]] = trunc i16 8 to i8
50
- ; CHECK-NEXT: store i8 [[TMP15]], i8* [[AMX_TMM_0_SHAPE_ROW1]], align 1
49
+ ; CHECK-NEXT: store i8 8, i8* [[AMX_TMM_0_SHAPE_ROW1]], align 1
51
50
; CHECK-NEXT: store i16 [[COL:%.*]], i16* [[AMX_TMM_0_SHAPE_COL2]], align 2
52
51
; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP12]])
53
52
; CHECK-NEXT: [[I9:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
54
53
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64, x86_amx [[I9]])
55
- ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32>* [[TMP5]] to i8*
54
+ ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32>* [[TMP5]] to i8*
56
55
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP5]], align 4
57
- ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP16]], i64 0
58
- ; CHECK-NEXT: store i8 1, i8* [[TMP17]], align 1
59
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW3:%.*]] = getelementptr i8, i8* [[TMP16]], i64 48
60
- ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP16]], i64 16
61
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL4:%.*]] = bitcast i8* [[TMP18]] to i16*
62
- ; CHECK-NEXT: [[TMP19:%.*]] = trunc i16 [[ROW]] to i8
63
- ; CHECK-NEXT: store i8 [[TMP19]], i8* [[AMX_TMM_0_SHAPE_ROW3]], align 1
56
+ ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[TMP15]], i64 0
57
+ ; CHECK-NEXT: store i8 1, i8* [[TMP16]], align 1
58
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW3:%.*]] = getelementptr i8, i8* [[TMP15]], i64 48
59
+ ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[TMP15]], i64 16
60
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL4:%.*]] = bitcast i8* [[TMP17]] to i16*
61
+ ; CHECK-NEXT: [[TMP18:%.*]] = trunc i16 [[ROW]] to i8
62
+ ; CHECK-NEXT: store i8 [[TMP18]], i8* [[AMX_TMM_0_SHAPE_ROW3]], align 1
64
63
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL4]], align 2
65
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP16]])
64
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP15]])
66
65
; CHECK-NEXT: [[I10:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32)
67
66
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64, x86_amx [[I10]])
68
67
; CHECK-NEXT: br label [[IF_END:%.*]]
69
68
; CHECK: if.else:
70
- ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i32>* [[TMP4]] to i8*
69
+ ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i32>* [[TMP4]] to i8*
71
70
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP4]], align 4
72
- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP20]], i64 0
73
- ; CHECK-NEXT: store i8 1, i8* [[TMP21]], align 1
74
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW5:%.*]] = getelementptr i8, i8* [[TMP20]], i64 48
75
- ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, i8* [[TMP20]], i64 16
76
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL6:%.*]] = bitcast i8* [[TMP22]] to i16*
77
- ; CHECK-NEXT: [[TMP23:%.*]] = trunc i16 [[ROW]] to i8
78
- ; CHECK-NEXT: store i8 [[TMP23]], i8* [[AMX_TMM_0_SHAPE_ROW5]], align 1
71
+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, i8* [[TMP19]], i64 0
72
+ ; CHECK-NEXT: store i8 1, i8* [[TMP20]], align 1
73
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW5:%.*]] = getelementptr i8, i8* [[TMP19]], i64 48
74
+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, i8* [[TMP19]], i64 16
75
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL6:%.*]] = bitcast i8* [[TMP21]] to i16*
76
+ ; CHECK-NEXT: [[TMP22:%.*]] = trunc i16 [[ROW]] to i8
77
+ ; CHECK-NEXT: store i8 [[TMP22]], i8* [[AMX_TMM_0_SHAPE_ROW5]], align 1
79
78
; CHECK-NEXT: store i16 8, i16* [[AMX_TMM_0_SHAPE_COL6]], align 2
80
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP20]])
79
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP19]])
81
80
; CHECK-NEXT: [[I11:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
82
81
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 8, i8* [[I5]], i64 64, x86_amx [[I11]])
83
- ; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i32>* [[TMP3]] to i8*
82
+ ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i32>* [[TMP3]] to i8*
84
83
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP3]], align 4
85
- ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP24]], i64 0
86
- ; CHECK-NEXT: store i8 1, i8* [[TMP25]], align 1
87
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW7:%.*]] = getelementptr i8, i8* [[TMP24]], i64 48
88
- ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, i8* [[TMP24]], i64 16
89
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL8:%.*]] = bitcast i8* [[TMP26]] to i16*
90
- ; CHECK-NEXT: [[TMP27:%.*]] = trunc i16 8 to i8
91
- ; CHECK-NEXT: store i8 [[TMP27]], i8* [[AMX_TMM_0_SHAPE_ROW7]], align 1
84
+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, i8* [[TMP23]], i64 0
85
+ ; CHECK-NEXT: store i8 1, i8* [[TMP24]], align 1
86
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW7:%.*]] = getelementptr i8, i8* [[TMP23]], i64 48
87
+ ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i8, i8* [[TMP23]], i64 16
88
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL8:%.*]] = bitcast i8* [[TMP25]] to i16*
89
+ ; CHECK-NEXT: store i8 8, i8* [[AMX_TMM_0_SHAPE_ROW7]], align 1
92
90
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL8]], align 2
93
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP24]])
91
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP23]])
94
92
; CHECK-NEXT: [[I12:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
95
93
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64, x86_amx [[I12]])
96
- ; CHECK-NEXT: [[TMP28:%.*]] = bitcast <16 x i32>* [[TMP2]] to i8*
94
+ ; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i32>* [[TMP2]] to i8*
97
95
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP2]], align 4
98
- ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, i8* [[TMP28]], i64 0
99
- ; CHECK-NEXT: store i8 1, i8* [[TMP29]], align 1
100
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW9:%.*]] = getelementptr i8, i8* [[TMP28]], i64 48
101
- ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, i8* [[TMP28]], i64 16
102
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL10:%.*]] = bitcast i8* [[TMP30]] to i16*
103
- ; CHECK-NEXT: [[TMP31:%.*]] = trunc i16 [[ROW]] to i8
104
- ; CHECK-NEXT: store i8 [[TMP31]], i8* [[AMX_TMM_0_SHAPE_ROW9]], align 1
96
+ ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, i8* [[TMP26]], i64 0
97
+ ; CHECK-NEXT: store i8 1, i8* [[TMP27]], align 1
98
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW9:%.*]] = getelementptr i8, i8* [[TMP26]], i64 48
99
+ ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, i8* [[TMP26]], i64 16
100
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL10:%.*]] = bitcast i8* [[TMP28]] to i16*
101
+ ; CHECK-NEXT: [[TMP29:%.*]] = trunc i16 [[ROW]] to i8
102
+ ; CHECK-NEXT: store i8 [[TMP29]], i8* [[AMX_TMM_0_SHAPE_ROW9]], align 1
105
103
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL10]], align 2
106
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP28]])
104
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP26]])
107
105
; CHECK-NEXT: [[I13:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf2, i64 0, i64 0), i64 32)
108
106
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64, x86_amx [[I13]])
109
107
; CHECK-NEXT: br label [[IF_END]]
110
108
; CHECK: if.end:
111
- ; CHECK-NEXT: [[TMP32:%.*]] = bitcast <16 x i32>* [[TMP1]] to i8*
109
+ ; CHECK-NEXT: [[TMP30:%.*]] = bitcast <16 x i32>* [[TMP1]] to i8*
112
110
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP1]], align 4
113
- ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, i8* [[TMP32]], i64 0
114
- ; CHECK-NEXT: store i8 1, i8* [[TMP33]], align 1
115
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW11:%.*]] = getelementptr i8, i8* [[TMP32]], i64 48
116
- ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, i8* [[TMP32]], i64 16
117
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL12:%.*]] = bitcast i8* [[TMP34]] to i16*
118
- ; CHECK-NEXT: [[TMP35:%.*]] = trunc i16 [[ROW]] to i8
119
- ; CHECK-NEXT: store i8 [[TMP35]], i8* [[AMX_TMM_0_SHAPE_ROW11]], align 1
111
+ ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i8, i8* [[TMP30]], i64 0
112
+ ; CHECK-NEXT: store i8 1, i8* [[TMP31]], align 1
113
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW11:%.*]] = getelementptr i8, i8* [[TMP30]], i64 48
114
+ ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i8, i8* [[TMP30]], i64 16
115
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL12:%.*]] = bitcast i8* [[TMP32]] to i16*
116
+ ; CHECK-NEXT: [[TMP33:%.*]] = trunc i16 [[ROW]] to i8
117
+ ; CHECK-NEXT: store i8 [[TMP33]], i8* [[AMX_TMM_0_SHAPE_ROW11]], align 1
120
118
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL12]], align 2
121
- ; CHECK-NEXT: [[AMX_TMM_1_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 49
122
- ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, i8* [[TMP32]], i64 18
123
- ; CHECK-NEXT: [[AMX_TMM_1_SHAPE_COL:%.*]] = bitcast i8* [[TMP36]] to i16*
124
- ; CHECK-NEXT: [[TMP37:%.*]] = trunc i16 [[ROW]] to i8
125
- ; CHECK-NEXT: store i8 [[TMP37]], i8* [[AMX_TMM_1_SHAPE_ROW]], align 1
119
+ ; CHECK-NEXT: [[AMX_TMM_1_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP30]], i64 49
120
+ ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, i8* [[TMP30]], i64 18
121
+ ; CHECK-NEXT: [[AMX_TMM_1_SHAPE_COL:%.*]] = bitcast i8* [[TMP34]] to i16*
122
+ ; CHECK-NEXT: [[TMP35:%.*]] = trunc i16 [[ROW]] to i8
123
+ ; CHECK-NEXT: store i8 [[TMP35]], i8* [[AMX_TMM_1_SHAPE_ROW]], align 1
126
124
; CHECK-NEXT: store i16 8, i16* [[AMX_TMM_1_SHAPE_COL]], align 2
127
- ; CHECK-NEXT: [[AMX_TMM_2_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 50
128
- ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i8, i8* [[TMP32]], i64 20
129
- ; CHECK-NEXT: [[AMX_TMM_2_SHAPE_COL:%.*]] = bitcast i8* [[TMP38]] to i16*
130
- ; CHECK-NEXT: [[TMP39:%.*]] = trunc i16 8 to i8
131
- ; CHECK-NEXT: store i8 [[TMP39]], i8* [[AMX_TMM_2_SHAPE_ROW]], align 1
125
+ ; CHECK-NEXT: [[AMX_TMM_2_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP30]], i64 50
126
+ ; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, i8* [[TMP30]], i64 20
127
+ ; CHECK-NEXT: [[AMX_TMM_2_SHAPE_COL:%.*]] = bitcast i8* [[TMP36]] to i16*
128
+ ; CHECK-NEXT: store i8 8, i8* [[AMX_TMM_2_SHAPE_ROW]], align 1
132
129
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_2_SHAPE_COL]], align 2
133
- ; CHECK-NEXT: [[AMX_TMM_3_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP32]], i64 51
134
- ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, i8* [[TMP32]], i64 22
135
- ; CHECK-NEXT: [[AMX_TMM_3_SHAPE_COL:%.*]] = bitcast i8* [[TMP40]] to i16*
136
- ; CHECK-NEXT: [[TMP41:%.*]] = trunc i16 [[ROW]] to i8
137
- ; CHECK-NEXT: store i8 [[TMP41]], i8* [[AMX_TMM_3_SHAPE_ROW]], align 1
130
+ ; CHECK-NEXT: [[AMX_TMM_3_SHAPE_ROW:%.*]] = getelementptr i8, i8* [[TMP30]], i64 51
131
+ ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, i8* [[TMP30]], i64 22
132
+ ; CHECK-NEXT: [[AMX_TMM_3_SHAPE_COL:%.*]] = bitcast i8* [[TMP37]] to i16*
133
+ ; CHECK-NEXT: [[TMP38:%.*]] = trunc i16 [[ROW]] to i8
134
+ ; CHECK-NEXT: store i8 [[TMP38]], i8* [[AMX_TMM_3_SHAPE_ROW]], align 1
138
135
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_3_SHAPE_COL]], align 2
139
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP32]])
136
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP30]])
140
137
; CHECK-NEXT: [[I14:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 8, i8* [[I5]], i64 64)
141
138
; CHECK-NEXT: [[I15:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 [[COL]], i8* [[I3]], i64 64)
142
139
; CHECK-NEXT: [[I16:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I1]], i64 64)
143
140
; CHECK-NEXT: [[I17:%.*]] = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 [[ROW]], i16 [[COL]], i16 8, x86_amx [[I16]], x86_amx [[I14]], x86_amx [[I15]])
144
141
; CHECK-NEXT: call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I7]], i64 64, x86_amx [[I17]])
145
- ; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i32>* [[TMP0]] to i8*
142
+ ; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i32>* [[TMP0]] to i8*
146
143
; CHECK-NEXT: store <16 x i32> zeroinitializer, <16 x i32>* [[TMP0]], align 4
147
- ; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, i8* [[TMP42]], i64 0
148
- ; CHECK-NEXT: store i8 1, i8* [[TMP43]], align 1
149
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW13:%.*]] = getelementptr i8, i8* [[TMP42]], i64 48
150
- ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i8, i8* [[TMP42]], i64 16
151
- ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL14:%.*]] = bitcast i8* [[TMP44]] to i16*
152
- ; CHECK-NEXT: [[TMP45:%.*]] = trunc i16 [[ROW]] to i8
153
- ; CHECK-NEXT: store i8 [[TMP45]], i8* [[AMX_TMM_0_SHAPE_ROW13]], align 1
144
+ ; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, i8* [[TMP39]], i64 0
145
+ ; CHECK-NEXT: store i8 1, i8* [[TMP40]], align 1
146
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_ROW13:%.*]] = getelementptr i8, i8* [[TMP39]], i64 48
147
+ ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, i8* [[TMP39]], i64 16
148
+ ; CHECK-NEXT: [[AMX_TMM_0_SHAPE_COL14:%.*]] = bitcast i8* [[TMP41]] to i16*
149
+ ; CHECK-NEXT: [[TMP42:%.*]] = trunc i16 [[ROW]] to i8
150
+ ; CHECK-NEXT: store i8 [[TMP42]], i8* [[AMX_TMM_0_SHAPE_ROW13]], align 1
154
151
; CHECK-NEXT: store i16 [[COL]], i16* [[AMX_TMM_0_SHAPE_COL14]], align 2
155
- ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP42]])
152
+ ; CHECK-NEXT: call void @llvm.x86.ldtilecfg.internal(i8* [[TMP39]])
156
153
; CHECK-NEXT: [[I18:%.*]] = call x86_amx @llvm.x86.tileloadd64.internal(i16 [[ROW]], i16 [[COL]], i8* [[I7]], i64 64)
157
154
; CHECK-NEXT: tail call void @llvm.x86.tilestored64.internal(i16 [[ROW]], i16 [[COL]], i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @buf, i64 0, i64 0), i64 32, x86_amx [[I18]])
158
155
; CHECK-NEXT: ret void
0 commit comments