@@ -52,26 +52,18 @@ declare x86_amx @llvm.x86.tdpbuud.internal(i16, i16, i16, x86_amx, x86_amx, x86_
52
52
declare x86_amx @llvm.x86.tdpbf16ps.internal (i16 , i16 , i16 , x86_amx, x86_amx, x86_amx)
53
53
declare void @llvm.x86.tilestored64.internal (i16 , i16 , ptr , i64 , x86_amx)
54
54
55
- define void @PR90954 (ptr %0 , ptr %1 , i32 %2 ) {
55
+ define void @PR90954 (ptr %0 , ptr %1 , i32 %2 ) nounwind {
56
56
; CHECK-LABEL: PR90954:
57
57
; CHECK: # %bb.0:
58
58
; CHECK-NEXT: pushq %rbp
59
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
60
- ; CHECK-NEXT: .cfi_offset %rbp, -16
61
59
; CHECK-NEXT: movq %rsp, %rbp
62
- ; CHECK-NEXT: .cfi_def_cfa_register %rbp
63
60
; CHECK-NEXT: pushq %r15
64
61
; CHECK-NEXT: pushq %r14
65
62
; CHECK-NEXT: pushq %r13
66
63
; CHECK-NEXT: pushq %r12
67
64
; CHECK-NEXT: pushq %rbx
68
65
; CHECK-NEXT: andq $-1024, %rsp # imm = 0xFC00
69
66
; CHECK-NEXT: subq $5120, %rsp # imm = 0x1400
70
- ; CHECK-NEXT: .cfi_offset %rbx, -56
71
- ; CHECK-NEXT: .cfi_offset %r12, -48
72
- ; CHECK-NEXT: .cfi_offset %r13, -40
73
- ; CHECK-NEXT: .cfi_offset %r14, -32
74
- ; CHECK-NEXT: .cfi_offset %r15, -24
75
67
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
76
68
; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
77
69
; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp)
@@ -202,5 +194,37 @@ define void @PR90954(ptr %0, ptr %1, i32 %2) {
202
194
br label %6
203
195
}
204
196
197
; multi_use: codegen test for a single AMX tile value that has several uses.
; The function takes no arguments, returns void and is marked nounwind.
; Its IR body (at the bottom) creates one tile with tilezero.internal(16, 64)
; and passes that same value as all three x86_amx operands of two separate
; tdpbf16ps.internal calls; neither call result is used afterwards.
; The expected assembly below stores the zeroed tile with tilestored
; ("Folded Spill"), reloads it into %tmm1 with tileloadd ("Folded Reload"),
; and then issues the two tdpbf16ps instructions.
; NOTE(review): the assertion lines look autogenerated (update_llc_test_checks
; style) - confirm, and regenerate them instead of editing by hand.
; NOTE(review): presumably nounwind is there to keep .cfi directives out of
; the expected output - confirm.
+ define void @multi_use () nounwind {
198
+ ; CHECK-LABEL: multi_use:
199
+ ; CHECK: # %bb.0:
200
+ ; CHECK-NEXT: pushq %rbp
201
+ ; CHECK-NEXT: subq $2928, %rsp # imm = 0xB70
202
+ ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
203
+ ; CHECK-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
204
+ ; CHECK-NEXT: movb $1, {{[0-9]+}}(%rsp)
205
+ ; CHECK-NEXT: movb $16, {{[0-9]+}}(%rsp)
206
+ ; CHECK-NEXT: movw $64, {{[0-9]+}}(%rsp)
207
+ ; CHECK-NEXT: movb $16, {{[0-9]+}}(%rsp)
208
+ ; CHECK-NEXT: movw $64, {{[0-9]+}}(%rsp)
209
+ ; CHECK-NEXT: movw $64, %ax
210
+ ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
211
+ ; CHECK-NEXT: movw $16, %cx
212
+ ; CHECK-NEXT: tilezero %tmm0
213
+ ; CHECK-NEXT: movabsq $64, %rbp
214
+ ; CHECK-NEXT: tilestored %tmm0, 896(%rsp,%rbp) # 1024-byte Folded Spill
215
+ ; CHECK-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm1 # 1024-byte Folded Reload
216
+ ; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm1
217
+ ; CHECK-NEXT: tdpbf16ps %tmm0, %tmm0, %tmm0
218
+ ; CHECK-NEXT: addq $2928, %rsp # imm = 0xB70
219
+ ; CHECK-NEXT: popq %rbp
220
+ ; CHECK-NEXT: tilerelease
221
+ ; CHECK-NEXT: vzeroupper
222
+ ; CHECK-NEXT: retq
223
; %1 is the only tile defined here; both calls below read it in every tile
; operand position, and %2 / %3 are never read before the return.
+ %1 = call x86_amx @llvm.x86.tilezero.internal (i16 16 , i16 64 )
224
+ %2 = call x86_amx @llvm.x86.tdpbf16ps.internal (i16 16 , i16 64 , i16 64 , x86_amx %1 , x86_amx %1 , x86_amx %1 )
225
+ %3 = call x86_amx @llvm.x86.tdpbf16ps.internal (i16 16 , i16 64 , i16 64 , x86_amx %1 , x86_amx %1 , x86_amx %1 )
226
+ ret void
227
+ }
228
+
205
229
declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32 (<256 x i32 >)
206
230
declare <256 x i32 > @llvm.x86.cast.tile.to.vector.v256i32 (x86_amx)
0 commit comments