|
1 | 1 | # RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx940 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s
|
2 | 2 |
|
3 | 3 | # CHECK: Iterations: 1
|
4 |
| -# CHECK: Instructions: 58 |
5 |
| -# CHECK: Total Cycles: 543 |
6 |
| -# CHECK: Total uOps: 58 |
7 |
| - |
8 |
| -# CHECK: Resources: |
9 |
| -# CHECK: [0] - HWBranch |
10 |
| -# CHECK: [1] - HWExport |
11 |
| -# CHECK: [2] - HWLGKM |
12 |
| -# CHECK: [3] - HWSALU |
13 |
| -# CHECK: [4] - HWVALU |
14 |
| -# CHECK: [5] - HWVMEM |
15 |
| -# CHECK: [6] - HWXDL |
| 4 | +# CHECK: Instructions: 78 |
| 5 | +# CHECK: Total Cycles: 699 |
| 6 | +# CHECK: Total uOps: 78 |
16 | 7 |
|
17 | 8 | v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
|
18 | 9 | v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
|
@@ -101,6 +92,57 @@ v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
|
101 | 92 | v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
|
102 | 93 | v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
|
103 | 94 |
|
| 95 | +v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 96 | +v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 97 | + |
| 98 | +v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 99 | +v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 100 | + |
| 101 | +v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 102 | +v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 103 | + |
| 104 | +v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 105 | +v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 106 | + |
| 107 | +v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 108 | +v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 109 | +v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 110 | +v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 111 | + |
| 112 | +v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 113 | +v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 114 | +v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 115 | +v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 116 | + |
| 117 | +v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 118 | +v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 119 | +v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 120 | +v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 121 | + |
| 122 | +# CHECK: Instruction Info: |
| 123 | +# CHECK-NEXT:[1]: #uOps |
| 124 | +# CHECK-NEXT:[2]: Latency |
| 125 | +# CHECK-NEXT:[3]: RThroughput |
| 126 | +# CHECK-NEXT:[4]: MayLoad |
| 127 | +# CHECK-NEXT:[5]: MayStore |
| 128 | +# CHECK-NEXT:[6]: HasSideEffects (U) |
| 129 | + |
| 130 | +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: |
| 131 | + |
| 132 | +# CHECK: 1 8 1.00 U v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] |
| 133 | +# CHECK-NEXT: 1 8 1.00 U v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] |
| 134 | +# CHECK-NEXT: 1 20 1.00 U v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] |
| 135 | +# CHECK-NEXT: 1 20 1.00 U v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] |
| 136 | + |
| 137 | +# CHECK: Resources: |
| 138 | +# CHECK: [0] - HWBranch |
| 139 | +# CHECK: [1] - HWExport |
| 140 | +# CHECK: [2] - HWLGKM |
| 141 | +# CHECK: [3] - HWSALU |
| 142 | +# CHECK: [4] - HWVALU |
| 143 | +# CHECK: [5] - HWVMEM |
| 144 | +# CHECK: [6] - HWXDL |
| 145 | + |
104 | 146 | # CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions:
|
105 | 147 | # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5]
|
106 | 148 | # CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5]
|
@@ -160,3 +202,23 @@ v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
|
160 | 202 | # CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9
|
161 | 203 | # CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1
|
162 | 204 | # CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11
|
| 205 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 206 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 207 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 208 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 209 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 210 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 211 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] |
| 212 | +# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] |
| 213 | +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 214 | +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 215 | +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 216 | +# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] |
| 217 | +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 218 | +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 219 | +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 220 | +# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 221 | +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 222 | +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 223 | +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
| 224 | +# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 |
0 commit comments