|
8 | 8 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
|
9 | 9 | ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
|
10 | 10 |
|
11 |
| -; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s |
12 |
| -; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s |
| 11 | +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG,GFX900-SDAG,GFX900 %s |
| 12 | +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL,GFX900-GISEL,GFX900 %s |
13 | 13 |
|
14 | 14 | ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-SDAG,GFX90A-SDAG %s
|
15 | 15 | ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-GISEL,GFX90A-GISEL %s
|
@@ -5482,23 +5482,41 @@ define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %ar
|
5482 | 5482 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v1
|
5483 | 5483 | ; GFX8-NEXT: s_setpc_b64 s[30:31]
|
5484 | 5484 | ;
|
5485 |
| -; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_all: |
5486 |
| -; GFX900: ; %bb.0: ; %bb |
5487 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
5488 |
| -; GFX900-NEXT: v_add_u32_e32 v0, 1, v0 |
5489 |
| -; GFX900-NEXT: v_mul_lo_u32 v2, v0, v1 |
5490 |
| -; GFX900-NEXT: v_add_u32_e32 v0, v2, v0 |
5491 |
| -; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1 |
5492 |
| -; GFX900-NEXT: v_add_u32_e32 v1, 1, v2 |
5493 |
| -; GFX900-NEXT: v_mul_lo_u32 v5, v0, v1 |
5494 |
| -; GFX900-NEXT: global_store_dword v[3:4], v2, off |
5495 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) |
5496 |
| -; GFX900-NEXT: global_store_dword v[3:4], v0, off |
5497 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) |
5498 |
| -; GFX900-NEXT: global_store_dword v[3:4], v5, off |
5499 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) |
5500 |
| -; GFX900-NEXT: v_add_u32_e32 v0, v5, v1 |
5501 |
| -; GFX900-NEXT: s_setpc_b64 s[30:31] |
| 5485 | +; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all: |
| 5486 | +; GFX900-SDAG: ; %bb.0: ; %bb |
| 5487 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 5488 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 |
| 5489 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 |
| 5490 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 |
| 5491 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5492 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 |
| 5493 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1 |
| 5494 | +; GFX900-SDAG-NEXT: global_store_dword v[3:4], v2, off |
| 5495 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| 5496 | +; GFX900-SDAG-NEXT: global_store_dword v[3:4], v0, off |
| 5497 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| 5498 | +; GFX900-SDAG-NEXT: global_store_dword v[3:4], v5, off |
| 5499 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| 5500 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v5, v1 |
| 5501 | +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 5502 | +; |
| 5503 | +; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all: |
| 5504 | +; GFX900-GISEL: ; %bb.0: ; %bb |
| 5505 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 5506 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| 5507 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 |
| 5508 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 |
| 5509 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5510 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 |
| 5511 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1 |
| 5512 | +; GFX900-GISEL-NEXT: global_store_dword v[3:4], v2, off |
| 5513 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 5514 | +; GFX900-GISEL-NEXT: global_store_dword v[3:4], v0, off |
| 5515 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 5516 | +; GFX900-GISEL-NEXT: global_store_dword v[3:4], v5, off |
| 5517 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 5518 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v5, v1 |
| 5519 | +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
5502 | 5520 | ;
|
5503 | 5521 | ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
|
5504 | 5522 | ; GFX90A-SDAG: ; %bb.0: ; %bb
|
@@ -5686,21 +5704,37 @@ define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %a
|
5686 | 5704 | ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
|
5687 | 5705 | ; GFX8-NEXT: s_setpc_b64 s[30:31]
|
5688 | 5706 | ;
|
5689 |
| -; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_some: |
5690 |
| -; GFX900: ; %bb.0: ; %bb |
5691 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
5692 |
| -; GFX900-NEXT: v_add_u32_e32 v0, 1, v0 |
5693 |
| -; GFX900-NEXT: v_mul_lo_u32 v2, v0, v1 |
5694 |
| -; GFX900-NEXT: v_add_u32_e32 v0, v2, v0 |
5695 |
| -; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1 |
5696 |
| -; GFX900-NEXT: v_add_u32_e32 v1, 1, v2 |
5697 |
| -; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1 |
5698 |
| -; GFX900-NEXT: global_store_dword v[3:4], v2, off |
5699 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) |
5700 |
| -; GFX900-NEXT: global_store_dword v[3:4], v0, off |
5701 |
| -; GFX900-NEXT: s_waitcnt vmcnt(0) |
5702 |
| -; GFX900-NEXT: v_add_u32_e32 v0, v0, v1 |
5703 |
| -; GFX900-NEXT: s_setpc_b64 s[30:31] |
| 5707 | +; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some: |
| 5708 | +; GFX900-SDAG: ; %bb.0: ; %bb |
| 5709 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 5710 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0 |
| 5711 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1 |
| 5712 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0 |
| 5713 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5714 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2 |
| 5715 | +; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5716 | +; GFX900-SDAG-NEXT: global_store_dword v[3:4], v2, off |
| 5717 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| 5718 | +; GFX900-SDAG-NEXT: global_store_dword v[3:4], v0, off |
| 5719 | +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) |
| 5720 | +; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v0, v1 |
| 5721 | +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 5722 | +; |
| 5723 | +; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some: |
| 5724 | +; GFX900-GISEL: ; %bb.0: ; %bb |
| 5725 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 5726 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0 |
| 5727 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1 |
| 5728 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0 |
| 5729 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5730 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2 |
| 5731 | +; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1 |
| 5732 | +; GFX900-GISEL-NEXT: global_store_dword v[3:4], v2, off |
| 5733 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 5734 | +; GFX900-GISEL-NEXT: global_store_dword v[3:4], v0, off |
| 5735 | +; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) |
| 5736 | +; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v0, v1 |
| 5737 | +; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] |
5704 | 5738 | ;
|
5705 | 5739 | ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
|
5706 | 5740 | ; GFX90A-SDAG: ; %bb.0: ; %bb
|
@@ -8291,7 +8325,102 @@ entry:
|
8291 | 8325 | ret <2 x i16> %add0
|
8292 | 8326 | }
|
8293 | 8327 |
|
; mul_u24_add64: adds the i64 argument %z to the i64 result of
; @llvm.amdgcn.mul.u24.i64(%x, %y) and returns the sum.
; Per the checks below, the GFX9-SDAG and GFX10-SDAG runs expect the multiply
; and add to be selected as a single v_mad_u64_u32, while the GFX67, GFX8,
; GFX9-GISEL and GFX10-GISEL runs expect separate v_mul_u32_u24 /
; v_mul_hi_u32_u24 instructions followed by an add / add-with-carry pair.
| 8328 | +define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) { |
| 8329 | +; GFX67-LABEL: mul_u24_add64: |
| 8330 | +; GFX67: ; %bb.0: |
| 8331 | +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8332 | +; GFX67-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1 |
| 8333 | +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8334 | +; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2 |
| 8335 | +; GFX67-NEXT: v_addc_u32_e32 v1, vcc, v4, v3, vcc |
| 8336 | +; GFX67-NEXT: s_setpc_b64 s[30:31] |
| 8337 | +; |
| 8338 | +; GFX8-LABEL: mul_u24_add64: |
| 8339 | +; GFX8: ; %bb.0: |
| 8340 | +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8341 | +; GFX8-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1 |
| 8342 | +; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8343 | +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| 8344 | +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v4, v3, vcc |
| 8345 | +; GFX8-NEXT: s_setpc_b64 s[30:31] |
| 8346 | +; |
| 8347 | +; GFX9-SDAG-LABEL: mul_u24_add64: |
| 8348 | +; GFX9-SDAG: ; %bb.0: |
| 8349 | +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8350 | +; GFX9-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3] |
| 8351 | +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 8352 | +; |
| 8353 | +; GFX9-GISEL-LABEL: mul_u24_add64: |
| 8354 | +; GFX9-GISEL: ; %bb.0: |
| 8355 | +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8356 | +; GFX9-GISEL-NEXT: v_mul_hi_u32_u24_e32 v4, v0, v1 |
| 8357 | +; GFX9-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8358 | +; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| 8359 | +; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v4, v3, vcc |
| 8360 | +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] |
| 8361 | +; |
| 8362 | +; GFX10-SDAG-LABEL: mul_u24_add64: |
| 8363 | +; GFX10-SDAG: ; %bb.0: |
| 8364 | +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8365 | +; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3] |
| 8366 | +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] |
| 8367 | +; |
| 8368 | +; GFX10-GISEL-LABEL: mul_u24_add64: |
| 8369 | +; GFX10-GISEL: ; %bb.0: |
| 8370 | +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8371 | +; GFX10-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1 |
| 8372 | +; GFX10-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1 |
| 8373 | +; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2 |
| 8374 | +; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo |
| 8375 | +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] |
| 8376 | + %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y) |
| 8377 | + %add = add i64 %mul, %z |
| 8378 | + ret i64 %add |
| 8379 | +} |
| 8380 | + |
; mul_u24_zext_add64: zero-extends the i32 result of
; @llvm.amdgcn.mul.u24(%x, %y) to i64, adds the i64 argument %z, and returns
; the sum.
; Per the checks below, every checked prefix (GFX67, GFX8, GFX9, GFX10) expects
; one v_mul_u32_u24 followed by an add and an add-with-carry whose other
; operand is the constant 0; no v_mad_u64_u32 appears in any expected output.
| 8381 | +define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) { |
| 8382 | +; GFX67-LABEL: mul_u24_zext_add64: |
| 8383 | +; GFX67: ; %bb.0: |
| 8384 | +; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8385 | +; GFX67-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8386 | +; GFX67-NEXT: v_add_i32_e32 v0, vcc, v0, v2 |
| 8387 | +; GFX67-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| 8388 | +; GFX67-NEXT: s_setpc_b64 s[30:31] |
| 8389 | +; |
| 8390 | +; GFX8-LABEL: mul_u24_zext_add64: |
| 8391 | +; GFX8: ; %bb.0: |
| 8392 | +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8393 | +; GFX8-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8394 | +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 |
| 8395 | +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v3, vcc |
| 8396 | +; GFX8-NEXT: s_setpc_b64 s[30:31] |
| 8397 | +; |
| 8398 | +; GFX9-LABEL: mul_u24_zext_add64: |
| 8399 | +; GFX9: ; %bb.0: |
| 8400 | +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8401 | +; GFX9-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8402 | +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 |
| 8403 | +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc |
| 8404 | +; GFX9-NEXT: s_setpc_b64 s[30:31] |
| 8405 | +; |
| 8406 | +; GFX10-LABEL: mul_u24_zext_add64: |
| 8407 | +; GFX10: ; %bb.0: |
| 8408 | +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 8409 | +; GFX10-NEXT: v_mul_u32_u24_e32 v0, v0, v1 |
| 8410 | +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 |
| 8411 | +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo |
| 8412 | +; GFX10-NEXT: s_setpc_b64 s[30:31] |
| 8413 | + %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y) |
| 8414 | + %mul.zext = zext i32 %mul to i64 |
| 8415 | + %add = add i64 %mul.zext, %z |
| 8416 | + ret i64 %add |
| 8417 | +} |
| 8418 | + |
; Declarations of the mul.u24 intrinsics called by mul_u24_add64 (i64 result)
; and mul_u24_zext_add64 (i32 result); both take two i32 operands.
| 8419 | +declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32) |
| 8420 | +declare i32 @llvm.amdgcn.mul.u24(i32, i32) |
| 8421 | + |
8294 | 8422 | ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
8295 | 8423 | ; GFX6: {{.*}}
|
8296 | 8424 | ; GFX7: {{.*}}
|
| 8425 | +; GFX900: {{.*}} |
8297 | 8426 | ; GFX90A: {{.*}}
|
0 commit comments