Skip to content

Commit 5e64801

Browse files
committed
[AMDGPU] Update.
1 parent 307a275 commit 5e64801

File tree

2 files changed: 933 additions (+933) and 42 deletions (-42)

llvm/test/CodeGen/AMDGPU/bfi_int.ll

Lines changed: 67 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -135,9 +135,9 @@ define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y
135135
; GFX7-NEXT: s_mov_b32 s7, 0xf000
136136
; GFX7-NEXT: s_mov_b32 s6, -1
137137
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
138-
; GFX7-NEXT: s_xor_b32 s1, s1, s2
139-
; GFX7-NEXT: s_and_b32 s0, s0, s1
140-
; GFX7-NEXT: s_xor_b32 s0, s2, s0
138+
; GFX7-NEXT: s_andn2_b32 s2, s2, s0
139+
; GFX7-NEXT: s_and_b32 s0, s1, s0
140+
; GFX7-NEXT: s_or_b32 s0, s0, s2
141141
; GFX7-NEXT: v_mov_b32_e32 v0, s0
142142
; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0
143143
; GFX7-NEXT: s_endpgm
@@ -147,9 +147,9 @@ define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y
147147
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
148148
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
149149
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
150-
; GFX8-NEXT: s_xor_b32 s1, s1, s2
151-
; GFX8-NEXT: s_and_b32 s0, s0, s1
152-
; GFX8-NEXT: s_xor_b32 s0, s2, s0
150+
; GFX8-NEXT: s_andn2_b32 s2, s2, s0
151+
; GFX8-NEXT: s_and_b32 s0, s1, s0
152+
; GFX8-NEXT: s_or_b32 s0, s0, s2
153153
; GFX8-NEXT: v_mov_b32_e32 v0, s4
154154
; GFX8-NEXT: v_mov_b32_e32 v1, s5
155155
; GFX8-NEXT: v_mov_b32_e32 v2, s0
@@ -163,9 +163,9 @@ define amdgpu_kernel void @s_bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y
163163
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
164164
; GFX10-NEXT: v_mov_b32_e32 v0, 0
165165
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
166-
; GFX10-NEXT: s_xor_b32 s1, s1, s2
167-
; GFX10-NEXT: s_and_b32 s0, s0, s1
168-
; GFX10-NEXT: s_xor_b32 s0, s2, s0
166+
; GFX10-NEXT: s_andn2_b32 s2, s2, s0
167+
; GFX10-NEXT: s_and_b32 s0, s1, s0
168+
; GFX10-NEXT: s_or_b32 s0, s0, s2
169169
; GFX10-NEXT: v_mov_b32_e32 v1, s0
170170
; GFX10-NEXT: global_store_dword v0, v1, s[4:5]
171171
; GFX10-NEXT: s_endpgm
@@ -317,19 +317,26 @@ entry:
317317
define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
318318
; GFX7-LABEL: s_s_v_bfi_sha256_ch:
319319
; GFX7: ; %bb.0: ; %entry
320-
; GFX7-NEXT: v_mov_b32_e32 v1, s0
321-
; GFX7-NEXT: v_bfi_b32 v0, v1, s1, v0
320+
; GFX7-NEXT: s_not_b32 s1, s1
321+
; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
322+
; GFX7-NEXT: s_nand_b32 s0, s1, s0
323+
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
322324
; GFX7-NEXT: ; return to shader part epilog
323325
;
324326
; GFX8-LABEL: s_s_v_bfi_sha256_ch:
325327
; GFX8: ; %bb.0: ; %entry
326-
; GFX8-NEXT: v_mov_b32_e32 v1, s0
327-
; GFX8-NEXT: v_bfi_b32 v0, v1, s1, v0
328+
; GFX8-NEXT: s_not_b32 s1, s1
329+
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
330+
; GFX8-NEXT: s_nand_b32 s0, s1, s0
331+
; GFX8-NEXT: v_and_b32_e32 v0, s0, v0
328332
; GFX8-NEXT: ; return to shader part epilog
329333
;
330334
; GFX10-LABEL: s_s_v_bfi_sha256_ch:
331335
; GFX10: ; %bb.0: ; %entry
332-
; GFX10-NEXT: v_bfi_b32 v0, s0, s1, v0
336+
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
337+
; GFX10-NEXT: s_not_b32 s1, s1
338+
; GFX10-NEXT: s_nand_b32 s0, s1, s0
339+
; GFX10-NEXT: v_and_b32_e32 v0, s0, v0
333340
; GFX10-NEXT: ; return to shader part epilog
334341
;
335342
; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
@@ -350,30 +357,40 @@ entry:
350357
ret float %cast
351358
}
352359

353-
define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
360+
define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
354361
; GFX7-LABEL: s_v_v_bfi_sha256_ch:
355362
; GFX7: ; %bb.0: ; %entry
356-
; GFX7-NEXT: v_bfi_b32 v0, s0, v0, v1
363+
; GFX7-NEXT: s_not_b32 s1, s1
364+
; GFX7-NEXT: v_or_b32_e32 v0, s0, v0
365+
; GFX7-NEXT: s_nand_b32 s0, s1, s0
366+
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
357367
; GFX7-NEXT: ; return to shader part epilog
358368
;
359369
; GFX8-LABEL: s_v_v_bfi_sha256_ch:
360370
; GFX8: ; %bb.0: ; %entry
361-
; GFX8-NEXT: v_bfi_b32 v0, s0, v0, v1
371+
; GFX8-NEXT: s_not_b32 s1, s1
372+
; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
373+
; GFX8-NEXT: s_nand_b32 s0, s1, s0
374+
; GFX8-NEXT: v_and_b32_e32 v0, s0, v0
362375
; GFX8-NEXT: ; return to shader part epilog
363376
;
364377
; GFX10-LABEL: s_v_v_bfi_sha256_ch:
365378
; GFX10: ; %bb.0: ; %entry
366-
; GFX10-NEXT: v_bfi_b32 v0, s0, v0, v1
379+
; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
380+
; GFX10-NEXT: s_not_b32 s1, s1
381+
; GFX10-NEXT: s_nand_b32 s0, s1, s0
382+
; GFX10-NEXT: v_and_b32_e32 v0, s0, v0
367383
; GFX10-NEXT: ; return to shader part epilog
368384
;
369385
; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
370386
; GFX8-GISEL: ; %bb.0: ; %entry
371-
; GFX8-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
387+
; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, s0
388+
; GFX8-GISEL-NEXT: v_bfi_b32 v0, v1, s1, v0
372389
; GFX8-GISEL-NEXT: ; return to shader part epilog
373390
;
374391
; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
375392
; GFX10-GISEL: ; %bb.0: ; %entry
376-
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, v0, v1
393+
; GFX10-GISEL-NEXT: v_bfi_b32 v0, s0, s1, v0
377394
; GFX10-GISEL-NEXT: ; return to shader part epilog
378395
entry:
379396
%xor0 = xor i32 %y, %z
@@ -1008,24 +1025,32 @@ define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i6
10081025
define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) {
10091026
; GFX7-LABEL: s_s_v_bitselect_i64_pat_1:
10101027
; GFX7: ; %bb.0:
1011-
; GFX7-NEXT: v_mov_b32_e32 v2, s1
1012-
; GFX7-NEXT: v_bfi_b32 v1, s3, v2, v1
1013-
; GFX7-NEXT: v_mov_b32_e32 v2, s0
1014-
; GFX7-NEXT: v_bfi_b32 v0, s2, v2, v0
1028+
; GFX7-NEXT: s_not_b64 s[0:1], s[0:1]
1029+
; GFX7-NEXT: v_or_b32_e32 v1, s3, v1
1030+
; GFX7-NEXT: v_or_b32_e32 v0, s2, v0
1031+
; GFX7-NEXT: s_nand_b64 s[0:1], s[0:1], s[2:3]
1032+
; GFX7-NEXT: v_and_b32_e32 v1, s1, v1
1033+
; GFX7-NEXT: v_and_b32_e32 v0, s0, v0
10151034
; GFX7-NEXT: ; return to shader part epilog
10161035
;
10171036
; GFX8-LABEL: s_s_v_bitselect_i64_pat_1:
10181037
; GFX8: ; %bb.0:
1019-
; GFX8-NEXT: v_mov_b32_e32 v2, s1
1020-
; GFX8-NEXT: v_bfi_b32 v1, s3, v2, v1
1021-
; GFX8-NEXT: v_mov_b32_e32 v2, s0
1022-
; GFX8-NEXT: v_bfi_b32 v0, s2, v2, v0
1038+
; GFX8-NEXT: s_not_b64 s[0:1], s[0:1]
1039+
; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
1040+
; GFX8-NEXT: v_or_b32_e32 v0, s2, v0
1041+
; GFX8-NEXT: s_nand_b64 s[0:1], s[0:1], s[2:3]
1042+
; GFX8-NEXT: v_and_b32_e32 v1, s1, v1
1043+
; GFX8-NEXT: v_and_b32_e32 v0, s0, v0
10231044
; GFX8-NEXT: ; return to shader part epilog
10241045
;
10251046
; GFX10-LABEL: s_s_v_bitselect_i64_pat_1:
10261047
; GFX10: ; %bb.0:
1027-
; GFX10-NEXT: v_bfi_b32 v0, s2, s0, v0
1028-
; GFX10-NEXT: v_bfi_b32 v1, s3, s1, v1
1048+
; GFX10-NEXT: v_or_b32_e32 v1, s3, v1
1049+
; GFX10-NEXT: v_or_b32_e32 v0, s2, v0
1050+
; GFX10-NEXT: s_not_b64 s[0:1], s[0:1]
1051+
; GFX10-NEXT: s_nand_b64 s[0:1], s[0:1], s[2:3]
1052+
; GFX10-NEXT: v_and_b32_e32 v0, s0, v0
1053+
; GFX10-NEXT: v_and_b32_e32 v1, s1, v1
10291054
; GFX10-NEXT: ; return to shader part epilog
10301055
;
10311056
; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
@@ -1495,9 +1520,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
14951520
; GFX7-NEXT: s_mov_b32 s7, 0xf000
14961521
; GFX7-NEXT: s_mov_b32 s6, -1
14971522
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1498-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
14991523
; GFX7-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1500-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1524+
; GFX7-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1525+
; GFX7-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
15011526
; GFX7-NEXT: s_add_u32 s0, s0, 10
15021527
; GFX7-NEXT: s_addc_u32 s1, s1, 0
15031528
; GFX7-NEXT: v_mov_b32_e32 v0, s0
@@ -1510,9 +1535,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
15101535
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
15111536
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
15121537
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1513-
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
15141538
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1515-
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1539+
; GFX8-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1540+
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
15161541
; GFX8-NEXT: s_add_u32 s0, s0, 10
15171542
; GFX8-NEXT: s_addc_u32 s1, s1, 0
15181543
; GFX8-NEXT: v_mov_b32_e32 v0, s0
@@ -1526,9 +1551,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
15261551
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
15271552
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
15281553
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1529-
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
15301554
; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1531-
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1555+
; GFX10-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1556+
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
15321557
; GFX10-NEXT: s_add_u32 s0, s0, 10
15331558
; GFX10-NEXT: s_addc_u32 s1, s1, 0
15341559
; GFX10-NEXT: v_mov_b32_e32 v0, s0
@@ -1583,9 +1608,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
15831608
; GFX7-NEXT: s_mov_b32 s7, 0xf000
15841609
; GFX7-NEXT: s_mov_b32 s6, -1
15851610
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
1586-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
15871611
; GFX7-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1588-
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1612+
; GFX7-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1613+
; GFX7-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
15891614
; GFX7-NEXT: s_add_u32 s0, s0, 10
15901615
; GFX7-NEXT: s_addc_u32 s1, s1, 0
15911616
; GFX7-NEXT: v_mov_b32_e32 v0, s0
@@ -1598,9 +1623,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
15981623
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
15991624
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
16001625
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
1601-
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
16021626
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1603-
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1627+
; GFX8-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1628+
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
16041629
; GFX8-NEXT: s_add_u32 s0, s0, 10
16051630
; GFX8-NEXT: s_addc_u32 s1, s1, 0
16061631
; GFX8-NEXT: v_mov_b32_e32 v0, s0
@@ -1614,9 +1639,9 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
16141639
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
16151640
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
16161641
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1617-
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
16181642
; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
1619-
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
1643+
; GFX10-NEXT: s_andn2_b64 s[4:5], s[4:5], s[2:3]
1644+
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
16201645
; GFX10-NEXT: s_add_u32 s0, s0, 10
16211646
; GFX10-NEXT: s_addc_u32 s1, s1, 0
16221647
; GFX10-NEXT: v_mov_b32_e32 v0, s0

0 commit comments

Comments
 (0)