Commit 1941f34

[TableGen][GISel] Import more "multi-level" patterns (#120332)
Previously, if the destination DAG had an untyped leaf, we would import the pattern only if that leaf was defined by the *top-level* source DAG. This is an unnecessary restriction. Here is an example of such a pattern:

```
def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC),
          (VMLADDUHM $vA, $vB, $vC)>;
```

Previously, this pattern failed to import because `add` defines neither `$vA` nor `$vB`; they are defined by the nested `mul`. This change reduces the number of skipped patterns as follows:

```
AArch64:  8695 ->  8548 (-147)
AMDGPU:  11333 -> 11240 (-93)
ARM:      4297 ->  4278 (-1)
PowerPC:  3955 ->  3010 (-945)
```

Other GISel-enabled targets are unaffected.
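To make the restriction concrete, here is a minimal, hypothetical TableGen sketch (not taken from the patch; the `MYTGT_ADD` and `MYTGT_MADD` instruction names are made up). The first pattern binds its operands at the top level of the source DAG and was importable before; the second binds `$a` and `$b` one level down, inside the `mul`, and is only importable with this change.

```
// Already imported: $a and $b are bound by the top-level `add`.
def : Pat<(add i32:$a, i32:$b),
          (MYTGT_ADD $a, $b)>;

// Previously skipped, now imported: $a and $b are bound by the nested
// `mul`, not by the top-level `add`, yet they appear as untyped leaves
// of the destination DAG.
def : Pat<(add (mul i32:$a, i32:$b), i32:$c),
          (MYTGT_MADD $a, $b, $c)>;
```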
1 parent dd8e1ad commit 1941f34

8 files changed (+821, -1043 lines)

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ashr.s16.mir

Lines changed: 4 additions & 10 deletions
@@ -274,24 +274,18 @@ body: |
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX8-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
-; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX9-LABEL: name: ashr_s16_vv_zext_to_s64
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX9-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_ASHRREV_I16_e64_]], implicit $exec
-; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ASHRREV_I16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX10-LABEL: name: ashr_s16_vv_zext_to_s64
 ; GFX10: liveins: $vgpr0, $vgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctpop.mir

Lines changed: 10 additions & 15 deletions
@@ -79,9 +79,8 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
-; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
 %0:vgpr(s32) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_CTPOP %0
@@ -104,9 +103,8 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY1]], [[V_BCNT_U32_B32_e64_]], 0, implicit $exec
-; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
 %0:vgpr(s32) = COPY $vgpr0
 %1:vgpr(s32) = COPY $vgpr1
 %2:vgpr(s32) = G_CTPOP %0
@@ -155,9 +153,8 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], 0, implicit $exec
-; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY1]], 0, implicit $exec
-; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
 %0:vgpr(s32) = COPY $vgpr0
 %1:sgpr(s32) = COPY $sgpr0
 %2:vgpr(s32) = G_CTPOP %0
@@ -181,9 +178,8 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
-; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], 0, implicit $exec
-; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[V_BCNT_U32_B32_e64_]], [[COPY]], 0, implicit $exec
-; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY1]], [[COPY]], implicit $exec
+; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
 %0:vgpr(s32) = COPY $vgpr0
 %1:sgpr(s32) = COPY $sgpr0
 %2:vgpr(s32) = G_CTPOP %1
@@ -207,9 +203,8 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-; CHECK-NEXT: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def dead $scc
-; CHECK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[S_BCNT1_I32_B32_]], [[COPY1]], 0, implicit $exec
-; CHECK-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+; CHECK-NEXT: [[V_BCNT_U32_B32_e64_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e64 [[COPY]], [[COPY1]], implicit $exec
+; CHECK-NEXT: S_ENDPGM 0, implicit [[V_BCNT_U32_B32_e64_]]
 %0:sgpr(s32) = COPY $sgpr0
 %1:vgpr(s32) = COPY $vgpr0
 %2:sgpr(s32) = G_CTPOP %0

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-lshr.s16.mir

Lines changed: 4 additions & 10 deletions
@@ -272,24 +272,18 @@ body: |
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX8-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
-; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX9-LABEL: name: lshr_s16_vv_zext_to_s64
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX9-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHRREV_B16_e64_]], implicit $exec
-; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHRREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX10-LABEL: name: lshr_s16_vv_zext_to_s64
 ; GFX10: liveins: $vgpr0, $vgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-shl.s16.mir

Lines changed: 4 additions & 10 deletions
@@ -272,24 +272,18 @@ body: |
 ; GFX8-NEXT: {{ $}}
 ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX8-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX8-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
-; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX8-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX9-LABEL: name: shl_s16_vv_zext_to_s64
 ; GFX9: liveins: $vgpr0, $vgpr1
 ; GFX9-NEXT: {{ $}}
 ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 ; GFX9-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 [[COPY1]], [[COPY]], implicit $exec
-; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-; GFX9-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
-; GFX9-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[S_MOV_B32_1]], [[V_LSHLREV_B16_e64_]], implicit $exec
-; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
-; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_LSHLREV_B16_e64_]], %subreg.sub0, [[V_MOV_B32_e32_]], %subreg.sub1
 ; GFX9-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
 ; GFX10-LABEL: name: shl_s16_vv_zext_to_s64
 ; GFX10: liveins: $vgpr0, $vgpr1

llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll

Lines changed: 118 additions & 52 deletions
@@ -204,18 +204,37 @@ define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b)
 }
 
 define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
-; GCN-LABEL: vector_xnor_i32_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v1
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i32_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i32_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i32_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i32_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
+; GFX906-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: vector_xnor_i32_one_use:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 %xor = xor i32 %a, %b
@@ -224,22 +243,45 @@ entry:
 }
 
 define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
-; GCN-LABEL: vector_xnor_i64_one_use:
-; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
-; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: v_not_b32_e32 v1, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
+; GFX7-LABEL: vector_xnor_i64_one_use:
+; GFX7: ; %bb.0: ; %entry
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: v_not_b32_e32 v1, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: vector_xnor_i64_one_use:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: v_not_b32_e32 v1, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-LABEL: vector_xnor_i64_one_use:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_xor_b32_e32 v0, v0, v2
+; GFX900-NEXT: v_xor_b32_e32 v1, v1, v3
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: v_not_b32_e32 v1, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX906-LABEL: vector_xnor_i64_one_use:
+; GFX906: ; %bb.0: ; %entry
+; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX906-NEXT: v_xnor_b32_e32 v1, v1, v3
+; GFX906-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: vector_xnor_i64_one_use:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v2
+; GFX10-NEXT: v_xnor_b32_e32 v1, v1, v3
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 %xor = xor i64 %a, %b
@@ -248,16 +290,32 @@ entry:
 }
 
 define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_s_v_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_s_v_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_s_v_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_s_v_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_s_v_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_s_v_i32_one_use:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
 ; GFX10-NEXT: ; return to shader part epilog
 %xor = xor i32 %s, %v
 %d = xor i32 %xor, -1
@@ -266,16 +324,32 @@ define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
 }
 
 define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
-; GCN-LABEL: xnor_v_s_i32_one_use:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_xor_b32_e32 v0, s0, v0
-; GCN-NEXT: v_not_b32_e32 v0, v0
-; GCN-NEXT: ; return to shader part epilog
+; GFX7-LABEL: xnor_v_s_i32_one_use:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT: v_not_b32_e32 v0, v0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: xnor_v_s_i32_one_use:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT: v_not_b32_e32 v0, v0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX900-LABEL: xnor_v_s_i32_one_use:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
+; GFX900-NEXT: v_not_b32_e32 v0, v0
+; GFX900-NEXT: ; return to shader part epilog
+;
+; GFX906-LABEL: xnor_v_s_i32_one_use:
+; GFX906: ; %bb.0:
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_v_s_i32_one_use:
 ; GFX10: ; %bb.0:
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_not_b32_e32 v0, v0
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
 ; GFX10-NEXT: ; return to shader part epilog
 %xor = xor i32 %v, %s
 %d = xor i32 %xor, -1
@@ -314,19 +388,15 @@ define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
 ; GFX906-LABEL: xnor_i64_s_v_one_use:
 ; GFX906: ; %bb.0: ; %entry
 ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX906-NEXT: v_xnor_b32_e32 v1, s1, v1
 ; GFX906-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_i64_s_v_one_use:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, s0, v0
+; GFX10-NEXT: v_xnor_b32_e32 v1, s1, v1
 ; GFX10-NEXT: ; return to shader part epilog
 entry:
 %b = shl i64 %b64, 29
@@ -367,19 +437,15 @@ define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
 ; GFX906-LABEL: xnor_i64_v_s_one_use:
 ; GFX906: ; %bb.0:
 ; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX906-NEXT: v_not_b32_e32 v0, v0
-; GFX906-NEXT: v_not_b32_e32 v1, v1
+; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX906-NEXT: v_xnor_b32_e64 v1, v1, s1
 ; GFX906-NEXT: ; return to shader part epilog
 ;
 ; GFX10-LABEL: xnor_i64_v_s_one_use:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
-; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
-; GFX10-NEXT: v_not_b32_e32 v0, v0
-; GFX10-NEXT: v_not_b32_e32 v1, v1
+; GFX10-NEXT: v_xnor_b32_e64 v0, v0, s0
+; GFX10-NEXT: v_xnor_b32_e64 v1, v1, s1
 ; GFX10-NEXT: ; return to shader part epilog
 %b = shl i64 %b64, 29
 %xor = xor i64 %b, %a
@@ -419,7 +485,7 @@ define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
 ; GFX10-LABEL: vector_xor_na_b_i32_one_use:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
+; GFX10-NEXT: v_xnor_b32_e32 v0, v0, v1
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 %na = xor i32 %a, -1
@@ -458,7 +524,7 @@ define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
 ; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
 ; GFX10: ; %bb.0: ; %entry
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
+; GFX10-NEXT: v_xnor_b32_e32 v0, v1, v0
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 entry:
 %nb = xor i32 %b, -1
