Skip to content

Commit 4a42658

Browse files
committed
[VectorCombine][X86] shuffle-of-cmps.ll - tweak shuf_fcmp_oeq_v4i32 shuffle to be not so cheap
An upcoming patch will recognise this as a cheap INSERTPS shuffle - alter the shuffle to ensure the 2 x FCMP is still cheaper on SSE4 targets
1 parent 19c9348 commit 4a42658

File tree

1 file changed

+21
-13
lines changed

1 file changed

+21
-13
lines changed

llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
22
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
33
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
5-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
4+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
5+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
66

77
declare void @use(<4 x i1>)
88

@@ -105,8 +105,8 @@ define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z,
105105
define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
106106
; SSE2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
107107
; SSE2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
108-
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0>
109-
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
108+
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>
109+
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
110110
; SSE2-NEXT: [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
111111
; SSE2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
112112
; SSE2-NEXT: ret <4 x i32> [[R]]
@@ -115,21 +115,29 @@ define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float
115115
; SSE4-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
116116
; SSE4-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
117117
; SSE4-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
118-
; SSE4-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
118+
; SSE4-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
119119
; SSE4-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
120120
; SSE4-NEXT: ret <4 x i32> [[R]]
121121
;
122-
; AVX-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
123-
; AVX-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124-
; AVX-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
125-
; AVX-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
126-
; AVX-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
127-
; AVX-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
128-
; AVX-NEXT: ret <4 x i32> [[R]]
122+
; AVX2-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
123+
; AVX2-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
124+
; AVX2-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
125+
; AVX2-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
126+
; AVX2-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
127+
; AVX2-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
128+
; AVX2-NEXT: ret <4 x i32> [[R]]
129+
;
130+
; AVX512-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
131+
; AVX512-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
132+
; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 0>
133+
; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
134+
; AVX512-NEXT: [[S:%.*]] = fcmp oeq <4 x float> [[TMP1]], [[TMP2]]
135+
; AVX512-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
136+
; AVX512-NEXT: ret <4 x i32> [[R]]
129137
;
130138
%b0 = fcmp oeq <4 x float> %x, %y
131139
%b1 = fcmp oeq <4 x float> %x, %z
132-
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
140+
%s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 4, i32 0>
133141
%r = sext <4 x i1> %s to <4 x i32>
134142
ret <4 x i32> %r
135143
}

0 commit comments

Comments
 (0)