Skip to content

Commit b4c6607

Browse files
committed
[VectorCombine][X86] Add test showing foldShuffleOfShuffles folding shuffles that would be better kept separate
On AVX+ targets a broadcast load can be treated as free.
1 parent 132bf4a commit b4c6607

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
3-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
44

55
; fold to identity
66

@@ -43,3 +43,28 @@ define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) {
4343
%concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4444
ret <8 x i32> %concat
4545
}
46+
47+
; Blend of two splats, each built from element 0 of a <4 x double> load.
; Per the SSE checks below, the two splat shuffles and the blend are all kept.
; Per the AVX checks below, they are merged into one two-source shuffle of the
; loaded vectors.
; NOTE(review): the commit message says a broadcast load can be treated as free
; on AVX+ targets, so the merged AVX form is presumably the undesired result
; this test is recording - confirm before regenerating the checks.
define <4 x double> @blend_broadcasts_v4f64(ptr %p0, ptr %p1) {
48+
; SSE-LABEL: define <4 x double> @blend_broadcasts_v4f64(
49+
; SSE-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
50+
; SSE-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
51+
; SSE-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
52+
; SSE-NEXT: [[BCST0:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> undef, <4 x i32> zeroinitializer
53+
; SSE-NEXT: [[BCST1:%.*]] = shufflevector <4 x double> [[LD1]], <4 x double> undef, <4 x i32> zeroinitializer
54+
; SSE-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[BCST0]], <4 x double> [[BCST1]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
55+
; SSE-NEXT: ret <4 x double> [[BLEND]]
56+
;
57+
; AVX-LABEL: define <4 x double> @blend_broadcasts_v4f64(
58+
; AVX-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) #[[ATTR0]] {
59+
; AVX-NEXT: [[LD0:%.*]] = load <4 x double>, ptr [[P0]], align 32
60+
; AVX-NEXT: [[LD1:%.*]] = load <4 x double>, ptr [[P1]], align 32
61+
; AVX-NEXT: [[BLEND:%.*]] = shufflevector <4 x double> [[LD0]], <4 x double> [[LD1]], <4 x i32> <i32 0, i32 4, i32 4, i32 0>
62+
; AVX-NEXT: ret <4 x double> [[BLEND]]
63+
;
64+
%ld0 = load <4 x double>, ptr %p0, align 32
65+
%ld1 = load <4 x double>, ptr %p1, align 32
66+
; An all-zero mask makes every result lane read element 0 of the first operand (a splat).
%bcst0 = shufflevector <4 x double> %ld0, <4 x double> undef, <4 x i32> zeroinitializer
67+
%bcst1 = shufflevector <4 x double> %ld1, <4 x double> undef, <4 x i32> zeroinitializer
68+
; Mask <0,5,6,3>: lanes 0 and 3 come from %bcst0, lanes 1 and 2 come from %bcst1.
%blend = shufflevector <4 x double> %bcst0, <4 x double> %bcst1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
69+
ret <4 x double> %blend
70+
}

0 commit comments

Comments
 (0)