|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s |
| 3 | + |
| 4 | +declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) |
| 5 | +declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) |
| 6 | +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) |
| 7 | +declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) |
| 8 | + |
| 9 | +; |
| 10 | +; Demanded Elts |
| 11 | +; |
| 12 | + |
| 13 | + |
| 14 | +define double @elts_addsub_v2f64(<2 x double> %0, <2 x double> %1) { |
| 15 | +; CHECK-LABEL: @elts_addsub_v2f64( |
| 16 | +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP0:%.*]], <2 x double> undef, <2 x i32> zeroinitializer |
| 17 | +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP1:%.*]], <2 x double> undef, <2 x i32> zeroinitializer |
| 18 | +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> [[TMP3]], <2 x double> [[TMP4]]) |
| 19 | +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 |
| 20 | +; CHECK-NEXT: ret double [[TMP6]] |
| 21 | +; Input below: both addsub.pd operands are splats of element 0, and only element 0 of the result is extracted. The checks above mirror the input instruction for instruction. |
| 22 | + %3 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> <i32 0, i32 0> |
| 23 | + %4 = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> <i32 0, i32 0> |
| 24 | + %5 = tail call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %3, <2 x double> %4) |
| 25 | + %6 = extractelement <2 x double> %5, i32 0 |
| 26 | + ret double %6 |
| 27 | +} |
| 28 | + |
| 29 | +define float @elts_addsub_v4f32(<4 x float> %0, <4 x float> %1) { |
| 30 | +; CHECK-LABEL: @elts_addsub_v4f32( |
| 31 | +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> |
| 32 | +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> |
| 33 | +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP3]], <4 x float> [[TMP4]]) |
| 34 | +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
| 35 | +; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP5]], [[TMP6]] |
| 36 | +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP7]], i32 0 |
| 37 | +; CHECK-NEXT: ret float [[TMP8]] |
| 38 | +; Input below: operands use mask <0,1,0,1>; element 1 of the addsub.ps result is shuffled into lane 0 and added to the result, then lane 0 is extracted, so only result elements 0 and 1 reach the return value. The checks above mirror the input instruction for instruction. |
| 39 | + %3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> |
| 40 | + %4 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> |
| 41 | + %5 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %3, <4 x float> %4) |
| 42 | + %6 = shufflevector <4 x float> %5, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> |
| 43 | + %7 = fadd <4 x float> %5, %6 |
| 44 | + %8 = extractelement <4 x float> %7, i32 0 |
| 45 | + ret float %8 |
| 46 | +} |
| 47 | + |
| 48 | +define double @elts_addsub_v4f64(<4 x double> %0, <4 x double> %1) { |
| 49 | +; CHECK-LABEL: @elts_addsub_v4f64( |
| 50 | +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP0:%.*]], <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> |
| 51 | +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP1:%.*]], <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> |
| 52 | +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> [[TMP3]], <4 x double> [[TMP4]]) |
| 53 | +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 |
| 54 | +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 |
| 55 | +; CHECK-NEXT: [[TMP8:%.*]] = fadd double [[TMP6]], [[TMP7]] |
| 56 | +; CHECK-NEXT: ret double [[TMP8]] |
| 57 | +; Input below: operands use mask <0,1,3,3>; only elements 0 and 1 of the addsub.pd.256 result are extracted and summed. The checks above mirror the input instruction for instruction. |
| 58 | + %3 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> |
| 59 | + %4 = shufflevector <4 x double> %1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> |
| 60 | + %5 = tail call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %3, <4 x double> %4) |
| 61 | + %6 = extractelement <4 x double> %5, i32 0 |
| 62 | + %7 = extractelement <4 x double> %5, i32 1 |
| 63 | + %8 = fadd double %6, %7 |
| 64 | + ret double %8 |
| 65 | +} |
| 66 | + |
| 67 | +define float @elts_addsub_v8f32(<8 x float> %0, <8 x float> %1) { |
| 68 | +; CHECK-LABEL: @elts_addsub_v8f32( |
| 69 | +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0:%.*]], <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4> |
| 70 | +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1:%.*]], <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4> |
| 71 | +; CHECK-NEXT: [[TMP5:%.*]] = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> [[TMP3]], <8 x float> [[TMP4]]) |
| 72 | +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP5]], i32 0 |
| 73 | +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP5]], i32 1 |
| 74 | +; CHECK-NEXT: [[TMP8:%.*]] = fadd float [[TMP6]], [[TMP7]] |
| 75 | +; CHECK-NEXT: ret float [[TMP8]] |
| 76 | +; Input below: operands use mask <0,1,0,1,4,4,4,4>; only elements 0 and 1 of the addsub.ps.256 result are extracted and summed. The checks above mirror the input instruction for instruction. |
| 77 | + %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4> |
| 78 | + %4 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 4, i32 4, i32 4> |
| 79 | + %5 = tail call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %3, <8 x float> %4) |
| 80 | + %6 = extractelement <8 x float> %5, i32 0 |
| 81 | + %7 = extractelement <8 x float> %5, i32 1 |
| 82 | + %8 = fadd float %6, %7 |
| 83 | + ret float %8 |
| 84 | +} |
| 85 | + |
0 commit comments