|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| 2 | +; RUN: opt -passes=slp-vectorizer -mtriple=x86_64 -mcpu=k8 -mattr=+sse4.1 -S < %s | FileCheck %s |
| 3 | + |
| | +; @foo reads four i16 values from %ptr at byte offsets 328, 330, 334 and 336, |
| | +; zero-extends them to i32, and builds two parallel scalar chains: |
| | +;   lane 0: sitofp(zext(ld0) ^ 65535) / sitofp(zext(ld3) - zext(ld0)) |
| | +;   lane 1: sitofp(zext(ld1) ^ 65535) / sitofp(zext(ld4) - zext(ld1)) |
| | +; The two quotients are compared by a final fcmp whose result is unused. |
| | +; |
| | +; The CHECK lines expect each scalar pair to become one <2 x ...> operation: |
| | +; two <2 x i16> loads, an xor performed directly on <2 x i16> with -1, a |
| | +; <2 x i32> sub of the zero-extended loads, and a single <2 x double> fdiv |
| | +; whose lanes are extracted for the scalar fcmp. |
| | +; |
| | +; NOTE(review): the input converts a value that is always in 0..65535 (a |
| | +; zero-extended i16 xor'd with 65535 in i32) via sitofp from i32, while the |
| | +; expected output applies sitofp to the <2 x i16> xor result, which reads the |
| | +; same bits as signed. These appear to disagree whenever bit 15 of the xor |
| | +; result is set (e.g. loaded value 0: 65535.0 vs -1.0) - confirm whether this |
| | +; test intentionally records current behaviour ahead of a fix. |
| 4 | +define void @foo(ptr %ptr) { |
| 5 | +; CHECK-LABEL: define void @foo( |
| 6 | +; CHECK-SAME: ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { |
| 7 | +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 328 |
| 8 | +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 334 |
| 9 | +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[GEP0]], align 8 |
| 10 | +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], <i16 -1, i16 -1> |
| 11 | +; CHECK-NEXT: [[TMP4:%.*]] = sitofp <2 x i16> [[TMP3]] to <2 x double> |
| 12 | +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i16>, ptr [[GEP3]], align 2 |
| 13 | +; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i32> |
| 14 | +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> |
| 15 | +; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP6]], [[TMP2]] |
| 16 | +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <2 x i32> [[TMP7]] to <2 x double> |
| 17 | +; CHECK-NEXT: [[TMP9:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP8]] |
| 18 | +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0 |
| 19 | +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1 |
| 20 | +; CHECK-NEXT: [[FCMP:%.*]] = fcmp olt double [[TMP11]], [[TMP10]] |
| 21 | +; CHECK-NEXT: ret void |
| 22 | +; |
| | +; Addresses of the first i16 pair: byte offsets 328 and 330 (adjacent i16s). |
| 23 | + %gep0 = getelementptr inbounds i8, ptr %ptr, i64 328 |
| 24 | + %gep1 = getelementptr inbounds i8, ptr %ptr, i64 330 |
| 25 | + |
| | +; Addresses of the second i16 pair: byte offsets 334 and 336 (adjacent i16s). |
| 26 | + %gep3 = getelementptr inbounds i8, ptr %ptr, i64 334 |
| 27 | + %gep4 = getelementptr inbounds i8, ptr %ptr, i64 336 |
| 28 | + |
| | +; Load the first pair. |
| 29 | + %ld0 = load i16, ptr %gep0, align 8 |
| 30 | + %ld1 = load i16, ptr %gep1, align 2 |
| 31 | + |
| | +; Widen the first pair to i32; these values feed both the xor and the sub. |
| 32 | + %zext0 = zext i16 %ld0 to i32 |
| 33 | + %zext1 = zext i16 %ld1 to i32 |
| 34 | + |
| | +; Flip the low 16 bits of each widened value (the upper 16 bits stay zero). |
| 35 | + %xor0 = xor i32 %zext0, 65535 |
| 36 | + %xor1 = xor i32 %zext1, 65535 |
| 37 | + |
| | +; Convert the xor results to double; these become the fdiv numerators. |
| 38 | + %sitofp0 = sitofp i32 %xor0 to double |
| 39 | + %sitofp1 = sitofp i32 %xor1 to double |
| 40 | + |
| | +; Load the second pair. |
| 41 | + %ld3 = load i16, ptr %gep3, align 2 |
| 42 | + %ld4 = load i16, ptr %gep4, align 8 |
| 43 | + |
| | +; Widen the second pair to i32. |
| 44 | + %zext3 = zext i16 %ld3 to i32 |
| 45 | + %zext4 = zext i16 %ld4 to i32 |
| 46 | + |
| | +; Per-lane difference: second pair minus first pair, computed in i32. |
| 47 | + %sub30 = sub nsw i32 %zext3, %zext0 |
| 48 | + %sub41 = sub nsw i32 %zext4, %zext1 |
| 49 | + |
| | +; Convert the differences to double; these become the fdiv denominators. |
| 50 | + %sitofp30 = sitofp i32 %sub30 to double |
| 51 | + %sitofp41 = sitofp i32 %sub41 to double |
| 52 | + |
| | +; Per-lane quotient, then compare lane 1 against lane 0. The fcmp result has |
| | +; no users; it only serves as the common consumer of both fdiv results. |
| 53 | + %fdiv030 = fdiv double %sitofp0, %sitofp30 |
| 54 | + %fdiv141 = fdiv double %sitofp1, %sitofp41 |
| 55 | + %fcmp = fcmp olt double %fdiv141, %fdiv030 |
| 56 | + ret void |
| 57 | +} |
0 commit comments