@@ -38,13 +38,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
38
38
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
39
39
; SLM-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
40
40
; SLM-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
41
- ; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
42
- ; SLM-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
41
+ ; SLM-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
42
+ ; SLM-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
43
43
; SLM-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
44
- ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
45
- ; SLM-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
46
- ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
47
- ; SLM-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
44
+ ; SLM-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A]])
45
+ ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
46
+ ; SLM-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
47
+ ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
48
48
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
49
49
; SLM-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
50
50
; SLM-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -59,13 +59,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
59
59
; AVX-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
60
60
; AVX-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
61
61
; AVX-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
62
- ; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
63
- ; AVX-NEXT: [[TMP9:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP8]])
62
+ ; AVX-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
63
+ ; AVX-NEXT: [[TMP9:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
64
64
; AVX-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
65
- ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
66
- ; AVX-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP1]])
67
- ; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
68
- ; AVX-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP3]])
65
+ ; AVX-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A]])
66
+ ; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
67
+ ; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
68
+ ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
69
69
; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
70
70
; AVX-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
71
71
; AVX-NEXT: [[R2:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -80,13 +80,13 @@ define <8 x float> @ceil_floor(<8 x float> %a) {
80
80
; AVX2-NEXT: [[A0:%.*]] = extractelement <8 x float> [[A:%.*]], i64 0
81
81
; AVX2-NEXT: [[A3:%.*]] = extractelement <8 x float> [[A]], i64 3
82
82
; AVX2-NEXT: [[AB0:%.*]] = call float @llvm.ceil.f32(float [[A0]])
83
- ; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
84
- ; AVX2-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP1]])
83
+ ; AVX2-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
84
+ ; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
85
85
; AVX2-NEXT: [[AB3:%.*]] = call float @llvm.ceil.f32(float [[A3]])
86
- ; AVX2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
87
- ; AVX2-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.ceil.v2f32(<2 x float> [[TMP3]])
88
- ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
89
- ; AVX2-NEXT: [[TMP6:%.*]] = call <2 x float> @llvm.floor.v2f32(<2 x float> [[TMP5]])
86
+ ; AVX2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[A]])
87
+ ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <2 x i32> <i32 4, i32 5>
88
+ ; AVX2-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[A]])
89
+ ; AVX2-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <2 x i32> <i32 6, i32 7>
90
90
; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x float> poison, float [[AB0]], i64 0
91
91
; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
92
92
; AVX2-NEXT: [[R23:%.*]] = shufflevector <8 x float> [[R0]], <8 x float> [[TMP7]], <8 x i32> <i32 0, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
0 commit comments