Skip to content

Commit b507509

Browse files
authored
[AArch64] Allow SVE code generation for fixed-width vectors (#67122)
This patch enables SVE code generation for fixed-width (128-bit) vectors, lowering masked loads and stores through SVE predicates that mimic NEON mask semantics.
1 parent c7be2de commit b507509

File tree

8 files changed

+298
-78
lines changed

8 files changed

+298
-78
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5619,9 +5619,7 @@ SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
56195619
assert(LoadNode && "Expected custom lowering of a masked load node");
56205620
EVT VT = Op->getValueType(0);
56215621

5622-
if (useSVEForFixedLengthVectorVT(
5623-
VT,
5624-
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
5622+
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
56255623
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
56265624

56275625
SDValue PassThru = LoadNode->getPassThru();

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
254254
return false;
255255

256256
// For fixed vectors, avoid scalarization if using SVE for them.
257-
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
257+
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
258+
DataType->getPrimitiveSizeInBits() != 128)
258259
return false; // Fall back to scalarization of masked operations.
259260

260261
return isElementTypeLegalForScalableVector(DataType->getScalarType());
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sve < %s | FileCheck %s
3+
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
;
8+
; Masked Load
9+
;
10+
11+
define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
12+
; CHECK-LABEL: masked_load_v16i8:
13+
; CHECK: // %bb.0:
14+
; CHECK-NEXT: ptrue p0.b, vl16
15+
; CHECK-NEXT: shl v0.16b, v0.16b, #7
16+
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
17+
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
18+
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
19+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
20+
; CHECK-NEXT: ret
21+
%load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
22+
ret <16 x i8> %load
23+
}
24+
25+
define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
26+
; CHECK-LABEL: masked_load_v8f16:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
29+
; CHECK-NEXT: ptrue p0.h, vl8
30+
; CHECK-NEXT: shl v0.8h, v0.8h, #15
31+
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
32+
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
33+
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
34+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
35+
; CHECK-NEXT: ret
36+
%load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
37+
ret <8 x half> %load
38+
}
39+
40+
define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
41+
; CHECK-LABEL: masked_load_v4f32:
42+
; CHECK: // %bb.0:
43+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
44+
; CHECK-NEXT: ptrue p0.s, vl4
45+
; CHECK-NEXT: shl v0.4s, v0.4s, #31
46+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
47+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
48+
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
49+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
50+
; CHECK-NEXT: ret
51+
%load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
52+
ret <4 x float> %load
53+
}
54+
55+
define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
56+
; CHECK-LABEL: masked_load_v2f64:
57+
; CHECK: // %bb.0:
58+
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
59+
; CHECK-NEXT: ptrue p0.d, vl2
60+
; CHECK-NEXT: shl v0.2d, v0.2d, #63
61+
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
62+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
63+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
64+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
65+
; CHECK-NEXT: ret
66+
%load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
67+
ret <2 x double> %load
68+
}
69+
70+
define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x double> %passthru) {
71+
; CHECK-LABEL: masked_load_passthru_v2f64:
72+
; CHECK: // %bb.0:
73+
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
74+
; CHECK-NEXT: ptrue p0.d, vl2
75+
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
76+
; CHECK-NEXT: shl v0.2d, v0.2d, #63
77+
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
78+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
79+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
80+
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
81+
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
82+
; CHECK-NEXT: ret
83+
%load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> %passthru)
84+
ret <2 x double> %load
85+
}
86+
87+
declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>)
88+
declare <8 x half> @llvm.masked.load.v8f16(ptr, i32, <8 x i1>, <8 x half>)
89+
declare <4 x float> @llvm.masked.load.v4f32(ptr, i32, <4 x i1>, <4 x float>)
90+
declare <2 x double> @llvm.masked.load.v2f64(ptr, i32, <2 x i1>, <2 x double>)
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mattr=+sve < %s | FileCheck %s
3+
4+
5+
target triple = "aarch64-unknown-linux-gnu"
6+
7+
;
8+
; Masked Store
9+
;
10+
11+
define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
12+
; CHECK-LABEL: masked_store_v16i8:
13+
; CHECK: // %bb.0:
14+
; CHECK-NEXT: ptrue p0.b, vl16
15+
; CHECK-NEXT: shl v0.16b, v0.16b, #7
16+
; CHECK-NEXT: movi v1.2d, #0000000000000000
17+
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
18+
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
19+
; CHECK-NEXT: st1b { z1.b }, p0, [x0]
20+
; CHECK-NEXT: ret
21+
call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
22+
ret void
23+
}
24+
25+
define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
26+
; CHECK-LABEL: masked_store_v8f16:
27+
; CHECK: // %bb.0:
28+
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
29+
; CHECK-NEXT: ptrue p0.h, vl8
30+
; CHECK-NEXT: movi v1.2d, #0000000000000000
31+
; CHECK-NEXT: shl v0.8h, v0.8h, #15
32+
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
33+
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
34+
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
35+
; CHECK-NEXT: ret
36+
call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
37+
ret void
38+
}
39+
40+
define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
41+
; CHECK-LABEL: masked_store_v4f32:
42+
; CHECK: // %bb.0:
43+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
44+
; CHECK-NEXT: ptrue p0.s, vl4
45+
; CHECK-NEXT: movi v1.2d, #0000000000000000
46+
; CHECK-NEXT: shl v0.4s, v0.4s, #31
47+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
48+
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
49+
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
50+
; CHECK-NEXT: ret
51+
call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
52+
ret void
53+
}
54+
55+
define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
56+
; CHECK-LABEL: masked_store_v2f64:
57+
; CHECK: // %bb.0:
58+
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
59+
; CHECK-NEXT: ptrue p0.d, vl2
60+
; CHECK-NEXT: movi v1.2d, #0000000000000000
61+
; CHECK-NEXT: shl v0.2d, v0.2d, #63
62+
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
63+
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
64+
; CHECK-NEXT: st1d { z1.d }, p0, [x0]
65+
; CHECK-NEXT: ret
66+
call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
67+
ret void
68+
}
69+
70+
declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
71+
declare void @llvm.masked.store.v8f16(<8 x half>, ptr, i32, <8 x i1>)
72+
declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
73+
declare void @llvm.masked.store.v2f64(<2 x double>, ptr, i32, <2 x i1>)

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
5050
ret <2 x float> %load
5151
}
5252

53-
define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
53+
define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
5454
; CHECK-LABEL: masked_load_v4f32:
5555
; CHECK: // %bb.0:
5656
; CHECK-NEXT: ptrue p0.s, vl4

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
4848
ret void
4949
}
5050

51-
define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
51+
define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
5252
; CHECK-LABEL: masked_store_v4f32:
5353
; CHECK: // %bb.0:
5454
; CHECK-NEXT: ptrue p0.s, vl4

llvm/test/Transforms/ScalarizeMaskedMemIntrin/AArch64/expand-masked-load.ll

Lines changed: 88 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK,CHECK-LE %s
3-
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK,CHECK-LE %s
4-
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK,CHECK-BE %s
2+
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE %s
3+
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck -check-prefixes=CHECK-LE-COMMON,CHECK-LE-SVE %s
4+
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=aarch64_be-linux-gnu -data-layout="E-m:o-i64:64-i128:128-n32:64-S128" | FileCheck -check-prefixes=CHECK-BE %s
55

66
define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
77
; CHECK-LE-LABEL: @scalarize_v2i64(
@@ -28,6 +28,10 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
2828
; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i64> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
2929
; CHECK-LE-NEXT: ret <2 x i64> [[RES_PHI_ELSE3]]
3030
;
31+
; CHECK-LE-SVE-LABEL: @scalarize_v2i64(
32+
; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 128, <2 x i1> [[MASK:%.*]], <2 x i64> [[PASSTHRU:%.*]])
33+
; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
34+
;
3135
; CHECK-BE-LABEL: @scalarize_v2i64(
3236
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
3337
; CHECK-BE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], -2
@@ -57,58 +61,83 @@ define <2 x i64> @scalarize_v2i64(ptr %p, <2 x i1> %mask, <2 x i64> %passthru) {
5761
}
5862

5963
define <2 x i64> @scalarize_v2i64_ones_mask(ptr %p, <2 x i64> %passthru) {
60-
; CHECK-LABEL: @scalarize_v2i64_ones_mask(
61-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
62-
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
64+
; CHECK-LE-LABEL: @scalarize_v2i64_ones_mask(
65+
; CHECK-LE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
66+
; CHECK-LE-NEXT: ret <2 x i64> [[TMP1]]
67+
;
68+
; CHECK-LE-SVE-LABEL: @scalarize_v2i64_ones_mask(
69+
; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
70+
; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
71+
;
72+
; CHECK-BE-LABEL: @scalarize_v2i64_ones_mask(
73+
; CHECK-BE-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 8
74+
; CHECK-BE-NEXT: ret <2 x i64> [[TMP1]]
6375
;
6476
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %passthru)
6577
ret <2 x i64> %ret
6678
}
6779

6880
define <2 x i64> @scalarize_v2i64_zero_mask(ptr %p, <2 x i64> %passthru) {
69-
; CHECK-LABEL: @scalarize_v2i64_zero_mask(
70-
; CHECK-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
81+
; CHECK-LE-LABEL: @scalarize_v2i64_zero_mask(
82+
; CHECK-LE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
83+
;
84+
; CHECK-LE-SVE-LABEL: @scalarize_v2i64_zero_mask(
85+
; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> zeroinitializer, <2 x i64> [[PASSTHRU:%.*]])
86+
; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
87+
;
88+
; CHECK-BE-LABEL: @scalarize_v2i64_zero_mask(
89+
; CHECK-BE-NEXT: ret <2 x i64> [[PASSTHRU:%.*]]
7190
;
7291
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 false>, <2 x i64> %passthru)
7392
ret <2 x i64> %ret
7493
}
7594

7695
define <2 x i64> @scalarize_v2i64_const_mask(ptr %p, <2 x i64> %passthru) {
77-
; CHECK-LABEL: @scalarize_v2i64_const_mask(
78-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
79-
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
80-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
81-
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
96+
; CHECK-LE-LABEL: @scalarize_v2i64_const_mask(
97+
; CHECK-LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
98+
; CHECK-LE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
99+
; CHECK-LE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
100+
; CHECK-LE-NEXT: ret <2 x i64> [[TMP3]]
101+
;
102+
; CHECK-LE-SVE-LABEL: @scalarize_v2i64_const_mask(
103+
; CHECK-LE-SVE-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[P:%.*]], i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> [[PASSTHRU:%.*]])
104+
; CHECK-LE-SVE-NEXT: ret <2 x i64> [[RET]]
105+
;
106+
; CHECK-BE-LABEL: @scalarize_v2i64_const_mask(
107+
; CHECK-BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i32 1
108+
; CHECK-BE-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
109+
; CHECK-BE-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[PASSTHRU:%.*]], i64 [[TMP2]], i64 1
110+
; CHECK-BE-NEXT: ret <2 x i64> [[TMP3]]
82111
;
83112
%ret = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %p, i32 8, <2 x i1> <i1 false, i1 true>, <2 x i64> %passthru)
84113
ret <2 x i64> %ret
85114
}
86115

87116
; This use a byte sized but non power of 2 element size. This used to crash due to bad alignment calculation.
88117
define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
89-
; CHECK-LE-LABEL: @scalarize_v2i24(
90-
; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
91-
; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
92-
; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
93-
; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
94-
; CHECK-LE: cond.load:
95-
; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
96-
; CHECK-LE-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
97-
; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
98-
; CHECK-LE-NEXT: br label [[ELSE]]
99-
; CHECK-LE: else:
100-
; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
101-
; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
102-
; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
103-
; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
104-
; CHECK-LE: cond.load1:
105-
; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
106-
; CHECK-LE-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
107-
; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
108-
; CHECK-LE-NEXT: br label [[ELSE2]]
109-
; CHECK-LE: else2:
110-
; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
111-
; CHECK-LE-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
118+
; CHECK-LE-COMMON-LABEL: @scalarize_v2i24(
119+
; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
120+
; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
121+
; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
122+
; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
123+
; CHECK-LE-COMMON: cond.load:
124+
; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i24, ptr [[P:%.*]], i32 0
125+
; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i24, ptr [[TMP3]], align 1
126+
; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i24> [[PASSTHRU:%.*]], i24 [[TMP4]], i64 0
127+
; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
128+
; CHECK-LE-COMMON: else:
129+
; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i24> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
130+
; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
131+
; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
132+
; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
133+
; CHECK-LE-COMMON: cond.load1:
134+
; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i24, ptr [[P]], i32 1
135+
; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i24, ptr [[TMP8]], align 1
136+
; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i24> [[RES_PHI_ELSE]], i24 [[TMP9]], i64 1
137+
; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
138+
; CHECK-LE-COMMON: else2:
139+
; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i24> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
140+
; CHECK-LE-COMMON-NEXT: ret <2 x i24> [[RES_PHI_ELSE3]]
112141
;
113142
; CHECK-BE-LABEL: @scalarize_v2i24(
114143
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
@@ -140,29 +169,29 @@ define <2 x i24> @scalarize_v2i24(ptr %p, <2 x i1> %mask, <2 x i24> %passthru) {
140169

141170
; This use a byte sized but non power of 2 element size. This used to crash due to bad alignment calculation.
142171
define <2 x i48> @scalarize_v2i48(ptr %p, <2 x i1> %mask, <2 x i48> %passthru) {
143-
; CHECK-LE-LABEL: @scalarize_v2i48(
144-
; CHECK-LE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
145-
; CHECK-LE-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
146-
; CHECK-LE-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
147-
; CHECK-LE-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
148-
; CHECK-LE: cond.load:
149-
; CHECK-LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
150-
; CHECK-LE-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
151-
; CHECK-LE-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
152-
; CHECK-LE-NEXT: br label [[ELSE]]
153-
; CHECK-LE: else:
154-
; CHECK-LE-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
155-
; CHECK-LE-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
156-
; CHECK-LE-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
157-
; CHECK-LE-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
158-
; CHECK-LE: cond.load1:
159-
; CHECK-LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
160-
; CHECK-LE-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
161-
; CHECK-LE-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
162-
; CHECK-LE-NEXT: br label [[ELSE2]]
163-
; CHECK-LE: else2:
164-
; CHECK-LE-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
165-
; CHECK-LE-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
172+
; CHECK-LE-COMMON-LABEL: @scalarize_v2i48(
173+
; CHECK-LE-COMMON-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2
174+
; CHECK-LE-COMMON-NEXT: [[TMP1:%.*]] = and i2 [[SCALAR_MASK]], 1
175+
; CHECK-LE-COMMON-NEXT: [[TMP2:%.*]] = icmp ne i2 [[TMP1]], 0
176+
; CHECK-LE-COMMON-NEXT: br i1 [[TMP2]], label [[COND_LOAD:%.*]], label [[ELSE:%.*]]
177+
; CHECK-LE-COMMON: cond.load:
178+
; CHECK-LE-COMMON-NEXT: [[TMP3:%.*]] = getelementptr inbounds i48, ptr [[P:%.*]], i32 0
179+
; CHECK-LE-COMMON-NEXT: [[TMP4:%.*]] = load i48, ptr [[TMP3]], align 2
180+
; CHECK-LE-COMMON-NEXT: [[TMP5:%.*]] = insertelement <2 x i48> [[PASSTHRU:%.*]], i48 [[TMP4]], i64 0
181+
; CHECK-LE-COMMON-NEXT: br label [[ELSE]]
182+
; CHECK-LE-COMMON: else:
183+
; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE:%.*]] = phi <2 x i48> [ [[TMP5]], [[COND_LOAD]] ], [ [[PASSTHRU]], [[TMP0:%.*]] ]
184+
; CHECK-LE-COMMON-NEXT: [[TMP6:%.*]] = and i2 [[SCALAR_MASK]], -2
185+
; CHECK-LE-COMMON-NEXT: [[TMP7:%.*]] = icmp ne i2 [[TMP6]], 0
186+
; CHECK-LE-COMMON-NEXT: br i1 [[TMP7]], label [[COND_LOAD1:%.*]], label [[ELSE2:%.*]]
187+
; CHECK-LE-COMMON: cond.load1:
188+
; CHECK-LE-COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i48, ptr [[P]], i32 1
189+
; CHECK-LE-COMMON-NEXT: [[TMP9:%.*]] = load i48, ptr [[TMP8]], align 2
190+
; CHECK-LE-COMMON-NEXT: [[TMP10:%.*]] = insertelement <2 x i48> [[RES_PHI_ELSE]], i48 [[TMP9]], i64 1
191+
; CHECK-LE-COMMON-NEXT: br label [[ELSE2]]
192+
; CHECK-LE-COMMON: else2:
193+
; CHECK-LE-COMMON-NEXT: [[RES_PHI_ELSE3:%.*]] = phi <2 x i48> [ [[TMP10]], [[COND_LOAD1]] ], [ [[RES_PHI_ELSE]], [[ELSE]] ]
194+
; CHECK-LE-COMMON-NEXT: ret <2 x i48> [[RES_PHI_ELSE3]]
166195
;
167196
; CHECK-BE-LABEL: @scalarize_v2i48(
168197
; CHECK-BE-NEXT: [[SCALAR_MASK:%.*]] = bitcast <2 x i1> [[MASK:%.*]] to i2

0 commit comments

Comments (0)