Commit e00f189

[InstCombine] Revert rL226781 "Teach InstCombine to canonicalize loads which are only ever stored to always use a legal integer type if one is available." (PR47592)
(It was introduced in https://lists.llvm.org/pipermail/llvm-dev/2015-January/080956.html.)

This canonicalization seems dubious. Most importantly, while it does not create `inttoptr` casts by itself, it may cause them to appear later; see e.g. D88788. That is pretty clearly an undesirable outcome: by now we've established that seemingly no-op `inttoptr`/`ptrtoint` casts are not no-ops, and we are no longer eager to look past them. Which means, e.g., that given
```
%a = load i32
%b = inttoptr %a
%c = inttoptr %a
```
we likely won't be able to tell that `%b` and `%c` are the same thing.

As we can see in D88789 / D88788 / D88806 / D75505, we can't really teach SCEV about this (not without https://bugs.llvm.org/show_bug.cgi?id=47592 at least), and we can't recover the situation post-inlining in InstCombine. So this fold really does look like it actively breaks otherwise-good IR, in a way that is not recoverable, which also means it isn't helpful in exposing these patterns to passes that are otherwise unaware of them.

Thus, I propose to simply not perform this canonicalization. The original motivational RFC does not state what larger problem the canonicalization was trying to solve, so I'm not sure how this plays out in the larger picture.

On vanilla llvm test-suite + RawSpeed, this results in an increase of asm instruction count and final object size of ~+0.05%, and decreases the final count of bitcasts by -4.79% (-28990), of ptrtoint casts by -15.41% (-3423), and of inttoptr casts by -25.59% (-6919, *sic*). Overall, there are -0.04% fewer IR basic blocks and -0.39% fewer instructions.

See https://bugs.llvm.org/show_bug.cgi?id=47592

Differential Revision: https://reviews.llvm.org/D88789
1 parent 567462b · commit e00f189
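For illustration, here is a minimal sketch of what the reverted canonicalization did to a load that is only ever stored back, in the spirit of the `pr27490a` test updated below. The function names and the assumption that i64 is a legal integer type for the target are made up for this example, not part of the patch:
```
; Before the (now-reverted) canonicalization: a pointer load whose only use
; is a store.
define void @only_stored(i8** %p1, i8** %p2) {
  %l = load i8*, i8** %p1
  store i8* %l, i8** %p2
  ret void
}

; What rL226781 turned it into (assuming i64 is a legal integer type for the
; target): the load/store now operate on i64 and bitcasts of the pointers
; appear. If the loaded value is later needed as a pointer again, an
; inttoptr cast has to be introduced, which is what this revert avoids.
define void @only_stored.canonicalized(i8** %p1, i8** %p2) {
  %1 = bitcast i8** %p1 to i64*
  %l1 = load i64, i64* %1
  %2 = bitcast i8** %p2 to i64*
  store i64 %l1, i64* %2
  ret void
}
```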

File tree: 10 files changed, +107 -182 lines changed
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c

Lines changed: 8 additions & 10 deletions
@@ -255,22 +255,20 @@ svbool_t read_bool(struct struct_bool *s) {
 // CHECK-256-NEXT: entry:
 // CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i32*
-// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32*
-// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <4 x i8>*
+// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-256-NEXT: store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
 // CHECK-256-NEXT: ret void
 //
 // CHECK-512-LABEL: @write_bool(
 // CHECK-512-NEXT: entry:
 // CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA15:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to i64*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1
-// CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64*
-// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, [[TBAA6]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
+// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
 // CHECK-512-NEXT: ret void
 //
 void write_bool(struct struct_bool *s, svbool_t x) {

clang/test/CodeGen/attr-arm-sve-vector-bits-call.c

Lines changed: 36 additions & 42 deletions
@@ -169,28 +169,24 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
 // CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
-// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64*
-// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP8]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i1> [[TMP9]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP10]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT: [[TMP11:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP13:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16
-// CHECK-NEXT: [[TMP14:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP14]]
+// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
+// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 16
+// CHECK-NEXT: [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP2]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP4]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13:!tbaa !.*]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
 //
 fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
 return svsel(pg, op1, op2);
@@ -260,20 +256,18 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
 // CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64*
-// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1> [[OP2:%.*]])
-// CHECK-NEXT: store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT: [[TMP6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP8:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16
-// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP9]]
+// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[OP2:%.*]])
+// CHECK-NEXT: store <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP5]]
 //
 fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
 return svsel(pg, op1, op2);
@@ -325,12 +319,12 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) {
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, [[TBAA13]]
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to i64*
-// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
 //
 fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) {
 return svsel(pg, op1, op2);

clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Lines changed: 11 additions & 13 deletions
@@ -81,13 +81,11 @@ fixed_float64_t from_svfloat64_t(svfloat64_t type) {
 // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x i8>, align 16
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to <vscale x 16 x i1>*
 // CHECK-NEXT: store <vscale x 16 x i1> [[TYPE_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
-// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64*
-// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
-// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP3]], align 16, [[TBAA6]]
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
+// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x i8>, <8 x i8>* [[TYPE]], align 16, [[TBAA6]]
+// CHECK-NEXT: store <8 x i8> [[TYPE1]], <8 x i8>* [[TYPE_ADDR]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to <vscale x 16 x i1>*
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, [[TBAA6]]
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
 //
 svbool_t to_svbool_t(fixed_bool_t type) {
 return type;
@@ -98,12 +96,12 @@ svbool_t to_svbool_t(fixed_bool_t type) {
 // CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-NEXT: store <vscale x 16 x i1> [[TYPE:%.*]], <vscale x 16 x i1>* [[TYPE_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to i64*
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA6]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to i64*
-// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16
-// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
-// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP3]]
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[TYPE_ADDR]] to <8 x i8>*
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
+// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
+// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
+// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP2]]
 //
 fixed_bool_t from_svbool_t(svbool_t type) {
 return type;

clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c

Lines changed: 3 additions & 3 deletions
@@ -72,9 +72,9 @@ void write_global_bf16(svbfloat16_t v) { global_bf16 = v; }
 // CHECK-512-NEXT: entry:
 // CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
 // CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA13:!tbaa !.*]]
-// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to i64*
-// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, [[TBAA10]]
-// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, [[TBAA10]]
+// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <8 x i8>*
+// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA10]]
+// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA10]]
 // CHECK-512-NEXT: ret void
 //
 void write_global_bool(svbool_t v) { global_bool = v; }

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 0 additions & 34 deletions
@@ -554,42 +554,8 @@ static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
   if (LI.getPointerOperand()->isSwiftError())
     return nullptr;
 
-  Type *Ty = LI.getType();
   const DataLayout &DL = IC.getDataLayout();
 
-  // Try to canonicalize loads which are only ever stored to operate over
-  // integers instead of any other type. We only do this when the loaded type
-  // is sized and has a size exactly the same as its store size and the store
-  // size is a legal integer type.
-  // Do not perform canonicalization if minmax pattern is found (to avoid
-  // infinite loop).
-  Type *Dummy;
-  if (!Ty->isIntegerTy() && Ty->isSized() && !isa<ScalableVectorType>(Ty) &&
-      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
-      DL.typeSizeEqualsStoreSize(Ty) && !DL.isNonIntegralPointerType(Ty) &&
-      !isMinMaxWithLoads(InstCombiner::peekThroughBitcast(
-                             LI.getPointerOperand(), /*OneUseOnly=*/true),
-                         Dummy)) {
-    if (all_of(LI.users(), [&LI](User *U) {
-          auto *SI = dyn_cast<StoreInst>(U);
-          return SI && SI->getPointerOperand() != &LI &&
-                 !SI->getPointerOperand()->isSwiftError();
-        })) {
-      LoadInst *NewLoad = IC.combineLoadToNewType(
-          LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
-      // Replace all the stores with stores of the newly loaded value.
-      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
-        auto *SI = cast<StoreInst>(*UI++);
-        IC.Builder.SetInsertPoint(SI);
-        combineStoreToNewValue(IC, *SI, NewLoad);
-        IC.eraseInstFromFunction(*SI);
-      }
-      assert(LI.use_empty() && "Failed to remove all users of the load!");
-      // Return the old load so the combiner can delete it safely.
-      return &LI;
-    }
-  }
-
   // Fold away bit casts of the loaded value by loading the desired type.
   // We can do this for BitCastInsts as well as casts from and to pointer types,
   // as long as those are noops (i.e., the source or dest type have the same

llvm/test/Transforms/InstCombine/atomic.ll

Lines changed: 6 additions & 12 deletions
@@ -325,11 +325,9 @@ declare void @clobber()
 
 define i32 @test18(float* %p) {
 ; CHECK-LABEL: @test18(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P:%.*]] to i32*
-; CHECK-NEXT: [[X1:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
+; CHECK-NEXT: [[X:%.*]] = load atomic float, float* [[P:%.*]] unordered, align 4
 ; CHECK-NEXT: call void @clobber()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P]] to i32*
-; CHECK-NEXT: store atomic i32 [[X1]], i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT: store atomic float [[X]], float* [[P]] unordered, align 4
 ; CHECK-NEXT: ret i32 0
 ;
 %x = load atomic float, float* %p unordered, align 4
@@ -376,10 +374,8 @@ define i32 @test21(i32** %p, i8* %v) {
 
 define void @pr27490a(i8** %p1, i8** %p2) {
 ; CHECK-LABEL: @pr27490a(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT: store volatile i64 [[L1]], i64* [[TMP2]], align 8
+; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT: store volatile i8* [[L]], i8** [[P2:%.*]], align 8
 ; CHECK-NEXT: ret void
 ;
 %l = load i8*, i8** %p1
@@ -389,10 +385,8 @@ define void @pr27490a(i8** %p1, i8** %p2) {
 
 define void @pr27490b(i8** %p1, i8** %p2) {
 ; CHECK-LABEL: @pr27490b(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8** [[P1:%.*]] to i64*
-; CHECK-NEXT: [[L1:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[P2:%.*]] to i64*
-; CHECK-NEXT: store atomic i64 [[L1]], i64* [[TMP2]] seq_cst, align 8
+; CHECK-NEXT: [[L:%.*]] = load i8*, i8** [[P1:%.*]], align 8
+; CHECK-NEXT: store atomic i8* [[L]], i8** [[P2:%.*]] seq_cst, align 8
 ; CHECK-NEXT: ret void
 ;
 %l = load i8*, i8** %p1
