Skip to content

Commit 6c1fc82

Browse files
authored
[InstCombine] fold sub(zext(ptrtoint),zext(ptrtoint)) (#115369)
On a 32-bit target, if pointer arithmetic with `addrspace` is used in an i64 computation, the missed fold in InstCombine results in suboptimal performance, unlike the same code compiled for a 64-bit target.
1 parent 8ee638f commit 6c1fc82

File tree

2 files changed

+291
-1
lines changed

2 files changed

+291
-1
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2631,6 +2631,23 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
26312631
/* IsNUW */ false))
26322632
return replaceInstUsesWith(I, Res);
26332633

2634+
if (match(Op0, m_ZExt(m_PtrToIntSameSize(DL, m_Value(LHSOp)))) &&
2635+
match(Op1, m_ZExtOrSelf(m_PtrToInt(m_Value(RHSOp))))) {
2636+
if (auto *GEP = dyn_cast<GEPOperator>(LHSOp)) {
2637+
if (GEP->getPointerOperand() == RHSOp) {
2638+
if (GEP->hasNoUnsignedWrap() || GEP->hasNoUnsignedSignedWrap()) {
2639+
Value *Offset = EmitGEPOffset(GEP);
2640+
Value *Res = GEP->hasNoUnsignedWrap()
2641+
? Builder.CreateZExt(
2642+
Offset, I.getType(), "",
2643+
/*IsNonNeg=*/GEP->hasNoUnsignedSignedWrap())
2644+
: Builder.CreateSExt(Offset, I.getType());
2645+
return replaceInstUsesWith(I, Res);
2646+
}
2647+
}
2648+
}
2649+
}
2650+
26342651
// Canonicalize a shifty way to code absolute value to the common pattern.
26352652
// There are 2 potential commuted variants.
26362653
// We're relying on the fact that we only do this transform when the shift has

llvm/test/Transforms/InstCombine/sub-gep.ll

Lines changed: 274 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -S -passes=instcombine < %s | FileCheck %s
33

4-
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
4+
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-p2:32:32"
55

66
define i64 @test_inbounds(ptr %base, i64 %idx) {
77
; CHECK-LABEL: @test_inbounds(
@@ -270,6 +270,70 @@ define i64 @test25(ptr %P, i64 %A){
270270
ret i64 %G
271271
}
272272

273+
define i64 @zext_ptrtoint_sub_ptrtoint(ptr %p, i32 %offset) {
274+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint(
275+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET:%.*]] to i64
276+
; CHECK-NEXT: [[A:%.*]] = getelementptr bfloat, ptr @Arr, i64 [[TMP1]]
277+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64
278+
; CHECK-NEXT: [[C:%.*]] = and i64 [[TMP2]], 4294967294
279+
; CHECK-NEXT: [[D:%.*]] = sub i64 [[C]], ptrtoint (ptr @Arr to i64)
280+
; CHECK-NEXT: ret i64 [[D]]
281+
;
282+
%A = getelementptr bfloat, ptr @Arr, i32 %offset
283+
%B = ptrtoint ptr %A to i32
284+
%C = zext i32 %B to i64
285+
%D = sub i64 %C, ptrtoint (ptr @Arr to i64)
286+
ret i64 %D
287+
}
288+
289+
define i64 @ptrtoint_sub_zext_ptrtoint(ptr %p, i32 %offset) {
290+
; CHECK-LABEL: @ptrtoint_sub_zext_ptrtoint(
291+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET:%.*]] to i64
292+
; CHECK-NEXT: [[A:%.*]] = getelementptr bfloat, ptr @Arr, i64 [[TMP1]]
293+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64
294+
; CHECK-NEXT: [[C:%.*]] = and i64 [[TMP2]], 4294967294
295+
; CHECK-NEXT: [[D:%.*]] = sub i64 ptrtoint (ptr @Arr to i64), [[C]]
296+
; CHECK-NEXT: ret i64 [[D]]
297+
;
298+
%A = getelementptr bfloat, ptr @Arr, i32 %offset
299+
%B = ptrtoint ptr %A to i32
300+
%C = zext i32 %B to i64
301+
%D = sub i64 ptrtoint (ptr @Arr to i64), %C
302+
ret i64 %D
303+
}
304+
305+
define i64 @negative_zext_ptrtoint_sub_ptrtoint(ptr %p, i32 %offset) {
306+
; CHECK-LABEL: @negative_zext_ptrtoint_sub_ptrtoint(
307+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET:%.*]] to i64
308+
; CHECK-NEXT: [[A:%.*]] = getelementptr bfloat, ptr @Arr, i64 [[TMP1]]
309+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64
310+
; CHECK-NEXT: [[C:%.*]] = and i64 [[TMP2]], 65534
311+
; CHECK-NEXT: [[D:%.*]] = sub i64 [[C]], ptrtoint (ptr @Arr to i64)
312+
; CHECK-NEXT: ret i64 [[D]]
313+
;
314+
%A = getelementptr bfloat, ptr @Arr, i32 %offset
315+
%B = ptrtoint ptr %A to i16
316+
%C = zext i16 %B to i64
317+
%D = sub i64 %C, ptrtoint (ptr @Arr to i64)
318+
ret i64 %D
319+
}
320+
321+
define i64 @negative_ptrtoint_sub_zext_ptrtoint(ptr %p, i32 %offset) {
322+
; CHECK-LABEL: @negative_ptrtoint_sub_zext_ptrtoint(
323+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[OFFSET:%.*]] to i64
324+
; CHECK-NEXT: [[A:%.*]] = getelementptr bfloat, ptr @Arr, i64 [[TMP1]]
325+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A]] to i64
326+
; CHECK-NEXT: [[C:%.*]] = and i64 [[TMP2]], 65534
327+
; CHECK-NEXT: [[D:%.*]] = sub i64 ptrtoint (ptr @Arr to i64), [[C]]
328+
; CHECK-NEXT: ret i64 [[D]]
329+
;
330+
%A = getelementptr bfloat, ptr @Arr, i32 %offset
331+
%B = ptrtoint ptr %A to i16
332+
%C = zext i16 %B to i64
333+
%D = sub i64 ptrtoint (ptr @Arr to i64), %C
334+
ret i64 %D
335+
}
336+
273337
@Arr_as1 = external addrspace(1) global [42 x i16]
274338

275339
define i16 @test25_as1(ptr addrspace(1) %P, i64 %A) {
@@ -285,6 +349,215 @@ define i16 @test25_as1(ptr addrspace(1) %P, i64 %A) {
285349
ret i16 %G
286350
}
287351

352+
@Arr_as2 = external addrspace(2) global [42 x i16]
353+
354+
define i64 @ptrtoint_sub_zext_ptrtoint_as2_inbounds(i32 %offset) {
355+
; CHECK-LABEL: @ptrtoint_sub_zext_ptrtoint_as2_inbounds(
356+
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds bfloat, ptr addrspace(2) @Arr_as2, i32 [[OFFSET:%.*]]
357+
; CHECK-NEXT: [[B:%.*]] = ptrtoint ptr addrspace(2) [[A]] to i32
358+
; CHECK-NEXT: [[C:%.*]] = zext i32 [[B]] to i64
359+
; CHECK-NEXT: [[D:%.*]] = sub nsw i64 ptrtoint (ptr addrspace(2) @Arr_as2 to i64), [[C]]
360+
; CHECK-NEXT: ret i64 [[D]]
361+
;
362+
%A = getelementptr inbounds bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
363+
%B = ptrtoint ptr addrspace(2) %A to i32
364+
%C = zext i32 %B to i64
365+
%D = sub i64 ptrtoint (ptr addrspace(2) @Arr_as2 to i64), %C
366+
ret i64 %D
367+
}
368+
369+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nusw(i32 %offset) {
370+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nusw(
371+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw i32 [[OFFSET:%.*]], 1
372+
; CHECK-NEXT: [[D:%.*]] = sext i32 [[A_IDX]] to i64
373+
; CHECK-NEXT: ret i64 [[D]]
374+
;
375+
%A = getelementptr nusw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
376+
%B = ptrtoint ptr addrspace(2) %A to i32
377+
%C = zext i32 %B to i64
378+
%D = sub i64 %C, ptrtoint (ptr addrspace(2) @Arr_as2 to i64)
379+
ret i64 %D
380+
}
381+
382+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nuw(i32 %offset) {
383+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nuw(
384+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw i32 [[OFFSET:%.*]], 1
385+
; CHECK-NEXT: [[D:%.*]] = zext i32 [[A_IDX]] to i64
386+
; CHECK-NEXT: ret i64 [[D]]
387+
;
388+
%A = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
389+
%B = ptrtoint ptr addrspace(2) %A to i32
390+
%C = zext i32 %B to i64
391+
%D = sub i64 %C, ptrtoint (ptr addrspace(2) @Arr_as2 to i64)
392+
ret i64 %D
393+
}
394+
395+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nusw_nuw(i32 %offset) {
396+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nusw_nuw(
397+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw nsw i32 [[OFFSET:%.*]], 1
398+
; CHECK-NEXT: [[D:%.*]] = zext nneg i32 [[A_IDX]] to i64
399+
; CHECK-NEXT: ret i64 [[D]]
400+
;
401+
%A = getelementptr nusw nuw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
402+
%B = ptrtoint ptr addrspace(2) %A to i32
403+
%C = zext i32 %B to i64
404+
%D = sub i64 %C, ptrtoint (ptr addrspace(2) @Arr_as2 to i64)
405+
ret i64 %D
406+
}
407+
408+
define i64 @zext_ptrtoint_sub_zext_ptrtoint_as2_nusw(i32 %offset) {
409+
; CHECK-LABEL: @zext_ptrtoint_sub_zext_ptrtoint_as2_nusw(
410+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw i32 [[OFFSET:%.*]], 1
411+
; CHECK-NEXT: [[E:%.*]] = sext i32 [[A_IDX]] to i64
412+
; CHECK-NEXT: ret i64 [[E]]
413+
;
414+
%A = getelementptr nusw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
415+
%B = ptrtoint ptr addrspace(2) %A to i32
416+
%C = zext i32 %B to i64
417+
%D = zext i32 ptrtoint (ptr addrspace(2) @Arr_as2 to i32) to i64
418+
%E = sub i64 %C, %D
419+
ret i64 %E
420+
}
421+
422+
define i64 @zext_ptrtoint_sub_zext_ptrtoint_as2_nuw(i32 %offset) {
423+
; CHECK-LABEL: @zext_ptrtoint_sub_zext_ptrtoint_as2_nuw(
424+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw i32 [[OFFSET:%.*]], 1
425+
; CHECK-NEXT: [[E:%.*]] = zext i32 [[A_IDX]] to i64
426+
; CHECK-NEXT: ret i64 [[E]]
427+
;
428+
%A = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
429+
%B = ptrtoint ptr addrspace(2) %A to i32
430+
%C = zext i32 %B to i64
431+
%D = zext i32 ptrtoint (ptr addrspace(2) @Arr_as2 to i32) to i64
432+
%E = sub i64 %C, %D
433+
ret i64 %E
434+
}
435+
436+
define i64 @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw(i32 %offset) {
437+
; CHECK-LABEL: @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw(
438+
; CHECK-NEXT: [[A:%.*]] = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 [[OFFSET:%.*]]
439+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[A]] to i32
440+
; CHECK-NEXT: [[B_MASK:%.*]] = and i32 [[TMP1]], 65534
441+
; CHECK-NEXT: [[C:%.*]] = zext nneg i32 [[B_MASK]] to i64
442+
; CHECK-NEXT: [[D:%.*]] = sub nsw i64 [[C]], ptrtoint (ptr addrspace(2) @Arr_as2 to i64)
443+
; CHECK-NEXT: ret i64 [[D]]
444+
;
445+
%A = getelementptr nuw bfloat, ptr addrspace(2) @Arr_as2, i32 %offset
446+
%B = ptrtoint ptr addrspace(2) %A to i16
447+
%C = zext i16 %B to i64
448+
%D = sub i64 %C, ptrtoint (ptr addrspace(2) @Arr_as2 to i64)
449+
ret i64 %D
450+
}
451+
452+
define i64 @ptrtoint_sub_zext_ptrtoint_as2_inbounds_local(ptr addrspace(2) %p, i32 %offset) {
453+
; CHECK-LABEL: @ptrtoint_sub_zext_ptrtoint_as2_inbounds_local(
454+
; CHECK-NEXT: [[A:%.*]] = getelementptr inbounds bfloat, ptr addrspace(2) [[P:%.*]], i32 [[OFFSET:%.*]]
455+
; CHECK-NEXT: [[B:%.*]] = ptrtoint ptr addrspace(2) [[A]] to i32
456+
; CHECK-NEXT: [[C:%.*]] = zext i32 [[B]] to i64
457+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[P]] to i32
458+
; CHECK-NEXT: [[CC:%.*]] = zext i32 [[TMP1]] to i64
459+
; CHECK-NEXT: [[D:%.*]] = sub nsw i64 [[CC]], [[C]]
460+
; CHECK-NEXT: ret i64 [[D]]
461+
;
462+
%A = getelementptr inbounds bfloat, ptr addrspace(2) %p, i32 %offset
463+
%B = ptrtoint ptr addrspace(2) %A to i32
464+
%C = zext i32 %B to i64
465+
%CC = ptrtoint ptr addrspace(2) %p to i64
466+
%D = sub i64 %CC, %C
467+
ret i64 %D
468+
}
469+
470+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nusw_local(ptr addrspace(2) %p, i32 %offset) {
471+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nusw_local(
472+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw i32 [[OFFSET:%.*]], 1
473+
; CHECK-NEXT: [[D:%.*]] = sext i32 [[A_IDX]] to i64
474+
; CHECK-NEXT: ret i64 [[D]]
475+
;
476+
%A = getelementptr nusw bfloat, ptr addrspace(2) %p, i32 %offset
477+
%B = ptrtoint ptr addrspace(2) %A to i32
478+
%C = zext i32 %B to i64
479+
%CC = ptrtoint ptr addrspace(2) %p to i64
480+
%D = sub i64 %C, %CC
481+
ret i64 %D
482+
}
483+
484+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nuw_local(ptr addrspace(2) %p, i32 %offset) {
485+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nuw_local(
486+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw i32 [[OFFSET:%.*]], 1
487+
; CHECK-NEXT: [[D:%.*]] = zext i32 [[A_IDX]] to i64
488+
; CHECK-NEXT: ret i64 [[D]]
489+
;
490+
%A = getelementptr nuw bfloat, ptr addrspace(2) %p, i32 %offset
491+
%B = ptrtoint ptr addrspace(2) %A to i32
492+
%C = zext i32 %B to i64
493+
%CC = ptrtoint ptr addrspace(2) %p to i64
494+
%D = sub i64 %C, %CC
495+
ret i64 %D
496+
}
497+
498+
define i64 @zext_ptrtoint_sub_ptrtoint_as2_nusw_nuw_local(ptr addrspace(2) %p, i32 %offset) {
499+
; CHECK-LABEL: @zext_ptrtoint_sub_ptrtoint_as2_nusw_nuw_local(
500+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw nsw i32 [[OFFSET:%.*]], 1
501+
; CHECK-NEXT: [[D:%.*]] = zext nneg i32 [[A_IDX]] to i64
502+
; CHECK-NEXT: ret i64 [[D]]
503+
;
504+
%A = getelementptr nusw nuw bfloat, ptr addrspace(2) %p, i32 %offset
505+
%B = ptrtoint ptr addrspace(2) %A to i32
506+
%C = zext i32 %B to i64
507+
%CC = ptrtoint ptr addrspace(2) %p to i64
508+
%D = sub i64 %C, %CC
509+
ret i64 %D
510+
}
511+
512+
define i64 @zext_ptrtoint_sub_zext_ptrtoint_as2_nusw_local(ptr addrspace(2) %p, i32 %offset) {
513+
; CHECK-LABEL: @zext_ptrtoint_sub_zext_ptrtoint_as2_nusw_local(
514+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nsw i32 [[OFFSET:%.*]], 1
515+
; CHECK-NEXT: [[E:%.*]] = sext i32 [[A_IDX]] to i64
516+
; CHECK-NEXT: ret i64 [[E]]
517+
;
518+
%A = getelementptr nusw bfloat, ptr addrspace(2) %p, i32 %offset
519+
%B = ptrtoint ptr addrspace(2) %A to i32
520+
%C = zext i32 %B to i64
521+
%CC = ptrtoint ptr addrspace(2) %p to i32
522+
%D = zext i32 %CC to i64
523+
%E = sub i64 %C, %D
524+
ret i64 %E
525+
}
526+
527+
define i64 @zext_ptrtoint_sub_zext_ptrtoint_as2_nuw_local(ptr addrspace(2) %p, i32 %offset) {
528+
; CHECK-LABEL: @zext_ptrtoint_sub_zext_ptrtoint_as2_nuw_local(
529+
; CHECK-NEXT: [[A_IDX:%.*]] = shl nuw i32 [[OFFSET:%.*]], 1
530+
; CHECK-NEXT: [[E:%.*]] = zext i32 [[A_IDX]] to i64
531+
; CHECK-NEXT: ret i64 [[E]]
532+
;
533+
%A = getelementptr nuw bfloat, ptr addrspace(2) %p, i32 %offset
534+
%B = ptrtoint ptr addrspace(2) %A to i32
535+
%C = zext i32 %B to i64
536+
%CC = ptrtoint ptr addrspace(2) %p to i32
537+
%D = zext i32 %CC to i64
538+
%E = sub i64 %C, %D
539+
ret i64 %E
540+
}
541+
542+
define i64 @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw_local(ptr addrspace(2) %p, i32 %offset) {
543+
; CHECK-LABEL: @negative_zext_ptrtoint_sub_ptrtoint_as2_nuw_local(
544+
; CHECK-NEXT: [[A:%.*]] = getelementptr nuw bfloat, ptr addrspace(2) [[P:%.*]], i32 [[OFFSET:%.*]]
545+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(2) [[A]] to i32
546+
; CHECK-NEXT: [[B_MASK:%.*]] = and i32 [[TMP1]], 65535
547+
; CHECK-NEXT: [[C:%.*]] = zext nneg i32 [[B_MASK]] to i64
548+
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[P]] to i32
549+
; CHECK-NEXT: [[CC:%.*]] = zext i32 [[TMP2]] to i64
550+
; CHECK-NEXT: [[D:%.*]] = sub nsw i64 [[C]], [[CC]]
551+
; CHECK-NEXT: ret i64 [[D]]
552+
;
553+
%A = getelementptr nuw bfloat, ptr addrspace(2) %p, i32 %offset
554+
%B = ptrtoint ptr addrspace(2) %A to i16
555+
%C = zext i16 %B to i64
556+
%CC = ptrtoint ptr addrspace(2) %p to i64
557+
%D = sub i64 %C, %CC
558+
ret i64 %D
559+
}
560+
288561
define i64 @test30(ptr %foo, i64 %i, i64 %j) {
289562
; CHECK-LABEL: @test30(
290563
; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2

0 commit comments

Comments
 (0)