Skip to content

Commit 10eefc4

Browse files
author
Jenkins
committed
merge main into amd-staging
Change-Id: Ie5a156bd75f226b592ad6bb10ff9b853b55c4c34
2 parents ea8dcd3 + 4804805 commit 10eefc4

File tree

11 files changed

+1331
-596
lines changed

11 files changed

+1331
-596
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3883,7 +3883,7 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType,
38833883
int pos = 3 + highSignificandSize;
38843884
mlir::Value index = builder.create<mlir::arith::AndIOp>(
38853885
loc, builder.create<mlir::arith::ShRUIOp>(loc, intVal, signShift),
3886-
createIntegerConstant(1 << pos));
3886+
createIntegerConstant(1ULL << pos));
38873887

38883888
// [e] exponent != 0
38893889
mlir::Value exponent =
@@ -3895,7 +3895,7 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType,
38953895
loc,
38963896
builder.create<mlir::arith::CmpIOp>(
38973897
loc, mlir::arith::CmpIPredicate::ne, exponent, zero),
3898-
createIntegerConstant(1 << --pos), zero));
3898+
createIntegerConstant(1ULL << --pos), zero));
38993899

39003900
// [m] exponent == 1..1 (max exponent)
39013901
index = builder.create<mlir::arith::OrIOp>(
@@ -3904,7 +3904,7 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType,
39043904
loc,
39053905
builder.create<mlir::arith::CmpIOp>(
39063906
loc, mlir::arith::CmpIPredicate::eq, exponent, exponentMask),
3907-
createIntegerConstant(1 << --pos), zero));
3907+
createIntegerConstant(1ULL << --pos), zero));
39083908

39093909
// [l] low-order significand != 0
39103910
index = builder.create<mlir::arith::OrIOp>(
@@ -3916,7 +3916,7 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType,
39163916
builder.create<mlir::arith::AndIOp>(loc, intVal,
39173917
lowSignificandMask),
39183918
zero),
3919-
createIntegerConstant(1 << --pos), zero));
3919+
createIntegerConstant(1ULL << --pos), zero));
39203920

39213921
// [h] high-order significand (1 or 2 bits)
39223922
index = builder.create<mlir::arith::OrIOp>(

lld/ELF/Arch/X86_64.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,10 @@ bool X86_64::relaxOnce(int pass) const {
328328
if (rel.expr != R_RELAX_GOT_PC)
329329
continue;
330330

331-
uint64_t v = sec->getRelocTargetVA(
332-
sec->file, rel.type, rel.addend,
333-
sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr);
331+
uint64_t v = sec->getRelocTargetVA(sec->file, rel.type, rel.addend,
332+
sec->getOutputSection()->addr +
333+
sec->outSecOff + rel.offset,
334+
*rel.sym, rel.expr);
334335
if (isInt<32>(v))
335336
continue;
336337
if (rel.sym->auxIdx == 0) {

lld/test/ELF/x86-64-gotpc-relax-too-far.s

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
# RUN: llvm-objdump --no-print-imm-hex -d %t/bin | FileCheck --check-prefix=DISASM %s
66
# RUN: llvm-readelf -S %t/bin | FileCheck --check-prefixes=GOT %s
77
# RUN: ld.lld -T %t/lds2 %t/a.o -o %t/bin2
8-
# RUN: llvm-readelf -S %t/bin2 | FileCheck --check-prefixes=UNNECESSARY-GOT %s
8+
# RUN: llvm-objdump --no-print-imm-hex -d %t/bin2 | FileCheck --check-prefix=DISASM %s
9+
# RUN: llvm-readelf -S %t/bin2 | FileCheck --check-prefixes=GOT %s
10+
# RUN: ld.lld -T %t/lds3 %t/a.o -o %t/bin3
11+
# RUN: llvm-readelf -S %t/bin3 | FileCheck --check-prefixes=UNNECESSARY-GOT %s
912

1013
# DISASM: <_foo>:
1114
# DISASM-NEXT: movl 2097146(%rip), %eax
@@ -47,6 +50,13 @@ SECTIONS {
4750
data 0x80200000 : { *(data) }
4851
}
4952
#--- lds2
53+
SECTIONS {
54+
.text.foo 0x100000 : { *(.text.foo) }
55+
.text 0x1ff000 : { . = . + 0x1000 ; *(.text) }
56+
.got 0x300000 : { *(.got) }
57+
data 0x80200000 : { *(data) }
58+
}
59+
#--- lds3
5060
SECTIONS {
5161
.text.foo 0x100000 : { *(.text.foo) }
5262
.text 0x200000 : { *(.text) }

llvm/include/llvm/IR/InstrTypes.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,17 @@ class CmpInst : public Instruction {
10581058
static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1,
10591059
Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
10601060

1061+
/// Construct a compare instruction, given the opcode, the predicate,
1062+
/// the two operands and the instruction to copy the flags from. Optionally
1063+
/// (if InsertBefore is specified) insert the instruction into a BasicBlock
1064+
/// right before the specified instruction. The specified Instruction is
1065+
/// allowed to be a dereferenced end iterator. Returns the created CmpInst.
1066+
static CmpInst *CreateWithCopiedFlags(OtherOps Op, Predicate Pred, Value *S1,
1067+
Value *S2,
1068+
const Instruction *FlagsSource,
1069+
const Twine &Name = "",
1070+
Instruction *InsertBefore = nullptr);
1071+
10611072
/// Get the opcode casted to the right type
10621073
OtherOps getOpcode() const {
10631074
return static_cast<OtherOps>(Instruction::getOpcode());

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,20 +2529,28 @@ static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
25292529
return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
25302530
}
25312531

2532-
// Attempt to form avgceilu(A, B) from (A | B) - ((A ^ B) >> 1)
2533-
static SDValue combineFixedwidthToAVGCEILU(SDNode *N, SelectionDAG &DAG) {
2532+
// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1): a logical
// shift (srl) yields AVGCEILU, an arithmetic shift (sra) yields AVGCEILS.
2533+
static SDValue combineFixedwidthToAVGCEIL(SDNode *N, SelectionDAG &DAG) {
25342534
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
25352535
SDValue N0 = N->getOperand(0);
25362536
EVT VT = N0.getValueType();
25372537
SDLoc DL(N);
2538+
SDValue A, B;
2539+
25382540
if (TLI.isOperationLegal(ISD::AVGCEILU, VT)) {
2539-
SDValue A, B;
25402541
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
25412542
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
25422543
m_SpecificInt(1))))) {
25432544
return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
25442545
}
25452546
}
2547+
if (TLI.isOperationLegal(ISD::AVGCEILS, VT)) {
2548+
if (sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2549+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2550+
m_SpecificInt(1))))) {
2551+
return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2552+
}
2553+
}
25462554
return SDValue();
25472555
}
25482556

@@ -2837,20 +2845,29 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
28372845
return SDValue();
28382846
}
28392847

2840-
// Attempt to form avgflooru(A, B) from (A & B) + ((A ^ B) >> 1)
2841-
static SDValue combineFixedwidthToAVGFLOORU(SDNode *N, SelectionDAG &DAG) {
2848+
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1): a logical
// shift (srl) yields AVGFLOORU, an arithmetic shift (sra) yields AVGFLOORS.
2849+
static SDValue combineFixedwidthToAVGFLOOR(SDNode *N, SelectionDAG &DAG) {
28422850
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
28432851
SDValue N0 = N->getOperand(0);
28442852
EVT VT = N0.getValueType();
28452853
SDLoc DL(N);
2854+
SDValue A, B;
2855+
28462856
if (TLI.isOperationLegal(ISD::AVGFLOORU, VT)) {
2847-
SDValue A, B;
28482857
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
28492858
m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
28502859
m_SpecificInt(1))))) {
28512860
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
28522861
}
28532862
}
2863+
if (TLI.isOperationLegal(ISD::AVGFLOORS, VT)) {
2864+
if (sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2865+
m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2866+
m_SpecificInt(1))))) {
2867+
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2868+
}
2869+
}
2870+
28542871
return SDValue();
28552872
}
28562873

@@ -2869,8 +2886,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
28692886
if (SDValue V = foldAddSubOfSignBit(N, DAG))
28702887
return V;
28712888

2872-
// Try to match AVGFLOORU fixedwidth pattern
2873-
if (SDValue V = combineFixedwidthToAVGFLOORU(N, DAG))
2889+
// Try to match AVGFLOOR fixedwidth pattern
2890+
if (SDValue V = combineFixedwidthToAVGFLOOR(N, DAG))
28742891
return V;
28752892

28762893
// fold (a+b) -> (a|b) iff a and b share no bits.
@@ -3868,8 +3885,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
38683885
if (SDValue V = foldAddSubOfSignBit(N, DAG))
38693886
return V;
38703887

3871-
// Try to match AVGCEILU fixedwidth pattern
3872-
if (SDValue V = combineFixedwidthToAVGCEILU(N, DAG))
3888+
// Try to match AVGCEIL fixedwidth pattern
3889+
if (SDValue V = combineFixedwidthToAVGCEIL(N, DAG))
38733890
return V;
38743891

38753892
if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))

llvm/lib/IR/Instructions.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4623,6 +4623,16 @@ CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2,
46234623
S1, S2, Name);
46244624
}
46254625

4626+
CmpInst *CmpInst::CreateWithCopiedFlags(OtherOps Op, Predicate Pred, Value *S1,
4627+
Value *S2,
4628+
const Instruction *FlagsSource,
4629+
const Twine &Name,
4630+
Instruction *InsertBefore) {
4631+
CmpInst *Inst = Create(Op, Pred, S1, S2, Name, InsertBefore);
4632+
Inst->copyIRFlags(FlagsSource);
4633+
return Inst;
4634+
}
4635+
46264636
void CmpInst::swapOperands() {
46274637
if (ICmpInst *IC = dyn_cast<ICmpInst>(this))
46284638
IC->swapOperands();

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,9 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
487487
// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
488488
Value *E0 = Builder.CreateExtractElement(X, Index);
489489
Value *E1 = Builder.CreateExtractElement(Y, Index);
490-
return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
490+
CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
491+
return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
492+
SrcCmpInst);
491493
}
492494

493495
if (auto *I = dyn_cast<Instruction>(SrcVec)) {

llvm/test/CodeGen/AArch64/hadd-combine.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,18 @@ define <8 x i16> @sub_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
341341
ret <8 x i16> %res
342342
}
343343

344+
define <8 x i16> @srhadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
345+
; CHECK-LABEL: srhadd_fixedwidth_v8i16:
346+
; CHECK: // %bb.0:
347+
; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
348+
; CHECK-NEXT: ret
349+
%or = or <8 x i16> %a0, %a1
350+
%xor = xor <8 x i16> %a0, %a1
351+
%srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
352+
%res = sub <8 x i16> %or, %srl
353+
ret <8 x i16> %res
354+
}
355+
344356
define <8 x i16> @rhaddu_base(<8 x i16> %src1, <8 x i16> %src2) {
345357
; CHECK-LABEL: rhaddu_base:
346358
; CHECK: // %bb.0:
@@ -879,6 +891,18 @@ define <8 x i16> @uhadd_fixedwidth_v4i32(<8 x i16> %a0, <8 x i16> %a1) {
879891
ret <8 x i16> %res
880892
}
881893

894+
define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
895+
; CHECK-LABEL: shadd_fixedwidth_v8i16:
896+
; CHECK: // %bb.0:
897+
; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
898+
; CHECK-NEXT: ret
899+
%and = and <8 x i16> %a0, %a1
900+
%xor = xor <8 x i16> %a0, %a1
901+
%srl = ashr <8 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
902+
%res = add <8 x i16> %and, %srl
903+
ret <8 x i16> %res
904+
}
905+
882906
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
883907
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
884908
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)

llvm/test/CodeGen/AArch64/setcc_knownbits.ll

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2-
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
2+
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
34

45
define i1 @load_bv_v4i8(i1 zeroext %a) {
56
; CHECK-LABEL: load_bv_v4i8:
@@ -11,18 +12,31 @@ define i1 @load_bv_v4i8(i1 zeroext %a) {
1112
}
1213

1314
define noundef i1 @logger(i32 noundef %logLevel, ptr %ea, ptr %pll) {
14-
; CHECK-LABEL: logger:
15-
; CHECK: // %bb.0: // %entry
16-
; CHECK-NEXT: ldr w8, [x2]
17-
; CHECK-NEXT: cmp w8, w0
18-
; CHECK-NEXT: b.ls .LBB1_2
19-
; CHECK-NEXT: // %bb.1:
20-
; CHECK-NEXT: mov w0, wzr
21-
; CHECK-NEXT: ret
22-
; CHECK-NEXT: .LBB1_2: // %land.rhs
23-
; CHECK-NEXT: ldr x8, [x1]
24-
; CHECK-NEXT: ldrb w0, [x8]
25-
; CHECK-NEXT: ret
15+
; CHECK-SD-LABEL: logger:
16+
; CHECK-SD: // %bb.0: // %entry
17+
; CHECK-SD-NEXT: ldr w8, [x2]
18+
; CHECK-SD-NEXT: cmp w8, w0
19+
; CHECK-SD-NEXT: b.ls .LBB1_2
20+
; CHECK-SD-NEXT: // %bb.1:
21+
; CHECK-SD-NEXT: mov w0, wzr
22+
; CHECK-SD-NEXT: ret
23+
; CHECK-SD-NEXT: .LBB1_2: // %land.rhs
24+
; CHECK-SD-NEXT: ldr x8, [x1]
25+
; CHECK-SD-NEXT: ldrb w0, [x8]
26+
; CHECK-SD-NEXT: ret
27+
;
28+
; CHECK-GI-LABEL: logger:
29+
; CHECK-GI: // %bb.0: // %entry
30+
; CHECK-GI-NEXT: ldr w8, [x2]
31+
; CHECK-GI-NEXT: cmp w8, w0
32+
; CHECK-GI-NEXT: mov w0, wzr
33+
; CHECK-GI-NEXT: b.hi .LBB1_2
34+
; CHECK-GI-NEXT: // %bb.1: // %land.rhs
35+
; CHECK-GI-NEXT: ldr x8, [x1]
36+
; CHECK-GI-NEXT: ldrb w8, [x8]
37+
; CHECK-GI-NEXT: and w0, w8, #0x1
38+
; CHECK-GI-NEXT: .LBB1_2: // %land.end
39+
; CHECK-GI-NEXT: ret
2640
entry:
2741
%0 = load i32, ptr %pll, align 4
2842
%cmp.not = icmp ugt i32 %0, %logLevel
@@ -44,30 +58,51 @@ land.end: ; preds = %land.rhs, %entry
4458

4559
declare i64 @llvm.ctlz.i64(i64 %in, i1)
4660
define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
47-
; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
48-
; CHECK: // %bb.0:
49-
; CHECK-NEXT: clz x8, x0
50-
; CHECK-NEXT: lsr x0, x8, #6
51-
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
52-
; CHECK-NEXT: ret
61+
; CHECK-SD-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
62+
; CHECK-SD: // %bb.0:
63+
; CHECK-SD-NEXT: clz x8, x0
64+
; CHECK-SD-NEXT: lsr x0, x8, #6
65+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0
66+
; CHECK-SD-NEXT: ret
67+
;
68+
; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
69+
; CHECK-GI: // %bb.0:
70+
; CHECK-GI-NEXT: clz x8, x0
71+
; CHECK-GI-NEXT: lsr x8, x8, #6
72+
; CHECK-GI-NEXT: cmp x8, #1
73+
; CHECK-GI-NEXT: cset w0, eq
74+
; CHECK-GI-NEXT: ret
5375
%ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
5476
%lshr = lshr i64 %ctlz, 6
5577
%icmp = icmp eq i64 %lshr, 1
5678
ret i1 %icmp
5779
}
5880

5981
define i32 @PR17487(i1 %tobool) {
60-
; CHECK-LABEL: PR17487:
61-
; CHECK: // %bb.0:
62-
; CHECK-NEXT: dup v0.2s, w0
63-
; CHECK-NEXT: mov w8, #1 // =0x1
64-
; CHECK-NEXT: dup v1.2d, x8
65-
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
66-
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
67-
; CHECK-NEXT: mov x8, v0.d[1]
68-
; CHECK-NEXT: cmp x8, #1
69-
; CHECK-NEXT: cset w0, ne
70-
; CHECK-NEXT: ret
82+
; CHECK-SD-LABEL: PR17487:
83+
; CHECK-SD: // %bb.0:
84+
; CHECK-SD-NEXT: dup v0.2s, w0
85+
; CHECK-SD-NEXT: mov w8, #1 // =0x1
86+
; CHECK-SD-NEXT: dup v1.2d, x8
87+
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
88+
; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b
89+
; CHECK-SD-NEXT: mov x8, v0.d[1]
90+
; CHECK-SD-NEXT: cmp x8, #1
91+
; CHECK-SD-NEXT: cset w0, ne
92+
; CHECK-SD-NEXT: ret
93+
;
94+
; CHECK-GI-LABEL: PR17487:
95+
; CHECK-GI: // %bb.0:
96+
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
97+
; CHECK-GI-NEXT: mov v0.d[1], x0
98+
; CHECK-GI-NEXT: adrp x8, .LCPI3_0
99+
; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
100+
; CHECK-GI-NEXT: bic v0.16b, v1.16b, v0.16b
101+
; CHECK-GI-NEXT: mov d0, v0.d[1]
102+
; CHECK-GI-NEXT: fmov x8, d0
103+
; CHECK-GI-NEXT: cmp x8, #1
104+
; CHECK-GI-NEXT: cset w0, ne
105+
; CHECK-GI-NEXT: ret
71106
%tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
72107
%tmp1 = zext <2 x i1> %tmp to <2 x i64>
73108
%tmp2 = xor <2 x i64> %tmp1, <i64 1, i64 1>

0 commit comments

Comments
 (0)